From d9d88f14488ff73a21876b151e08ada91288236e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 20 May 2015 10:25:11 +0200 Subject: [PATCH] [pixiv] fix ugoira parsing --- gallery_dl/extractor/pixiv.py | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 828ee9d2..fa0ac764 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -52,7 +52,7 @@ class PixivExtractor(SequentialExtractor): work.update(metadata) if work["type"] == "ugoira": - url, framelist = self.parse_ugoira(work["id"]) + url, framelist = self.parse_ugoira(work) work["extension"] = "zip" yield Message.Url, url, work.copy() work["extension"] = "txt" @@ -93,19 +93,22 @@ class PixivExtractor(SequentialExtractor): return page = pinfo["next"] - def parse_ugoira(self, illust_id): + def parse_ugoira(self, data): """Parse ugoira data""" # get illust page text = self.request( - self.illust_url, params={"illust_id": illust_id}, + self.illust_url, params={"illust_id": data["id"]}, ).text # parse page - url , pos = self.extract(text, 'ugokuIllustFullscreenData = {"src":"', '"') - frames, pos = self.extract(text, '"frames":[', ']', pos) + frames, _ = self.extract(text, ',"frames":[', ']') - # fix url - url = url.replace("\\/", "/") + # build url + url = re.sub( + r"/img-original/(.+/\d+)[^/]+", + r"/img-zip-ugoira/\g<1>_ugoira1920x1080.zip", + data["url"] + ) # build framelist framelist = re.sub(