From 73373c06ec8b482b13d90fc017f05d74a66d890c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Tue, 6 Oct 2020 18:16:08 +0200
Subject: [PATCH] [weibo] handle posts with more than 9 images (closes #926)

Responses from '/api/container/getIndex' don't list more than 9 images
per 'status' object, but the embedded JSON from a '/detail/' page does.
---
 gallery_dl/extractor/weibo.py | 55 +++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 25 deletions(-)

diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b1b2d93..a325f873 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
                 file["num"] = num
                 yield Message.Url, file["url"], file
 
+    def statuses(self):
+        """Returns an iterable containing all relevant 'status' objects"""
+
+    def _status_by_id(self, status_id):
+        url = "{}/detail/{}".format(self.root, status_id)
+        page = self.request(url, fatal=False).text
+        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+        return json.loads(data)["status"] if data else None
+
     def _files_from_status(self, status):
-        images = status.pop("pics", ())
         page_info = status.pop("page_info", ())
-
-        for image in images:
-            pid = image["pid"]
-            if "large" in image:
-                image = image["large"]
-            geo = image.get("geo") or {}
-            yield text.nameext_from_url(image["url"], {
-                "url"   : image["url"],
-                "pid"   : pid,
-                "width" : text.parse_int(geo.get("width")),
-                "height": text.parse_int(geo.get("height")),
-            })
+        if "pics" in status:
+            if len(status["pics"]) < status["pic_num"]:
+                status = self._status_by_id(status["id"]) or status
+            for image in status.pop("pics"):
+                pid = image["pid"]
+                if "large" in image:
+                    image = image["large"]
+                geo = image.get("geo") or {}
+                yield text.nameext_from_url(image["url"], {
+                    "url"   : image["url"],
+                    "pid"   : pid,
+                    "width" : text.parse_int(geo.get("width")),
+                    "height": text.parse_int(geo.get("height")),
+                })
 
         if self.videos and "media_info" in page_info:
             info = page_info["media_info"]
@@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
                     data["_ytdl_extra"] = {"protocol": "m3u8_native"}
                 yield data
 
-    def statuses(self):
-        """Returns an iterable containing all relevant 'status' objects"""
-
 
 class WeiboUserExtractor(WeiboExtractor):
     """Extractor for all images of a user on weibo.cn"""
@@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
 
         while True:
             data = self.request(url, params=params).json()
+            cards = data["data"]["cards"]
 
-            for card in data["data"]["cards"]:
+            if not cards:
+                return
+            for card in cards:
                 if "mblog" in card:
                     yield card["mblog"]
-
-            if not data["data"]["cards"]:
-                return
             params["page"] += 1
 
 
@@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
         self.status_id = match.group(1)
 
     def statuses(self):
-        url = "{}/detail/{}".format(self.root, self.status_id)
-        page = self.request(url, notfound="status").text
-        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
-        if not data:
+        status = self._status_by_id(self.status_id)
+        if not status:
             raise exception.NotFoundError("status")
-        return (json.loads(data)["status"],)
+        return (status,)