From 73373c06ec8b482b13d90fc017f05d74a66d890c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Tue, 6 Oct 2020 18:16:08 +0200
Subject: [PATCH] [weibo] handle posts with more than 9 images (closes #926)

Responses from '/api/container/getIndex' don't list more than
9 images per 'status' object, but the embedded JSON from a
'/detail/<ID>' page does.
---
 gallery_dl/extractor/weibo.py | 55 +++++++++++++++++++----------------
 1 file changed, 30 insertions(+), 25 deletions(-)
diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py
index 0b1b2d93..a325f873 100644
--- a/gallery_dl/extractor/weibo.py
+++ b/gallery_dl/extractor/weibo.py
@@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
                 file["num"] = num
                 yield Message.Url, file["url"], file
 
+    def statuses(self):
+        """Returns an iterable containing all relevant 'status' objects"""
+
+    def _status_by_id(self, status_id):
+        url = "{}/detail/{}".format(self.root, status_id)
+        page = self.request(url, fatal=False).text
+        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
+        return json.loads(data)["status"] if data else None
+
     def _files_from_status(self, status):
-        images = status.pop("pics", ())
         page_info = status.pop("page_info", ())
-
-        for image in images:
-            pid = image["pid"]
-            if "large" in image:
-                image = image["large"]
-            geo = image.get("geo") or {}
-            yield text.nameext_from_url(image["url"], {
-                "url"   : image["url"],
-                "pid"   : pid,
-                "width" : text.parse_int(geo.get("width")),
-                "height": text.parse_int(geo.get("height")),
-            })
+        if "pics" in status:
+            if len(status["pics"]) < status["pic_num"]:
+                status = self._status_by_id(status["id"]) or status
+            for image in status.pop("pics"):
+                pid = image["pid"]
+                if "large" in image:
+                    image = image["large"]
+                geo = image.get("geo") or {}
+                yield text.nameext_from_url(image["url"], {
+                    "url"   : image["url"],
+                    "pid"   : pid,
+                    "width" : text.parse_int(geo.get("width")),
+                    "height": text.parse_int(geo.get("height")),
+                })
 
         if self.videos and "media_info" in page_info:
             info = page_info["media_info"]
@@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
                     data["_ytdl_extra"] = {"protocol": "m3u8_native"}
                 yield data
 
-    def statuses(self):
-        """Returns an iterable containing all relevant 'status' objects"""
-
 
 class WeiboUserExtractor(WeiboExtractor):
     """Extractor for all images of a user on weibo.cn"""
@@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
 
         while True:
             data = self.request(url, params=params).json()
+            cards = data["data"]["cards"]
 
-            for card in data["data"]["cards"]:
+            if not cards:
+                return
+            for card in cards:
                 if "mblog" in card:
                     yield card["mblog"]
-
-            if not data["data"]["cards"]:
-                return
             params["page"] += 1
 
 
@@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
         self.status_id = match.group(1)
 
     def statuses(self):
-        url = "{}/detail/{}".format(self.root, self.status_id)
-        page = self.request(url, notfound="status").text
-        data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
-        if not data:
+        status = self._status_by_id(self.status_id)
+        if not status:
             raise exception.NotFoundError("status")
-        return (json.loads(data)["status"],)
+        return (status,)