[weibo] handle posts with more than 9 images (closes #926)

Responses from '/api/container/getIndex' don't list more than
9 images per 'status' object, but the embedded JSON from a
'/detail/<ID>' page does.
pull/1053/head
Mike Fährmann 4 years ago
parent dd1e545597
commit 73373c06ec
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -47,21 +47,31 @@ class WeiboExtractor(Extractor):
file["num"] = num
yield Message.Url, file["url"], file
# NOTE(review): presumably a hook for subclasses -- nothing but the
# docstring is visible as this method's body in this view, and
# WeiboStatusExtractor defines its own statuses() further down.
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects"""
def _status_by_id(self, status_id):
    """Return the 'status' object embedded in a '/detail/<ID>' page

    The page is requested with 'fatal=False'; when the
    'var $render_data = [' ... '][0] || {};' section cannot be
    extracted from the response text, None is returned instead.
    """
    detail_url = "{}/detail/{}".format(self.root, status_id)
    response = self.request(detail_url, fatal=False)

    # the JSON payload sits between these two markers in the page source
    render_data = text.extract(
        response.text, "var $render_data = [", "][0] || {};")[0]
    if not render_data:
        return None
    return json.loads(render_data)["status"]
def _files_from_status(self, status):
images = status.pop("pics", ())
page_info = status.pop("page_info", ())
for image in images:
pid = image["pid"]
if "large" in image:
image = image["large"]
geo = image.get("geo") or {}
yield text.nameext_from_url(image["url"], {
"url" : image["url"],
"pid" : pid,
"width" : text.parse_int(geo.get("width")),
"height": text.parse_int(geo.get("height")),
})
if "pics" in status:
if len(status["pics"]) < status["pic_num"]:
status = self._status_by_id(status["id"]) or status
for image in status.pop("pics"):
pid = image["pid"]
if "large" in image:
image = image["large"]
geo = image.get("geo") or {}
yield text.nameext_from_url(image["url"], {
"url" : image["url"],
"pid" : pid,
"width" : text.parse_int(geo.get("width")),
"height": text.parse_int(geo.get("height")),
})
if self.videos and "media_info" in page_info:
info = page_info["media_info"]
@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield data
# NOTE(review): presumably a hook for subclasses -- nothing but the
# docstring is visible as this method's body in this view, and
# WeiboStatusExtractor defines its own statuses() further down.
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects"""
class WeiboUserExtractor(WeiboExtractor):
"""Extractor for all images of a user on weibo.cn"""
@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
while True:
data = self.request(url, params=params).json()
cards = data["data"]["cards"]
for card in data["data"]["cards"]:
if not cards:
return
for card in cards:
if "mblog" in card:
yield card["mblog"]
if not data["data"]["cards"]:
return
params["page"] += 1
@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
self.status_id = match.group(1)
# NOTE(review): this span is a diff rendering without +/- markers or
# indentation, so two versions of the method body are interleaved below
# and it is not runnable as written. The "-145,9 +152,7" hunk header is
# consistent with the url/page/data lines and the json.loads() return
# being one version, and the _status_by_id() lines being the other --
# confirm against the actual commit before relying on this reading.
def statuses(self):
# Variant A: request the '/detail/<ID>' page and extract the embedded
# '$render_data' JSON inline.
url = "{}/detail/{}".format(self.root, self.status_id)
page = self.request(url, notfound="status").text
data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
if not data:
# Variant B: delegate fetching and parsing to self._status_by_id().
status = self._status_by_id(self.status_id)
if not status:
# Presumably common to both variants: no usable status -> NotFoundError.
raise exception.NotFoundError("status")
# Both variants return a 1-tuple holding the single status object.
return (json.loads(data)["status"],)
return (status,)

Loading…
Cancel
Save