[weibo] handle posts with more than 9 images (closes #926)

Responses from '/api/container/getIndex' don't list more than
9 images per 'status' object, but the embedded JSON from a
'/detail/<ID>' page does.
pull/1053/head
Mike Fährmann 4 years ago
parent dd1e545597
commit 73373c06ec
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -47,11 +47,21 @@ class WeiboExtractor(Extractor):
file["num"] = num
yield Message.Url, file["url"], file
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects

As shown here the method has no body besides this docstring, so
calling it directly yields None; WeiboStatusExtractor defines its
own statuses() further down.
"""
def _status_by_id(self, status_id):
    """Fetch the 'status' object embedded in a '/detail/<ID>' page.

    The page is requested with fatal=False and the JSON text between
    the 'var $render_data = [' and '][0] || {};' markers is decoded.

    Returns the value stored under the "status" key, or None when no
    embedded JSON could be extracted from the page.
    """
    response = self.request(
        "{}/detail/{}".format(self.root, status_id), fatal=False)
    render_data = text.extract(
        response.text, "var $render_data = [", "][0] || {};")[0]
    if not render_data:
        return None
    return json.loads(render_data)["status"]
def _files_from_status(self, status):
images = status.pop("pics", ())
page_info = status.pop("page_info", ())
for image in images:
if "pics" in status:
if len(status["pics"]) < status["pic_num"]:
status = self._status_by_id(status["id"]) or status
for image in status.pop("pics"):
pid = image["pid"]
if "large" in image:
image = image["large"]
@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
yield data
def statuses(self):
"""Returns an iterable containing all relevant 'status' objects

As shown here the method has no body besides this docstring, so
calling it directly yields None; WeiboStatusExtractor defines its
own statuses() further down.
"""
class WeiboUserExtractor(WeiboExtractor):
"""Extractor for all images of a user on weibo.cn"""
@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
while True:
data = self.request(url, params=params).json()
cards = data["data"]["cards"]
for card in data["data"]["cards"]:
if not cards:
return
for card in cards:
if "mblog" in card:
yield card["mblog"]
if not data["data"]["cards"]:
return
params["page"] += 1
@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
self.status_id = match.group(1)
def statuses(self):
"""Return a 1-tuple holding the 'status' object for self.status_id

Raises exception.NotFoundError("status") when no status is found.
"""
# NOTE(review): this span is a diff rendering whose +/- markers were
# stripped, so two versions of the body appear interleaved. Judging by
# the hunk header (9 old lines -> 7 new lines), the next four statements
# look like the pre-commit body, which fetched and parsed the detail
# page inline -- confirm against the actual commit.
url = "{}/detail/{}".format(self.root, self.status_id)
page = self.request(url, notfound="status").text
data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
if not data:
# Presumably the replacement lines: delegate the lookup to
# _status_by_id() and treat a falsy result as "not found".
status = self._status_by_id(self.status_id)
if not status:
raise exception.NotFoundError("status")
# Presumably the old return value, decoded inline from the page ...
return (json.loads(data)["status"],)
# ... and the new one, wrapping the status from _status_by_id().
return (status,)

Loading…
Cancel
Save