|
|
|
@ -47,11 +47,21 @@ class WeiboExtractor(Extractor):
|
|
|
|
|
file["num"] = num
|
|
|
|
|
yield Message.Url, file["url"], file
|
|
|
|
|
|
|
|
|
|
def statuses(self):
    """Hook that supplies the iterable of relevant 'status' objects.

    This version has no body beyond its docstring and therefore
    returns None; the extractor classes in this file define their
    own 'statuses' to produce actual results.
    """
|
|
|
|
|
|
|
|
|
|
def _status_by_id(self, status_id):
    """Load a single status object from its detail page.

    Requests "<root>/detail/<status_id>" and decodes the JSON text found
    between "var $render_data = [" and "][0] || {};" in the response.

    Returns the value stored under "status", or None when the marker is
    missing, the embedded text is not valid JSON, or the decoded payload
    has no "status" entry. Callers test the result for truthiness, so
    every failure mode is reported the same way instead of raising.
    """
    url = "{}/detail/{}".format(self.root, status_id)
    # NOTE(review): fatal=False presumably keeps HTTP errors from
    # raising, so an error page simply lacks the marker -- confirm.
    page = self.request(url, fatal=False).text
    data = text.extract(page, "var $render_data = [", "][0] || {};")[0]
    if not data:
        return None

    try:
        payload = json.loads(data)
    except ValueError:
        # json.JSONDecodeError is a ValueError subclass
        return None

    # A page may embed an empty or non-object payload (e.g. "{}");
    # treat that as "not found" rather than raising KeyError.
    if not isinstance(payload, dict):
        return None
    return payload.get("status")
|
|
|
|
|
|
|
|
|
|
def _files_from_status(self, status):
|
|
|
|
|
images = status.pop("pics", ())
|
|
|
|
|
page_info = status.pop("page_info", ())
|
|
|
|
|
|
|
|
|
|
for image in images:
|
|
|
|
|
if "pics" in status:
|
|
|
|
|
if len(status["pics"]) < status["pic_num"]:
|
|
|
|
|
status = self._status_by_id(status["id"]) or status
|
|
|
|
|
for image in status.pop("pics"):
|
|
|
|
|
pid = image["pid"]
|
|
|
|
|
if "large" in image:
|
|
|
|
|
image = image["large"]
|
|
|
|
@ -79,9 +89,6 @@ class WeiboExtractor(Extractor):
|
|
|
|
|
data["_ytdl_extra"] = {"protocol": "m3u8_native"}
|
|
|
|
|
yield data
|
|
|
|
|
|
|
|
|
|
def statuses(self):
    """Hook that supplies the iterable of relevant 'status' objects.

    This version has no body beyond its docstring and therefore
    returns None; the extractor classes in this file define their
    own 'statuses' to produce actual results.
    """
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class WeiboUserExtractor(WeiboExtractor):
|
|
|
|
|
"""Extractor for all images of a user on weibo.cn"""
|
|
|
|
@ -107,13 +114,13 @@ class WeiboUserExtractor(WeiboExtractor):
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
data = self.request(url, params=params).json()
|
|
|
|
|
cards = data["data"]["cards"]
|
|
|
|
|
|
|
|
|
|
for card in data["data"]["cards"]:
|
|
|
|
|
if not cards:
|
|
|
|
|
return
|
|
|
|
|
for card in cards:
|
|
|
|
|
if "mblog" in card:
|
|
|
|
|
yield card["mblog"]
|
|
|
|
|
|
|
|
|
|
if not data["data"]["cards"]:
|
|
|
|
|
return
|
|
|
|
|
params["page"] += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
@ -145,9 +152,7 @@ class WeiboStatusExtractor(WeiboExtractor):
|
|
|
|
|
self.status_id = match.group(1)
|
|
|
|
|
|
|
|
|
|
def statuses(self):
    """Return a 1-tuple containing the status named by self.status_id.

    Raises:
        exception.NotFoundError: if no status could be loaded.
    """
    # Fetching and parsing the detail page lives in _status_by_id;
    # reuse it here instead of repeating the request/extract/decode
    # sequence in this method.
    status = self._status_by_id(self.status_id)
    if not status:
        raise exception.NotFoundError("status")
    return (status,)
|
|
|
|
|