[weibo] fix retweets (#2825, #3874, #5263)

- handle 快转 retweets
- disable 'retweets' by default
- skip all retweet media when 'retweets' are disabled
- extract all retweet media when 'retweets' is set to "original"
pull/5321/head
Mike Fährmann 7 months ago
parent 0676a9d6ec
commit ace16f00f5
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -4039,7 +4039,7 @@ extractor.weibo.retweets
Type
``bool``
Default
``true``
``false``
Description
Fetch media from retweeted posts.

@@ -30,9 +30,9 @@ class WeiboExtractor(Extractor):
self._prefix, self.user = match.groups()
def _init(self):
self.retweets = self.config("retweets", True)
self.videos = self.config("videos", True)
self.livephoto = self.config("livephoto", True)
self.retweets = self.config("retweets", False)
self.videos = self.config("videos", True)
self.gifs = self.config("gifs", True)
self.gifs_video = (self.gifs == "video")
@@ -59,15 +59,25 @@ class WeiboExtractor(Extractor):
for status in self.statuses():
files = []
if self.retweets and "retweeted_status" in status:
if "ori_mid" in status and not self.retweets:
self.log.debug("Skipping %s (快转 retweet)", status["id"])
continue
if "retweeted_status" in status:
if not self.retweets:
self.log.debug("Skipping %s (retweet)", status["id"])
continue
# videos of the original post are in status
# images of the original post are in status["retweeted_status"]
files = []
self._extract_status(status, files)
self._extract_status(status["retweeted_status"], files)
if original_retweets:
status = status["retweeted_status"]
self._extract_status(status, files)
else:
self._extract_status(status, files)
self._extract_status(status["retweeted_status"], files)
else:
files = []
self._extract_status(status, files)
status["date"] = text.parse_datetime(

@@ -80,11 +80,11 @@ __tests__ = (
"#category": ("", "weibo", "home"),
"#class" : weibo.WeiboHomeExtractor,
"#range" : "1-30",
"#count" : 30,
"#count" : 0,
},
{
"#url" : "https://weibo.com/1758989602?tabtype=feed",
"#url" : "https://weibo.com/2553930725?tabtype=feed",
"#category": ("", "weibo", "feed"),
"#class" : weibo.WeiboFeedExtractor,
"#range" : "1-30",
@@ -194,6 +194,28 @@ __tests__ = (
"#class" : weibo.WeiboStatusExtractor,
},
{
"#url" : "https://weibo.cn/detail/4600272267522211",
"#comment" : "retweet",
"#category": ("", "weibo", "status"),
"#class" : weibo.WeiboStatusExtractor,
"#count" : 0,
},
{
"#url" : "https://weibo.cn/detail/4600272267522211",
"#comment" : "retweet",
"#category": ("", "weibo", "status"),
"#class" : weibo.WeiboStatusExtractor,
"#options" : {"retweets": True},
"#count" : 2,
"status": {
"id" : 4600272267522211,
"retweeted_status": {"id": 4600167083287033},
},
},
{
"#url" : "https://m.weibo.cn/detail/4600272267522211",
"#comment" : "original retweets (#1542)",

Loading…
Cancel
Save