[nitter] add 'retweets' option (#3278)

pull/3307/head
Mike Fährmann 2 years ago
parent 3d6489a4c0
commit a41d093bb1
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1860,6 +1860,16 @@ Description
You can use ``"all"`` instead of listing all values separately.

extractor.nitter.retweets
-------------------------
Type
    ``bool``
Default
    ``false``
Description
    Fetch media from Retweets.

extractor.nitter.videos
-----------------------
Type

@ -205,6 +205,7 @@
"include": "illustration,doujin" "include": "illustration,doujin"
}, },
"nitter": { "nitter": {
"retweets": false,
"videos": true "videos": true
}, },
"oauth": "oauth":

@ -26,6 +26,7 @@ class NitterExtractor(BaseExtractor):
self.user_obj = None self.user_obj = None
def items(self): def items(self):
retweets = self.config("retweets", False)
videos = self.config("videos", True) videos = self.config("videos", True)
if videos: if videos:
ytdl = (videos == "ytdl") ytdl = (videos == "ytdl")
@ -35,6 +36,10 @@ class NitterExtractor(BaseExtractor):
for tweet_html in self.tweets(): for tweet_html in self.tweets():
tweet = self._tweet_from_html(tweet_html) tweet = self._tweet_from_html(tweet_html)
if not retweets and tweet["retweet"]:
self.log.debug("Skipping %s (retweet)", tweet["tweet_id"])
continue
attachments = tweet.pop("_attach", "") attachments = tweet.pop("_attach", "")
if attachments: if attachments:
files = [] files = []
@ -87,13 +92,13 @@ class NitterExtractor(BaseExtractor):
extr('<span class="tweet-date', '') extr('<span class="tweet-date', '')
link = extr('href="', '"') link = extr('href="', '"')
return { return {
"author": author, "author" : author,
"user": self.user_obj or author, "user" : self.user_obj or author,
"date": text.parse_datetime( "date" : text.parse_datetime(
extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"), extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
"tweet_id": link.rpartition("/")[2].partition("#")[0], "tweet_id": link.rpartition("/")[2].partition("#")[0],
"content": extr('class="tweet-content', "</div").partition(">")[2], "content": extr('class="tweet-content', "</div").partition(">")[2],
"_attach": extr('class="attachments', 'class="tweet-stats'), "_attach" : extr('class="attachments', 'class="tweet-stats'),
"comments": text.parse_int(extr( "comments": text.parse_int(extr(
'class="icon-comment', '</div>').rpartition(">")[2]), 'class="icon-comment', '</div>').rpartition(">")[2]),
"retweets": text.parse_int(extr( "retweets": text.parse_int(extr(
@ -102,6 +107,7 @@ class NitterExtractor(BaseExtractor):
'class="icon-quote', '</div>').rpartition(">")[2]), 'class="icon-quote', '</div>').rpartition(">")[2]),
"likes" : text.parse_int(extr( "likes" : text.parse_int(extr(
'class="icon-heart', '</div>').rpartition(">")[2]), 'class="icon-heart', '</div>').rpartition(">")[2]),
"retweet" : 'class="retweet-header' in html,
} }
def _user_from_html(self, html): def _user_from_html(self, html):

Loading…
Cancel
Save