[nitter] add 'retweets' option (#3278)

2 years ago · a41d093bb1
parent 3d6489a4c0
commit a41d093bb1
3 changed files with 21 additions and 4 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1860,6 +1860,16 @@ Description
    You can use ``"all"`` instead of listing all values separately.


+extractor.nitter.retweets
+-------------------------
+Type
+    ``bool``
+Default
+    ``false``
+Description
+    Fetch media from Retweets.
+
+
 extractor.nitter.videos
 -----------------------
 Type
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -205,6 +205,7 @@
            "include": "illustration,doujin"
        },
        "nitter": {
+            "retweets": false,
            "videos": true
        },
        "oauth":
--- a/gallery_dl/extractor/nitter.py
+++ b/gallery_dl/extractor/nitter.py
@@ -26,6 +26,7 @@ class NitterExtractor(BaseExtractor):
        self.user_obj = None

    def items(self):
+        retweets = self.config("retweets", False)
        videos = self.config("videos", True)
        if videos:
            ytdl = (videos == "ytdl")
@@ -35,6 +36,10 @@ class NitterExtractor(BaseExtractor):
        for tweet_html in self.tweets():
            tweet = self._tweet_from_html(tweet_html)

+            if not retweets and tweet["retweet"]:
+                self.log.debug("Skipping %s (retweet)", tweet["tweet_id"])
+                continue
+
            attachments = tweet.pop("_attach", "")
            if attachments:
                files = []
@@ -87,13 +92,13 @@ class NitterExtractor(BaseExtractor):
        extr('<span class="tweet-date', '')
        link = extr('href="', '"')
        return {
-            "author": author,
-            "user": self.user_obj or author,
-            "date": text.parse_datetime(
+            "author"  : author,
+            "user"    : self.user_obj or author,
+            "date"    : text.parse_datetime(
                extr('title="', '"'), "%b %d, %Y · %I:%M %p %Z"),
            "tweet_id": link.rpartition("/")[2].partition("#")[0],
            "content": extr('class="tweet-content', "</div").partition(">")[2],
-            "_attach": extr('class="attachments', 'class="tweet-stats'),
+            "_attach" : extr('class="attachments', 'class="tweet-stats'),
            "comments": text.parse_int(extr(
                'class="icon-comment', '</div>').rpartition(">")[2]),
            "retweets": text.parse_int(extr(
@@ -102,6 +107,7 @@ class NitterExtractor(BaseExtractor):
                'class="icon-quote', '</div>').rpartition(">")[2]),
            "likes"   : text.parse_int(extr(
                'class="icon-heart', '</div>').rpartition(">")[2]),
+            "retweet" : 'class="retweet-header' in html,
        }

    def _user_from_html(self, html):