[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's syndication API
3 years ago · 1171911dc3
parent a53cfc845e
commit 1171911dc3
2 changed files with 90 additions and 32 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -2182,6 +2182,16 @@ Description
    ``4096x4096``, ``orig``, ``large``, ``medium``, and ``small``.
 extractor.twitter.syndication
 -----------------------------
 Type
    ``bool``
 Default
    ``false``
 Description
    Retrieve age-restricted content using Twitter's syndication API.
 extractor.twitter.logout
 ------------------------
 Type
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@ -217,23 +217,24 @@ class TwitterExtractor(Extractor):
        if "legacy" in tweet:
            tweet = tweet["legacy"]
        tget = tweet.get
        entities = tweet["entities"]
        tdata = {
            "tweet_id"      : text.parse_int(tweet["id_str"]),
            "retweet_id"    : text.parse_int(
-                tweet.get("retweeted_status_id_str")),
+                tget("retweeted_status_id_str")),
            "quote_id"      : text.parse_int(
-                tweet.get("quoted_status_id_str")),
+                tget("quoted_status_id_str")),
            "reply_id"      : text.parse_int(
-                tweet.get("in_reply_to_status_id_str")),
+                tget("in_reply_to_status_id_str")),
            "date"          : text.parse_datetime(
                tweet["created_at"], "%a %b %d %H:%M:%S %z %Y"),
            "user"          : user,
            "lang"          : tweet["lang"],
-            "favorite_count": tweet["favorite_count"],
+            "favorite_count": tget("favorite_count"),
-            "quote_count"   : tweet["quote_count"],
+            "quote_count"   : tget("quote_count"),
-            "reply_count"   : tweet["reply_count"],
+            "reply_count"   : tget("reply_count"),
-            "retweet_count" : tweet["retweet_count"],
+            "retweet_count" : tget("retweet_count"),
        }
        hashtags = entities.get("hashtags")
@ -248,7 +249,7 @@ class TwitterExtractor(Extractor):
                "nick": u["name"],
            } for u in mentions]
-        content = tweet["full_text"]
+        content = tget("full_text") or tget("text") or ""
        urls = entities.get("urls")
        if urls:
            for url in urls:
@ -269,33 +270,36 @@ class TwitterExtractor(Extractor):
        return tdata
    def _transform_user(self, user):
        uid = user.get("rest_id") or user["id_str"]
        try:
-            return self._user_cache[user.get("rest_id") or user["id_str"]]
+            return self._user_cache[uid]
        except KeyError:
            pass
        uid = user.get("rest_id") or user["id_str"]
        if "legacy" in user:
            user = user["legacy"]
        uget = user.get
        entities = user["entities"]
        self._user_cache[uid] = udata = {
            "id"              : text.parse_int(uid),
            "name"            : user["screen_name"],
            "nick"            : user["name"],
-            "location"        : user["location"],
+            "location"        : uget("location"),
            "date"            : text.parse_datetime(
-                user["created_at"], "%a %b %d %H:%M:%S %z %Y"),
+                uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
-            "verified"        : user.get("verified", False),
+            "verified"        : uget("verified", False),
-            "profile_banner"  : user.get("profile_banner_url", ""),
+            "profile_banner"  : uget("profile_banner_url", ""),
-            "profile_image"   : user.get(
+            "profile_image"   : uget(
                "profile_image_url_https", "").replace("_normal.", "."),
-            "favourites_count": user["favourites_count"],
+            "favourites_count": uget("favourites_count"),
-            "followers_count" : user["followers_count"],
+            "followers_count" : uget("followers_count"),
-            "friends_count"   : user["friends_count"],
+            "friends_count"   : uget("friends_count"),
-            "listed_count"    : user["listed_count"],
+            "listed_count"    : uget("listed_count"),
-            "media_count"     : user["media_count"],
+            "media_count"     : uget("media_count"),
-            "statuses_count"  : user["statuses_count"],
+            "statuses_count"  : uget("statuses_count"),
        }
        descr = user["description"]
@ -653,6 +657,11 @@ class TwitterTweetExtractor(TwitterExtractor):
        ("https://twitter.com/i/web/status/1486373748911575046", {
            "count": 4,
        }),
        # age-restricted (#2354)
        ("https://twitter.com/mightbecursed/status/1492954264909479936", {
            "options": (("syndication", True),),
            "count": 1,
        }),
    )
    def __init__(self, match):
@ -770,6 +779,7 @@ class TwitterAPI():
        }
        self._nsfw_warning = True
        self._syndication = extractor.config("syndication")
        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
        self._user = None
@ -1153,9 +1163,10 @@ class TwitterAPI():
                elif esw("conversationthread-"):
                    tweets.extend(entry["content"]["items"])
                elif esw("tombstone-"):
-                    self._report_tombstone(
+                    item = entry["content"]["itemContent"]
-                        entry,
+                    item["tweet_results"] = \
-                        entry["content"]["itemContent"]["tombstoneInfo"])
+                        {"result": {"tombstone": item["tombstoneInfo"]}}
                    tweets.append(entry)
                elif esw("cursor-bottom-"):
                    cursor = entry["content"]
                    if not cursor.get("stopOnEmptyResponse", True):
@ -1168,7 +1179,9 @@ class TwitterAPI():
                    tweet = ((entry.get("content") or entry["item"])
                             ["itemContent"]["tweet_results"]["result"])
                    if "tombstone" in tweet:
-                        self._report_tombstone(entry, tweet["tombstone"])
+                        tweet = self._process_tombstone(
                            entry, tweet["tombstone"])
                        if not tweet:
                            continue
                    if "tweet" in tweet:
                        tweet = tweet["tweet"]
@ -1259,10 +1272,45 @@ class TwitterAPI():
                return
            variables["cursor"] = cursor
-    def _report_tombstone(self, entry, tombstone):
+    def _process_tombstone(self, entry, tombstone):
        text = (tombstone.get("richText") or tombstone["text"])["text"]
-        if text.startswith("Age-restricted") and self._nsfw_warning:
+        tweet_id = entry["entryId"].rpartition("-")[2]
-            self.extractor.log.warning(text)
+
        if text.startswith("Age-restricted"):
            if self._syndication:
                return self._syndication_tweet(tweet_id)
            elif self._nsfw_warning:
                self._nsfw_warning = False
-        self.extractor.log.debug(
+                self.extractor.log.warning('"%s"', text)
-            "Skipping %s (%s)", entry["entryId"].rpartition("-")[2], text)
+
        self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
    def _syndication_tweet(self, tweet_id):
        tweet = self.extractor.request(
            "https://cdn.syndication.twimg.com/tweet?id=" + tweet_id).json()
        tweet["user"]["description"] = ""
        tweet["user"]["entities"] = {"description": {}}
        if "video" in tweet:
            video = tweet["video"]
            del video["variants"][:-1]
            video["variants"][0]["url"] = video["variants"][0]["src"]
            tweet["extended_entities"] = {"media": [{
                "video_info"   : video,
                "original_info": {"width" : 0, "height": 0},
            }]}
        elif "photos" in tweet:
            for p in tweet["photos"]:
                p["media_url_https"] = p["url"]
                p["original_info"] = {
                    "width" : p["width"],
                    "height": p["height"],
                }
            tweet["extended_entities"] = {"media": tweet["photos"]}
        return {
            "rest_id": tweet["id_str"],
            "legacy" : tweet,
            "user"   : tweet["user"],
        }