[twitter] extract alt texts as 'description' (closes #2617)

2 years ago · dcb580240d
parent 44ffc017ea
commit dcb580240d
1 changed file with 20 additions and 12 deletions
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -104,6 +104,7 @@ class TwitterExtractor(Extractor):

    def _extract_media(self, tweet, entities, files):
        for media in entities:
+            descr = media.get("ext_alt_text")
            width = media["original_info"].get("width", 0)
            height = media["original_info"].get("height", 0)

@@ -112,9 +113,10 @@ class TwitterExtractor(Extractor):
                    files.append({
                        "url": "ytdl:{}/i/web/status/{}".format(
                            self.root, tweet["id_str"]),
-                        "width"    : width,
-                        "height"   : height,
-                        "extension": None,
+                        "width"      : width,
+                        "height"     : height,
+                        "extension"  : None,
+                        "description": descr,
                    })
                elif self.videos:
                    video_info = media["video_info"]
@@ -123,22 +125,24 @@ class TwitterExtractor(Extractor):
                        key=lambda v: v.get("bitrate", 0),
                    )
                    files.append({
-                        "url"     : variant["url"],
-                        "width"   : width,
-                        "height"  : height,
-                        "bitrate" : variant.get("bitrate", 0),
-                        "duration": video_info.get(
+                        "url"        : variant["url"],
+                        "width"      : width,
+                        "height"     : height,
+                        "bitrate"    : variant.get("bitrate", 0),
+                        "duration"   : video_info.get(
                            "duration_millis", 0) / 1000,
+                        "description": descr,
                    })
            elif "media_url_https" in media:
                url = media["media_url_https"]
                base, _, fmt = url.rpartition(".")
                base += "?format=" + fmt + "&name="
                files.append(text.nameext_from_url(url, {
-                    "url"      : base + self._size_image,
-                    "width"    : width,
-                    "height"   : height,
-                    "_fallback": self._image_fallback(base),
+                    "url"        : base + self._size_image,
+                    "width"      : width,
+                    "height"     : height,
+                    "_fallback"  : self._image_fallback(base),
+                    "description": descr,
                }))
            else:
                files.append({"url": media["media_url"]})
@@ -711,6 +715,10 @@ class TwitterTweetExtractor(TwitterExtractor):
            "options": (("syndication", True),),
            "count": 1,
        }),
+        # media alt texts / descriptions (#2617)
+        ("https://twitter.com/my0nruri/status/1528379296041299968", {
+            "keyword": {"description": "oc"}
+        }),
    )

    def __init__(self, match):