[twitter] handle missing 'expanded_url' fields (#5463, #5490)

5 months ago · 347af7f5c8
parent c9d3b5e5d9
commit 347af7f5c8
1 changed file with 10 additions and 4 deletions
--- a/gallery_dl/extractor/twitter.py
+++ b/gallery_dl/extractor/twitter.py
@@ -243,8 +243,8 @@ class TwitterExtractor(Extractor):
        # collect URLs from entities
        for url in tweet["entities"].get("urls") or ():
-            url = url["expanded_url"]
+            url = url.get("expanded_url") or url.get("url") or ""
-            if "//twitpic.com/" not in url or "/photos/" in url:
+            if not url or "//twitpic.com/" not in url or "/photos/" in url:
                continue
            if url.startswith("http:"):
                url = "https" + url[4:]
@@ -336,7 +336,10 @@ class TwitterExtractor(Extractor):
        urls = entities.get("urls")
        if urls:
            for url in urls:
                try:
                    content = content.replace(url["url"], url["expanded_url"])
                except KeyError:
                    pass
        txt, _, tco = content.rpartition(" ")
        tdata["content"] = txt if tco.startswith("https://t.co/") else content
@@ -403,7 +406,10 @@ class TwitterExtractor(Extractor):
        urls = entities["description"].get("urls")
        if urls:
            for url in urls:
                try:
                    descr = descr.replace(url["url"], url["expanded_url"])
                except KeyError:
                    pass
        udata["description"] = descr
        if "url" in entities: