From f58b0e6fc7972e1432fa7032afddfb108802a8a1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 20 Jun 2024 23:57:26 +0200 Subject: [PATCH] [twitter] ignore 'Unavailable' media (#5736) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … including geo-restricted content. add an 'unavailable' option to re-enable downloading such media --- docs/configuration.rst | 11 +++++++++++ gallery_dl/extractor/twitter.py | 10 ++++++++++ test/results/twitter.py | 8 ++++++++ 3 files changed, 29 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index 7dfce735..7cacabb9 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3896,6 +3896,17 @@ Description Note: This requires at least 1 additional API call per initial Tweet. +extractor.twitter.unavailable +----------------------------- +Type + ``bool`` +Default + ``false`` +Description + Try to download media marked as ``Unavailable``, + e.g. ``Geoblocked`` videos. 
+ + extractor.twitter.include ------------------------- Type diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 04970a40..ec098aae 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -36,6 +36,7 @@ class TwitterExtractor(Extractor): self.user = match.group(1) def _init(self): + self.unavailable = self.config("unavailable", False) self.textonly = self.config("text-tweets", False) self.retweets = self.config("retweets", False) self.replies = self.config("replies", True) @@ -143,6 +144,15 @@ class TwitterExtractor(Extractor): def _extract_media(self, tweet, entities, files): for media in entities: + + if "ext_media_availability" in media: + ext = media["ext_media_availability"] + if ext.get("status") == "Unavailable": + self.log.warning("Media unavailable (%s - '%s')", + tweet["id_str"], ext.get("reason")) + if not self.unavailable: + continue + descr = media.get("ext_alt_text") width = media["original_info"].get("width", 0) height = media["original_info"].get("height", 0) diff --git a/test/results/twitter.py b/test/results/twitter.py index 37aa6322..7f748a9a 100644 --- a/test/results/twitter.py +++ b/test/results/twitter.py @@ -621,6 +621,14 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi "content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents – all for benefits that may be entirely speculative. 
#sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule", }, +{ + "#url" : "https://x.com/jsports_motor/status/1801338077618524583", + "#comment" : "geo-restricted video (#5736)", + "#category": ("", "twitter", "tweet"), + "#class" : twitter.TwitterTweetExtractor, + "#count" : 0, +}, + { "#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes", "#category": ("", "twitter", "quotes"),