[twitter] ignore 'Unavailable' media (#5736)

… including geo-restricted content.

add 'unavailable' option to allow re-enabling them
pull/5774/head
Mike Fährmann 3 months ago
parent 8452d04a33
commit f58b0e6fc7
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -3896,6 +3896,17 @@ Description
Note: This requires at least 1 additional API call per initial Tweet.
extractor.twitter.unavailable
-----------------------------
Type
``bool``
Default
``false``
Description
Try to download media marked as ``Unavailable``,
e.g. ``Geoblocked`` videos.
extractor.twitter.include
-------------------------
Type

@ -36,6 +36,7 @@ class TwitterExtractor(Extractor):
self.user = match.group(1) self.user = match.group(1)
def _init(self): def _init(self):
self.unavailable = self.config("unavailable", False)
self.textonly = self.config("text-tweets", False) self.textonly = self.config("text-tweets", False)
self.retweets = self.config("retweets", False) self.retweets = self.config("retweets", False)
self.replies = self.config("replies", True) self.replies = self.config("replies", True)
@ -143,6 +144,15 @@ class TwitterExtractor(Extractor):
def _extract_media(self, tweet, entities, files): def _extract_media(self, tweet, entities, files):
for media in entities: for media in entities:
if "ext_media_availability" in media:
ext = media["ext_media_availability"]
if ext.get("status") == "Unavailable":
self.log.warning("Media unavailable (%s - '%s')",
tweet["id_str"], ext.get("reason"))
if not self.unavailable:
continue
descr = media.get("ext_alt_text") descr = media.get("ext_alt_text")
width = media["original_info"].get("width", 0) width = media["original_info"].get("width", 0)
height = media["original_info"].get("height", 0) height = media["original_info"].get("height", 0)

@ -621,6 +621,14 @@ The Washington Post writes, "Three weeks after the toxic train derailment in Ohi
"content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule", "content" : "Biden wants to replace lead pipes. He failed to mention that the unfunded mandate sets an almost impossible timeline, will cost billions, infringe on the rights of the States and their residents all for benefits that may be entirely speculative. #sotu https://ag.ks.gov/media-center/news-releases/2024/02/09/kobach-leads-coalition-demanding-biden-drop-unnecessary-epa-rule",
}, },
{
"#url" : "https://x.com/jsports_motor/status/1801338077618524583",
"#comment" : "geo-restricted video (#5736)",
"#category": ("", "twitter", "tweet"),
"#class" : twitter.TwitterTweetExtractor,
"#count" : 0,
},
{ {
"#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes", "#url" : "https://twitter.com/playpokemon/status/1263832915173048321/quotes",
"#category": ("", "twitter", "quotes"), "#category": ("", "twitter", "quotes"),

Loading…
Cancel
Save