diff --git a/docs/configuration.rst b/docs/configuration.rst index 92a56443..d7299664 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2362,9 +2362,15 @@ extractor.twitter.cards-blacklist Type ``list`` of ``strings`` Example - ``["player", "summary"]`` + ``["summary", "youtube.com", "player:twitch.tv"]`` Description - List of card types to ignore + List of card types to ignore. + + Possible values are + + * card names + * card domains + * ``<card name>:<card domain>`` extractor.twitter.conversations diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index 48eb3081..ba0597eb 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -41,7 +41,7 @@ class TwitterExtractor(Extractor): self.quoted = self.config("quoted", False) self.videos = self.config("videos", True) self.cards = self.config("cards", False) - self.cards_blacklist = self.config("cards-blacklist") or () + self.cards_blacklist = self.config("cards-blacklist") self._user = self._user_obj = None self._user_cache = {} self._init_sizes() @@ -180,16 +180,21 @@ class TwitterExtractor(Extractor): card = card["legacy"] name = card["name"].rpartition(":")[2] - if name in self.cards_blacklist: - return + bvals = card["binding_values"] + if isinstance(bvals, list): + bvals = {bval["key"]: bval["value"] + for bval in card["binding_values"]} + + cbl = self.cards_blacklist + if cbl: + if name in cbl: + return + if "vanity_url" in bvals: + domain = bvals["vanity_url"]["string_value"] + if domain in cbl or name + ":" + domain in cbl: + return if name in ("summary", "summary_large_image"): - bvals = card["binding_values"] - if isinstance(bvals, list): - bvals = { - bval["key"]: bval["value"] - for bval in card["binding_values"] - } for prefix in ("photo_image_full_size_", "summary_photo_image_", "thumbnail_image_"): @@ -206,15 +211,7 @@ class TwitterExtractor(Extractor): files.append(value) return elif name == "unified_card": - bvals = card["binding_values"] - if 
isinstance(bvals, list): - for bval in card["binding_values"]: - if bval["key"] == "unified_card": - bval = bval["value"]["string_value"] - break - else: - bval = bvals["unified_card"]["string_value"] - data = json.loads(bval) + data = json.loads(bvals["unified_card"]["string_value"]) self._extract_media(tweet, data["media_entities"].values(), files) return @@ -761,6 +758,12 @@ class TwitterTweetExtractor(TwitterExtractor): ("https://twitter.com/i/web/status/1466183847628865544", { "count": 0, }), + # 'cards-blacklist' option + ("https://twitter.com/i/web/status/1571141912295243776", { + "options": (("cards", "ytdl"), + ("cards-blacklist", ("twitch.tv",))), + "count": 0, + }), # original retweets (#1026) ("https://twitter.com/jessica_3978/status/1296304589591810048", { "options": (("retweets", "original"),),