gallery-dl/gallery_dl/extractor/twitter.py

# -*- coding: utf-8 -*-

# Copyright 2016-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://twitter.com/"""

from .common import Extractor, Message
from .. import text, util, exception
from ..cache import cache
import itertools
import json
import re

# URL prefix shared by the extractor patterns in this module: optional
# scheme, optional "www." or "mobile." subdomain, and an optional "fx" or
# "vx" prefix directly in front of "twitter.com".
BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"


class TwitterExtractor(Extractor):
    """Base class for twitter extractors"""
    category = "twitter"
    # format strings; the keys they reference ('user', 'tweet_id',
    # 'retweet_id', 'num') are produced by _transform_tweet() and items()
    directory_fmt = ("{category}", "{user[name]}")
    filename_fmt = "{tweet_id}_{num}.{extension}"
    archive_fmt = "{tweet_id}_{retweet_id}_{num}"
    cookiedomain = ".twitter.com"
    # cookie names passed to _check_cookies() by login()
    cookienames = ("auth_token",)
    # base URL used when building tweet and user URLs
    root = "https://twitter.com"
    browser = "firefox"

    def __init__(self, match):
        """Store the matched user and read all extractor options"""
        Extractor.__init__(self, match)
        self.user = match.group(1)

        option = self.config
        self.textonly = option("text-tweets", False)
        self.retweets = option("retweets", False)
        self.replies = option("replies", True)
        self.twitpic = option("twitpic", False)
        self.pinned = option("pinned", False)
        self.quoted = option("quoted", False)
        self.videos = option("videos", True)
        self.cards = option("cards", False)
        self.cards_blacklist = option("cards-blacklist")
        self.syndication = option("syndication")

        # with 'transform' disabled, both transform methods are
        # replaced by util.identity on this instance
        transform = option("transform", True)
        if not transform:
            self._transform_user = self._transform_tweet = util.identity

        self._user = self._user_obj = None
        self._user_cache = {}
        self._init_sizes()

    def _init_sizes(self):
        size = self.config("size")
        if size is None:
            self._size_image = "orig"
            self._size_fallback = ("4096x4096", "large", "medium", "small")
        else:
            if isinstance(size, str):
                size = size.split(",")
            self._size_image = size[0]
            self._size_fallback = size[1:]

    def items(self):
        """Yield Directory and Url messages for all selected tweets

        Logs in, creates the API object, and iterates over the results
        of self.tweets(). Retweets, quoted tweets, replies, and already
        seen tweets are skipped according to the extractor options.
        For each remaining tweet with files (or any tweet when
        'text-tweets' is enabled) one Directory message is yielded,
        followed by one Url message per file.
        """
        self.login()
        self.api = TwitterAPI(self)
        metadata = self.metadata()

        if self.config("expand"):
            # replace self.tweets so the loop below iterates over the
            # expanded results instead
            tweets = self._expand_tweets(self.tweets())
            self.tweets = lambda : tweets

        if self.config("unique", True):
            seen_tweets = set()
        else:
            seen_tweets = None

        if self.twitpic:
            # matcher used by _extract_twitpic(); captures the URL
            # without its scheme
            self._find_twitpic = re.compile(
                r"https?(://twitpic\.com/(?!photos/)\w+)").findall

        for tweet in self.tweets():

            # tweet fields are either nested under 'legacy' or
            # stored directly on the tweet object
            if "legacy" in tweet:
                data = tweet["legacy"]
            else:
                data = tweet

            if not self.retweets and "retweeted_status_id_str" in data:
                self.log.debug("Skipping %s (retweet)", data["id_str"])
                continue
            if not self.quoted and "quoted_by_id_str" in data:
                self.log.debug("Skipping %s (quoted tweet)", data["id_str"])
                continue
            # skip replies when 'replies' is disabled; with "self", skip
            # replies whose author ID differs from the assigned user's
            # 'rest_id' or, when no user is assigned, from the ID of the
            # user being replied to
            if "in_reply_to_user_id_str" in data and (
                not self.replies or (
                    self.replies == "self" and
                    data["user_id_str"] !=
                    (self._user_obj["rest_id"] if self._user else
                     data["in_reply_to_user_id_str"])
                )
            ):
                self.log.debug("Skipping %s (reply)", data["id_str"])
                continue

            if seen_tweets is not None:
                if data["id_str"] in seen_tweets:
                    self.log.debug(
                        "Skipping %s (previously seen)", data["id_str"])
                    continue
                seen_tweets.add(data["id_str"])

            if "withheld_scope" in data:
                # log the tweet text as a warning; processing continues
                txt = data.get("full_text") or data.get("text") or ""
                self.log.warning("'%s' (%s)", txt, data["id_str"])

            # collect file dicts from media entities, cards, and TwitPic
            files = []
            if "extended_entities" in data:
                self._extract_media(
                    data, data["extended_entities"]["media"], files)
            if "card" in tweet and self.cards:
                self._extract_card(tweet, files)
            if self.twitpic:
                self._extract_twitpic(data, files)
            if not files and not self.textonly:
                continue

            tdata = self._transform_tweet(tweet)
            tdata.update(metadata)
            tdata["count"] = len(files)
            yield Message.Directory, tdata
            # the loop target stores the 1-based file index in tdata["num"]
            # before tdata gets merged into each file dict
            for tdata["num"], file in enumerate(files, 1):
                file.update(tdata)
                url = file.pop("url")
                if "extension" not in file:
                    text.nameext_from_url(url, file)
                yield Message.Url, url, file

    def _extract_media(self, tweet, entities, files):
        for media in entities:
            descr = media.get("ext_alt_text")
            width = media["original_info"].get("width", 0)
            height = media["original_info"].get("height", 0)

            if "video_info" in media:
                if self.videos == "ytdl":
                    files.append({
                        "url": "ytdl:{}/i/web/status/{}".format(
                            self.root, tweet["id_str"]),
                        "width"      : width,
                        "height"     : height,
                        "extension"  : None,
                        "description": descr,
                    })
                elif self.videos:
                    video_info = media["video_info"]
                    variant = max(
                        video_info["variants"],
                        key=lambda v: v.get("bitrate", 0),
                    )
                    files.append({
                        "url"        : variant["url"],
                        "width"      : width,
                        "height"     : height,
                        "bitrate"    : variant.get("bitrate", 0),
                        "duration"   : video_info.get(
                            "duration_millis", 0) / 1000,
                        "description": descr,
                    })
            elif "media_url_https" in media:
                url = media["media_url_https"]
                if url[-4] == ".":
                    base, _, fmt = url.rpartition(".")
                    base += "?format=" + fmt + "&name="
                else:
                    base = url.rpartition("=")[0] + "="
                files.append(text.nameext_from_url(url, {
                    "url"        : base + self._size_image,
                    "width"      : width,
                    "height"     : height,
                    "_fallback"  : self._image_fallback(base),
                    "description": descr,
                }))
            else:
                files.append({"url": media["media_url"]})

    def _image_fallback(self, base):
        for fmt in self._size_fallback:
            yield base + fmt

    def _extract_card(self, tweet, files):
        card = tweet["card"]
        if "legacy" in card:
            card = card["legacy"]

        name = card["name"].rpartition(":")[2]
        bvals = card["binding_values"]
        if isinstance(bvals, list):
            bvals = {bval["key"]: bval["value"]
                     for bval in card["binding_values"]}

        cbl = self.cards_blacklist
        if cbl:
            if name in cbl:
                return
            if "vanity_url" in bvals:
                domain = bvals["vanity_url"]["string_value"]
                if domain in cbl or name + ":" + domain in cbl:
                    return

        if name in ("summary", "summary_large_image"):
            for prefix in ("photo_image_full_size_",
                           "summary_photo_image_",
                           "thumbnail_image_"):
                for size in ("original", "x_large", "large", "small"):
                    key = prefix + size
                    if key in bvals:
                        value = bvals[key].get("image_value")
                        if value and "url" in value:
                            base, sep, size = value["url"].rpartition("&name=")
                            if sep:
                                base += sep
                                value["url"] = base + self._size_image
                                value["_fallback"] = self._image_fallback(base)
                            files.append(value)
                            return
        elif name == "unified_card":
            data = util.json_loads(bvals["unified_card"]["string_value"])
            self._extract_media(tweet, data["media_entities"].values(), files)
            return

        if self.cards == "ytdl":
            tweet_id = tweet.get("rest_id") or tweet["id_str"]
            url = "ytdl:{}/i/web/status/{}".format(self.root, tweet_id)
            files.append({"url": url})

    def _extract_twitpic(self, tweet, files):
        """Append image URLs of TwitPic pages linked in 'tweet' to 'files'

        TwitPic page URLs are taken from the tweet's URL entities and
        from its text. Each distinct page is requested once and the
        value of its 'twitter:image' attribute is used as file URL.
        """
        pages = []

        # page URLs listed in the tweet's entities
        for entity in tweet["entities"].get("urls") or ():
            link = entity["expanded_url"]
            if "//twitpic.com/" in link and "/photos/" not in link:
                if link.startswith("http:"):
                    link = "https" + link[4:]
                pages.append(link)

        # page URLs found in the tweet's text (matched without scheme)
        content = tweet.get("full_text") or tweet.get("text") or ""
        pages.extend("https" + rest for rest in self._find_twitpic(content))

        # dict.fromkeys() drops duplicates while keeping the first-seen order
        for page in dict.fromkeys(pages):
            response = self.request(page, fatal=False)
            if response.status_code < 400:
                image = text.extr(
                    response.text, 'name="twitter:image" value="', '"')
                if image:
                    files.append({"url": image})

    def _transform_tweet(self, tweet):
        """Build a flat metadata dict from a raw tweet object"""
        # the author object sits in one of three places
        if "author" in tweet:
            raw_author = tweet["author"]
        elif "core" in tweet:
            raw_author = tweet["core"]["user_results"]["result"]
        else:
            raw_author = tweet["user"]
        author = self._transform_user(raw_author)

        note = (tweet["note_tweet"]["note_tweet_results"]["result"]
                if "note_tweet" in tweet else None)
        legacy = tweet["legacy"] if "legacy" in tweet else tweet

        tweet_id = int(legacy["id_str"])
        if tweet_id < 300000000000000:
            date = text.parse_datetime(
                legacy["created_at"], "%a %b %d %H:%M:%S %z %Y")
        else:
            # derive the timestamp from the ID itself: drop the low 22
            # bits, add the 1288834974657 ms offset, convert ms to s
            millis = (tweet_id >> 22) + 1288834974657
            date = text.parse_timestamp(millis // 1000)

        get = legacy.get
        to_int = text.parse_int
        tdata = {
            "tweet_id"      : tweet_id,
            "retweet_id"    : to_int(get("retweeted_status_id_str")),
            "quote_id"      : to_int(get("quoted_by_id_str")),
            "reply_id"      : to_int(get("in_reply_to_status_id_str")),
            "conversation_id": to_int(get("conversation_id_str")),
            "date"          : date,
            "author"        : author,
            "user"          : self._user or author,
            "lang"          : legacy["lang"],
            "favorite_count": get("favorite_count"),
            "quote_count"   : get("quote_count"),
            "reply_count"   : get("reply_count"),
            "retweet_count" : get("retweet_count"),
        }

        # note tweets carry their own entity set and text
        entities = note["entity_set"] if note else legacy["entities"]

        hashtags = entities.get("hashtags")
        if hashtags:
            tdata["hashtags"] = [tag["text"] for tag in hashtags]

        mentions = entities.get("user_mentions")
        if mentions:
            tdata["mentions"] = [
                {
                    "id": to_int(mention["id_str"]),
                    "name": mention["screen_name"],
                    "nick": mention["name"],
                }
                for mention in mentions
            ]

        if note:
            raw_text = note["text"]
        else:
            raw_text = get("full_text") or get("text") or ""
        content = text.unescape(raw_text)

        # replace shortened URLs with their expanded form
        for link in entities.get("urls") or ():
            content = content.replace(link["url"], link["expanded_url"])

        # drop a trailing 't.co' link that is still left in the text
        head, _, tail = content.rpartition(" ")
        tdata["content"] = head if tail.startswith("https://t.co/") else content

        optional = (
            ("in_reply_to_screen_name", "reply_to"),
            ("quoted_by", "quote_by"),
        )
        for source, target in optional:
            if source in legacy:
                tdata[target] = legacy[source]

        return tdata

    def _transform_user(self, user):
        """Build a flat metadata dict from a raw user object

        Results are cached per user ID, so repeated calls for the same
        user return the same dict object.
        """
        uid = user.get("rest_id") or user["id_str"]

        cache = self._user_cache
        if uid in cache:
            return cache[uid]

        if "legacy" in user:
            user = user["legacy"]
        elif "statuses_count" not in user and self.syndication == "extended":
            # try to fetch extended user data
            extended = self.api.user_by_screen_name(user["screen_name"])
            user = extended["legacy"]

        get = user.get
        if get("withheld_scope"):
            self.log.warning("'%s'", get("description"))

        entities = user["entities"]
        created = text.parse_datetime(
            get("created_at"), "%a %b %d %H:%M:%S %z %Y")
        avatar = get("profile_image_url_https", "").replace("_normal.", ".")

        udata = {
            "id"              : text.parse_int(uid),
            "name"            : user["screen_name"],
            "nick"            : user["name"],
            "location"        : get("location"),
            "date"            : created,
            "verified"        : get("verified", False),
            "profile_banner"  : get("profile_banner_url", ""),
            "profile_image"   : avatar,
            "favourites_count": get("favourites_count"),
            "followers_count" : get("followers_count"),
            "friends_count"   : get("friends_count"),
            "listed_count"    : get("listed_count"),
            "media_count"     : get("media_count"),
            "statuses_count"  : get("statuses_count"),
        }
        # stored before 'description' and 'url' get added below
        cache[uid] = udata

        description = user["description"]
        for link in entities["description"].get("urls") or ():
            description = description.replace(
                link["url"], link["expanded_url"])
        udata["description"] = description

        if "url" in entities:
            first = entities["url"]["urls"][0]
            udata["url"] = first.get("expanded_url") or first.get("url")

        return udata

    def _assign_user(self, user):
        self._user_obj = user
        self._user = self._transform_user(user)

    def _users_result(self, users):
        """Yield a Queue message for each user object in 'users'

        The 'users' option selects the target: "timeline" (default),
        "media", or "tweets" use the matching extractor class; any
        other value is used as a format string for the URL itself.
        """
        userfmt = self.config("users")

        if not userfmt or userfmt == "timeline":
            extractor = TwitterTimelineExtractor
            template = self.root + "/i/user/{rest_id}"
        elif userfmt == "media":
            extractor = TwitterMediaExtractor
            template = self.root + "/id:{rest_id}/media"
        elif userfmt == "tweets":
            extractor = TwitterTweetsExtractor
            template = self.root + "/id:{rest_id}/tweets"
        else:
            extractor = None
            template = userfmt
        build_url = template.format_map

        for user in users:
            user["_extractor"] = extractor
            yield Message.Queue, build_url(user), user

    def _expand_tweets(self, tweets):
        seen = set()
        for tweet in tweets:
            obj = tweet["legacy"] if "legacy" in tweet else tweet
            cid = obj.get("conversation_id_str")
            if not cid:
                tid = obj["id_str"]
                self.log.warning(
                    "Unable to expand %s (no 'conversation_id')", tid)
                continue
            if cid in seen:
                self.log.debug(
                    "Skipping expansion of %s (previously seen)", cid)
                continue
            seen.add(cid)
            try:
                yield from self.api.tweet_detail(cid)
            except Exception:
                yield tweet

    def _make_tweet(self, user, url, id_str):
        return {
            "id_str": id_str,
            "lang": None,
            "user": user,
            "entities": {},
            "extended_entities": {
                "media": [
                    {
                        "original_info": {},
                        "media_url": url,
                    },
                ],
            },
        }

    def metadata(self):
        """Return general metadata"""
        return {}

    def tweets(self):
        """Yield all relevant tweet objects"""

    def login(self):
        if not self._check_cookies(self.cookienames):
            username, password = self._get_auth_info()
            if username:
                self._update_cookies(_login_impl(self, username, password))


class TwitterTimelineExtractor(TwitterExtractor):
    """Extractor for a Twitter user timeline"""
    subcategory = "timeline"
    pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
               r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
    test = (
        ("https://twitter.com/supernaturepics", {
            "range": "1-40",
            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
        }),
        # suspended account (#2216)
        ("https://twitter.com/OptionalTypo", {
            "exception": exception.NotFoundError,
        }),
        # suspended account user ID
        ("https://twitter.com/id:772949683521978368", {
            "exception": exception.NotFoundError,
        }),
        ("https://mobile.twitter.com/supernaturepics?p=i"),
        ("https://www.twitter.com/id:2976459548"),
        ("https://twitter.com/i/user/2976459548"),
        ("https://twitter.com/intent/user?user_id=2976459548"),
        ("https://fxtwitter.com/supernaturepics"),
        ("https://vxtwitter.com/supernaturepics"),
    )

    def __init__(self, match):
        """Use 'id:<number>' as user when the URL contains a user ID"""
        TwitterExtractor.__init__(self, match)
        numeric_id = match.group(2)
        if numeric_id:
            self.user = "id:" + numeric_id

    def tweets(self):
        """Yield timeline tweets, then continue with search results

        After the selected timeline source is exhausted, a search for
        the user's tweets up to the last yielded tweet takes over;
        without 'text-tweets', a link-filtered search is tried first.
        """
        # initial batch of (media) tweets
        source = self._select_tweet_source()
        last = None
        for last in source(self.user):
            yield last
        if last is None:
            return

        # search query for everything up to the last seen tweet
        query = "from:{} max_id:{}".format(
            self._user["name"], last["rest_id"])
        if self.retweets:
            query += " include:retweets include:nativeretweets"

        if not self.textonly:
            # try to search for media-only tweets
            found = None
            for found in self.api.search_adaptive(query + " filter:links"):
                yield found
            if found is not None:
                return

        # unfiltered search results
        yield from self.api.search_adaptive(query)

    def _select_tweet_source(self):
        """Return the API method selected by the 'strategy' option"""
        api = self.api
        strategy = self.config("strategy")

        if strategy in (None, "auto"):
            # the media timeline is enough unless retweets or
            # text-only tweets are requested
            if self.retweets or self.textonly:
                return api.user_tweets
            return api.user_media
        if strategy == "tweets":
            return api.user_tweets
        if strategy == "with_replies":
            return api.user_tweets_and_replies
        return api.user_media


class TwitterTweetsExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's Tweets timeline"""
    subcategory = "tweets"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
    test = (
        ("https://twitter.com/supernaturepics/tweets", {
            "range": "1-40",
            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
        }),
        ("https://mobile.twitter.com/supernaturepics/tweets#t"),
        ("https://www.twitter.com/id:2976459548/tweets"),
    )

    def tweets(self):
        """Return the results of the API's user_tweets() for this user"""
        fetch = self.api.user_tweets
        return fetch(self.user)


class TwitterRepliesExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's timeline including replies"""
    subcategory = "replies"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
    test = (
        ("https://twitter.com/supernaturepics/with_replies", {
            "range": "1-40",
            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
        }),
        ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
        ("https://www.twitter.com/id:2976459548/with_replies"),
    )

    def tweets(self):
        """Return the results of user_tweets_and_replies() for this user"""
        fetch = self.api.user_tweets_and_replies
        return fetch(self.user)


class TwitterMediaExtractor(TwitterExtractor):
    """Extractor for Tweets from a user's Media timeline"""
    subcategory = "media"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
    test = (
        ("https://twitter.com/supernaturepics/media", {
            "range": "1-40",
            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
        }),
        ("https://mobile.twitter.com/supernaturepics/media#t"),
        ("https://www.twitter.com/id:2976459548/media"),
    )

    def tweets(self):
        """Return the results of the API's user_media() for this user"""
        fetch = self.api.user_media
        return fetch(self.user)


class TwitterLikesExtractor(TwitterExtractor):
    """Extractor for liked tweets"""
    subcategory = "likes"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
    test = ("https://twitter.com/supernaturepics/likes",)

    def metadata(self):
        """Return the user whose likes are being extracted"""
        user = self.user
        return {"user_likes": user}

    def tweets(self):
        """Return the results of the API's user_likes() for this user"""
        fetch = self.api.user_likes
        return fetch(self.user)

    def _transform_tweet(self, tweet):
        """Add a 'date_liked' value computed from the tweet's 'sortIndex'"""
        tdata = super()._transform_tweet(tweet)
        # 'sortIndex' without its low 20 bits is treated as milliseconds
        millis = int(tweet["sortIndex"]) >> 20
        tdata["date_liked"] = text.parse_timestamp(millis // 1000)
        return tdata


class TwitterBookmarkExtractor(TwitterExtractor):
    """Extractor for bookmarked tweets"""
    subcategory = "bookmark"
    pattern = BASE_PATTERN + r"/i/bookmarks()"
    test = ("https://twitter.com/i/bookmarks",)

    def tweets(self):
        """Return the results of the API's user_bookmarks()"""
        fetch = self.api.user_bookmarks
        return fetch()

    def _transform_tweet(self, tweet):
        """Add a 'date_bookmarked' value computed from 'sortIndex'"""
        tdata = super()._transform_tweet(tweet)
        # 'sortIndex' without its low 20 bits is treated as milliseconds
        millis = int(tweet["sortIndex"]) >> 20
        tdata["date_bookmarked"] = text.parse_timestamp(millis // 1000)
        return tdata


class TwitterListExtractor(TwitterExtractor):
    """Extractor for Twitter lists"""
    subcategory = "list"
    pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
    test = ("https://twitter.com/i/lists/784214683683127296", {
        "range": "1-40",
        "count": 40,
        "archive": False,
    })

    def tweets(self):
        """Return the latest tweets of the list (its ID is in self.user)"""
        list_id = self.user
        return self.api.list_latest_tweets_timeline(list_id)


class TwitterListMembersExtractor(TwitterExtractor):
    """Extractor for members of a Twitter list"""
    subcategory = "list-members"
    pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
    test = ("https://twitter.com/i/lists/784214683683127296/members", {
        "pattern": TwitterTimelineExtractor.pattern,
        "range": "1-40",
        "count": 40,
    })

    def items(self):
        """Return Queue messages for all members of the list"""
        self.login()
        api = TwitterAPI(self)
        members = api.list_members(self.user)
        return self._users_result(members)


class TwitterFollowingExtractor(TwitterExtractor):
    """Extractor for followed users"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
    test = (
        ("https://twitter.com/supernaturepics/following"),
        ("https://www.twitter.com/id:2976459548/following"),
    )

    def items(self):
        """Return Queue messages for all users followed by this user"""
        self.login()
        api = TwitterAPI(self)
        following = api.user_following(self.user)
        return self._users_result(following)


class TwitterSearchExtractor(TwitterExtractor):
    """Extractor for Twitter search results"""
    subcategory = "search"
    pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
    test = ("https://twitter.com/search?q=nature", {
        "range": "1-40",
        "count": 40,
        "archive": False,
    })

    def metadata(self):
        """Return the unquoted search query as 'search' metadata"""
        search = text.unquote(self.user)
        return {"search": search}

    def tweets(self):
        """Return search results for the query given in the URL

        When the query names exactly one 'from:' user, that user gets
        assigned as this extractor's user before searching.
        """
        query = text.unquote(self.user.replace("+", " "))

        user = None
        for token in query.split():
            token = token.strip("()")
            if not token.startswith("from:"):
                continue
            if user:
                # a second 'from:' makes the target user ambiguous
                user = None
                break
            user = token[5:]

        if user is not None:
            try:
                self._assign_user(self.api.user_by_screen_name(user))
            except KeyError:
                pass

        return self.api.search_adaptive(query)


class TwitterHashtagExtractor(TwitterExtractor):
    """Extractor for Twitter hashtags"""
    subcategory = "hashtag"
    pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
    test = ("https://twitter.com/hashtag/nature", {
        "pattern": TwitterSearchExtractor.pattern,
        "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
    })

    def items(self):
        """Queue a search URL for the hashtag ('%23' is an encoded '#')"""
        search_url = "{}/search?q=%23{}".format(self.root, self.user)
        yield Message.Queue, search_url, {"_extractor": TwitterSearchExtractor}


class TwitterEventExtractor(TwitterExtractor):
    """Extractor for Tweets from a Twitter Event"""
    subcategory = "event"
    directory_fmt = ("{category}", "Events",
                     "{event[id]} {event[short_title]}")
    pattern = BASE_PATTERN + r"/i/events/(\d+)"
    test = ("https://twitter.com/i/events/1484669206993903616", {
        "range": "1-20",
        "count": ">=1",
    })

    def metadata(self):
        """Return the API's live_event() result as 'event' metadata"""
        event = self.api.live_event(self.user)
        return {"event": event}

    def tweets(self):
        """Return the event's timeline (the event ID is in self.user)"""
        event_id = self.user
        return self.api.live_event_timeline(event_id)


class TwitterTweetExtractor(TwitterExtractor):
    """Extractor for images from individual tweets"""
    subcategory = "tweet"
    # group 1: user name or "i/web"; group 2: numeric tweet ID
    pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
    test = (
        ("https://twitter.com/supernaturepics/status/604341487988576256", {
            "url": "88a40f7d25529c2501c46f2218f9e0de9aa634b4",
            "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
        }),
        # 4 images
        ("https://twitter.com/perrypumas/status/894001459754180609", {
            "url": "3a2a43dc5fb79dd5432c701d8e55e87c4e551f47",
        }),
        # video
        ("https://twitter.com/perrypumas/status/1065692031626829824", {
            "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
        }),
        # content with emoji, newlines, hashtags (#338)
        ("https://twitter.com/playpokemon/status/1263832915173048321", {
            "keyword": {"content": (
                r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
                "Gifts! \n\nYou’ll be able to receive four Galarian form "
                "Pokémon with Hidden Abilities, plus some very useful items. "
                "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
            )},
        }),
        # Reply to deleted tweet (#403, #838)
        ("https://twitter.com/i/web/status/1170041925560258560", {
            "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_",
        }),
        # 'replies' option (#705)
        ("https://twitter.com/i/web/status/1170041925560258560", {
            "options": (("replies", False),),
            "count": 0,
        }),
        # 'replies' to self (#1254)
        ("https://twitter.com/i/web/status/1424882930803908612", {
            "options": (("replies", "self"),),
            "count": 4,
            "keyword": {"user": {
                "description": "re:business email-- rhettaro.bloom@gmail.com "
                               "patreon- http://patreon.com/Princecanary",
                "url": "http://princecanary.tumblr.com",
            }},
        }),
        ("https://twitter.com/i/web/status/1424898916156284928", {
            "options": (("replies", "self"),),
            "count": 1,
        }),
        # "quoted" option (#854)
        ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
            "options": (("quoted", True),),
            "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg",
            "count": 8,
        }),
        # quoted tweet (#526, #854)
        ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
            "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
            "count": 4,
        }),
        # different 'user' and 'author' in quoted Tweet (#3922)
        ("https://twitter.com/web/status/1644907989109751810", {
            "keyword": {
                "author": {"id": 321629993         , "name": "Cakes_Comics"},
                "user"  : {"id": 718928225360080897, "name": "StobiesGalaxy"},
            },
        }),
        # TwitPic embeds (#579)
        ("https://twitter.com/i/web/status/112900228289540096", {
            "options": (("twitpic", True), ("cards", False)),
            "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
            "count": 2,  # 1 duplicate
        }),
        # TwitPic URL not in 'urls' (#3792)
        ("https://twitter.com/shimoigusaP/status/8138669971", {
            "options": (("twitpic", True),),
            "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
            "count": 1,
        }),
        # Twitter card (#1005)
        ("https://twitter.com/billboard/status/1306599586602135555", {
            "options": (("cards", True),),
            "pattern": r"https://pbs.twimg.com/card_img/\d+/",
        }),
        # unified_card image_website (#2875)
        ("https://twitter.com/i/web/status/1561674543323910144", {
            "options": (("cards", True),),
            "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg",
        }),
        # unified_card image_carousel_website
        ("https://twitter.com/doax_vv_staff/status/1479438945662685184", {
            "options": (("cards", True),),
            "pattern": r"https://pbs\.twimg\.com/media/F.+=png",
            "count": 6,
        }),
        # unified_card video_website (#2875)
        ("https://twitter.com/bang_dream_1242/status/1561548715348746241", {
            "options": (("cards", True),),
            "pattern": r"https://video\.twimg\.com/amplify_video"
                       r"/1560607284333449216/vid/720x720/\w+\.mp4",
        }),
        # unified_card without type
        ("https://twitter.com/i/web/status/1466183847628865544", {
            "count": 0,
        }),
        # 'cards-blacklist' option
        ("https://twitter.com/i/web/status/1571141912295243776", {
            "options": (("cards", "ytdl"),
                        ("cards-blacklist", ("twitch.tv",))),
            "count": 0,
        }),
        # retweet
        ("https://twitter.com/jessica_3978/status/1296304589591810048", {
            "options": (("retweets", True),),
            "count": 2,
            "keyword": {
                "tweet_id"  : 1296304589591810048,
                "retweet_id": 1296296016002547713,
                "date"      : "dt:2020-08-20 04:34:32",
            },
        }),
        # original retweets (#1026)
        ("https://twitter.com/jessica_3978/status/1296304589591810048", {
            "options": (("retweets", "original"),),
            "count": 2,
            "keyword": {
                "tweet_id"  : 1296296016002547713,
                "retweet_id": 1296296016002547713,
                "date"      : "dt:2020-08-20 04:00:28",
            },
        }),
        # all Tweets from a 'conversation' (#1319)
        ("https://twitter.com/supernaturepics/status/604341487988576256", {
            "options": (("conversations", True),),
            "count": 5,
        }),
        # retweet with missing media entities (#1555)
        ("https://twitter.com/morino_ya/status/1392763691599237121", {
            "options": (("retweets", True),),
            "count": 0,  # private
        }),
        # deleted quote tweet (#2225)
        ("https://twitter.com/i/web/status/1460044411165888515", {
            "count": 0,
        }),
        # "Misleading" content
        ("https://twitter.com/i/web/status/1486373748911575046", {
            "count": 4,
        }),
        # age-restricted (#2354)
        ("https://twitter.com/mightbecursed/status/1492954264909479936", {
            "options": (("syndication", True),),
            "keyword": {"date": "dt:2022-02-13 20:10:09"},
            "count": 1,
        }),
        # media alt texts / descriptions (#2617)
        ("https://twitter.com/my0nruri/status/1528379296041299968", {
            "keyword": {"description": "oc"}
        }),
        # '?format=...&name=...'-style URLs
        ("https://twitter.com/poco_dandy/status/1150646424461176832", {
            "options": (("cards", True),),
            "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
                       r"\?format=(jpg|png)&name=orig$",
            "range": "1-2",
        }),
        # note tweet with long 'content'
        ("https://twitter.com/i/web/status/1629193457112686592", {
            "keyword": {
                "content": """\
BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
just contradicted federal government regulators, saying that toxic air \
pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
Washington Post writes, "Three weeks after the toxic train derailment in \
Ohio, an analysis of Environmental Protection Agency data has found nine air \
pollutants at levels that could raise long-term health concerns in and around \
East Palestine, according to an independent analysis. \n\n\"The analysis by \
Texas A&M University seems to contradict statements by state and federal \
regulators that air near the crash site is completely safe, despite residents \
complaining about rashes, breathing problems and other health effects." \
Your reaction.""",
            },
        }),
    )

    def __init__(self, match):
        """Store the tweet ID captured by the second pattern group"""
        TwitterExtractor.__init__(self, match)
        tweet_id = match.group(2)
        self.tweet_id = tweet_id

    def tweets(self):
        if self.config("conversations", False):
            return self._tweets_conversation(self.tweet_id)
        else:
            return self._tweets_single(self.tweet_id)

    def _tweets_single(self, tweet_id):
        tweets = []

        for tweet in self.api.tweet_detail(tweet_id):
            if tweet["rest_id"] == tweet_id or \
                    tweet.get("_retweet_id_str") == tweet_id:
                if self._user_obj is None:
                    self._assign_user(tweet["core"]["user_results"]["result"])
                tweets.append(tweet)

                tweet_id = tweet["legacy"].get("quoted_status_id_str")
                if not tweet_id:
                    break

        return tweets

    def _tweets_conversation(self, tweet_id):
        tweets = self.api.tweet_detail(tweet_id)
        buffer = []

        for tweet in tweets:
            buffer.append(tweet)
            if tweet["rest_id"] == tweet_id or \
                    tweet.get("_retweet_id_str") == tweet_id:
                self._assign_user(tweet["core"]["user_results"]["result"])
                break

        return itertools.chain(buffer, tweets)


class TwitterAvatarExtractor(TwitterExtractor):
    # Matches '<user>/photo' URLs and produces a single Tweet-like
    # object for the profile image of that user.
    subcategory = "avatar"
    filename_fmt = "avatar {date}.{extension}"
    archive_fmt = "AV_{user[id]}_{date}"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
    test = (
        ("https://twitter.com/supernaturepics/photo", {
            "pattern": r"https://pbs\.twimg\.com/profile_images"
                       r"/554585280938659841/FLVAlX18\.jpeg",
            "keyword": {
                "date": "dt:2015-01-12 10:26:49",
                "extension": "jpeg",
                "filename": "FLVAlX18",
                "tweet_id": 554585280938659841,
            },
        }),
        ("https://twitter.com/User16/photo", {
            "count": 0,
        }),
    )

    def tweets(self):
        """Return a 1-tuple with a Tweet-like object for the avatar.

        Returns an empty tuple when the profile image URL equals the
        default profile image URL.
        """
        default_avatar = (
            "https://abs.twimg.com/sticky"
            "/default_profile_images/default_profile_normal.png")

        # the lookup result itself is not needed here; 'self._user_obj'
        # is read right afterwards (presumably set by _assign_user())
        self.api._user_id_by_screen_name(self.user)
        profile = self._user_obj

        avatar_url = profile["legacy"]["profile_image_url_https"]
        if avatar_url == default_avatar:
            return ()

        # strip the '_normal' marker from the file name
        avatar_url = avatar_url.replace("_normal.", ".")
        # second-to-last path segment serves as ID
        avatar_id = avatar_url.rsplit("/", 2)[1]

        return (self._make_tweet(profile, avatar_url, avatar_id),)


class TwitterBackgroundExtractor(TwitterExtractor):
    # Matches '<user>/header_photo' URLs and produces a single Tweet-like
    # object for the profile banner of that user.
    subcategory = "background"
    filename_fmt = "background {date}.{extension}"
    archive_fmt = "BG_{user[id]}_{date}"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
    test = (
        ("https://twitter.com/supernaturepics/header_photo", {
            "pattern": r"https://pbs\.twimg\.com/profile_banners"
                       r"/2976459548/1421058583",
            "keyword": {
                "date": "dt:2015-01-12 10:29:43",
                "filename": "1421058583",
                "tweet_id": 554586009367478272,
            },
        }),
        ("https://twitter.com/User16/header_photo", {
            "count": 0,
        }),
    )

    def tweets(self):
        """Return a 1-tuple with a Tweet-like object for the banner.

        Returns an empty tuple when the user object has no
        'profile_banner_url' or when that URL does not end in an
        integer path segment.
        """
        self.api._user_id_by_screen_name(self.user)
        user = self._user_obj

        try:
            url = user["legacy"]["profile_banner_url"]
            _, timestamp = url.rsplit("/", 1)
            # convert inside the 'try' so that an empty or non-numeric
            # last path segment is treated like any other malformed
            # banner URL instead of raising an uncaught ValueError
            timestamp = int(timestamp)
        except (KeyError, ValueError):
            return ()

        # derive an ID from the timestamp (seconds -> milliseconds)
        # NOTE(review): 1288834974657 and the 22-bit shift look like the
        # Twitter snowflake ID layout - confirm
        id_str = str((timestamp * 1000 - 1288834974657) << 22)
        return (self._make_tweet(user, url, id_str),)


class TwitterImageExtractor(Extractor):
    # Handles direct 'pbs.twimg.com/media/...' image URLs; the two
    # pattern groups are the media ID and the file format.
    category = "twitter"
    subcategory = "image"
    pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)"
    test = (
        ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", {
            "options": (("size", "4096x4096,orig"),),
            "url": "cb3042a6f6826923da98f0d2b66c427e9385114c",
        }),
        ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"),
    )

    def __init__(self, match):
        """Store media ID and format and set up the image size options."""
        Extractor.__init__(self, match)
        self.id = match.group(1)
        self.fmt = match.group(2)
        # borrow the size setup from TwitterExtractor
        # without inheriting from it
        TwitterExtractor._init_sizes(self)

    def items(self):
        """Yield one Directory message and one Url message for the image.

        The URL uses the primary size name; alternatives are provided
        through the '_fallback' metadata entry.
        """
        url_base = "https://pbs.twimg.com/media/{}?format={}&name=".format(
            self.id, self.fmt)

        metadata = {
            "filename": self.id,
            "extension": self.fmt,
            "_fallback": TwitterExtractor._image_fallback(self, url_base),
        }

        yield Message.Directory, metadata
        yield Message.Url, url_base + self._size_image, metadata


class TwitterAPI():

    def __init__(self, extractor):
        """Set up headers and default parameters for API requests.

        Reads the 'ct0' and 'auth_token' cookies for the extractor's
        cookie domain from its session. A missing or disabled 'ct0' token
        is replaced by a newly generated one, which is also stored as
        cookie.
        """
        self.extractor = extractor

        self.root = "https://api.twitter.com"
        self._nsfw_warning = True
        self._syndication = self.extractor.syndication
        # compact JSON encoder (no whitespace after separators)
        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode

        jar = extractor.session.cookies
        domain = extractor.cookiedomain

        # only reuse the 'ct0' cookie when the "csrf" option is unset
        # or explicitly set to "cookies"
        csrf_token = None
        if extractor.config("csrf") in (None, "cookies"):
            csrf_token = jar.get("ct0", domain=domain)
        if not csrf_token:
            csrf_token = util.generate_token()
            jar.set("ct0", csrf_token, domain=domain)

        # presence of this cookie selects the authenticated header set
        auth_token = jar.get("auth_token", domain=domain)

        self.headers = {
            "Accept": "*/*",
            "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
                             "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
                             "4FA33AGWWjCpTnA",
            "x-guest-token": None,
            "x-twitter-auth-type": "OAuth2Session" if auth_token else None,
            "x-twitter-client-language": "en",
            "x-twitter-active-user": "yes",
            "x-csrf-token": csrf_token,
            "Origin": "https://twitter.com",
            "Referer": "https://twitter.com/",
        }
        # default query parameters; copied and adjusted by the
        # legacy (non-GraphQL) endpoint methods
        self.params = {
            "include_profile_interstitial_type": "1",
            "include_blocking": "1",
            "include_blocked_by": "1",
            "include_followed_by": "1",
            "include_want_retweets": "1",
            "include_mute_edge": "1",
            "include_can_dm": "1",
            "include_can_media_tag": "1",
            "include_ext_has_nft_avatar": "1",
            "include_ext_is_blue_verified": "1",
            "include_ext_verified_type": "1",
            "skip_status": "1",
            "cards_platform": "Web-12",
            "include_cards": "1",
            "include_ext_alt_text": "true",
            "include_ext_limited_action_results": "false",
            "include_quote_count": "true",
            "include_reply_count": "1",
            "tweet_mode": "extended",
            "include_ext_collab_control": "true",
            "include_ext_views": "true",
            "include_entities": "true",
            "include_user_entities": "true",
            "include_ext_media_color": "true",
            "include_ext_media_availability": "true",
            "include_ext_sensitive_media_warning": "true",
            "include_ext_trusted_friends_metadata": "true",
            "send_error_codes": "true",
            "simple_quoted_tweet": "true",
            "q": None,
            "count": "100",
            "query_source": None,
            "cursor": None,
            "pc": None,
            "spelling_corrections": None,
            "include_ext_edit_control": "true",
            "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,"
                   "enrichments,superFollowMetadata,unmentionInfo,editControl,"
                   "collab_control,vibe",
        }
        # merged into the variables of every paginated GraphQL request
        self.variables = {
            "withDownvotePerspective": False,
            "withReactionsMetadata": False,
            "withReactionsPerspective": False,
        }
        # 'features' for single-object GraphQL lookups
        self.features = {
            "blue_business_profile_image_shape_enabled": False,
            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
            "responsive_web_graphql_exclude_directive_enabled": True,
            "verified_phone_label_enabled": False,
            "responsive_web_graphql_skip_user_profile_"
            "image_extensions_enabled": False,
            "responsive_web_graphql_timeline_navigation_enabled": True,
        }
        # default 'features' for paginated GraphQL requests
        self.features_pagination = {
            "blue_business_profile_image_shape_enabled": False,
            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
            "responsive_web_graphql_exclude_directive_enabled": True,
            "verified_phone_label_enabled": False,
            "responsive_web_graphql_timeline_navigation_enabled": True,
            "responsive_web_graphql_skip_user_profile_"
            "image_extensions_enabled": False,
            "tweetypie_unmention_optimization_enabled": True,
            "vibe_api_enabled": True,
            "responsive_web_edit_tweet_api_enabled": True,
            "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
            "view_counts_everywhere_api_enabled": True,
            "longform_notetweets_consumption_enabled": True,
            "tweet_awards_web_tipping_enabled": False,
            "freedom_of_speech_not_reach_fetch_enabled": False,
            "standardized_nudges_misinfo": True,
            "tweet_with_visibility_results_prefer_gql_"
            "limited_actions_policy_enabled": False,
            "interactive_text_enabled": True,
            "responsive_web_text_conversations_enabled": False,
            "longform_notetweets_richtext_consumption_enabled": False,
            "responsive_web_enhance_cards_enabled": False,
        }

    def tweet_detail(self, tweet_id):
        """Return a generator over the 'TweetDetail' results for 'tweet_id'.

        Entries are read from the
        'threaded_conversation_with_injections_v2' timeline.
        """
        gql_variables = {
            "focalTweetId": tweet_id,
            "referrer": "profile",
            "with_rux_injections": False,
            "includePromotedContent": True,
            "withCommunity": True,
            "withQuickPromoteEligibilityTweetFields": True,
            "withBirdwatchNotes": False,
            "withSuperFollowsUserFields": True,
            "withSuperFollowsTweetFields": True,
            "withVoice": True,
            "withV2Timeline": True,
        }
        return self._pagination_tweets(
            "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail",
            gql_variables,
            path=("threaded_conversation_with_injections_v2",),
        )

    def user_tweets(self, screen_name):
        """Return a generator over the 'UserTweets' timeline of a user.

        'screen_name' is resolved to a user ID first.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        return self._pagination_tweets(
            "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets", {
                "userId": user_id,
                "count": 100,
                "includePromotedContent": True,
                "withQuickPromoteEligibilityTweetFields": True,
                "withVoice": True,
                "withV2Timeline": True,
            })

    def user_tweets_and_replies(self, screen_name):
        """Return a generator over a user's 'UserTweetsAndReplies' timeline.

        'screen_name' is resolved to a user ID first.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        return self._pagination_tweets(
            "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies", {
                "userId": user_id,
                "count": 100,
                "includePromotedContent": True,
                "withCommunity": True,
                "withVoice": True,
                "withV2Timeline": True,
            })

    def user_media(self, screen_name):
        """Return a generator over the 'UserMedia' timeline of a user.

        'screen_name' is resolved to a user ID first.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        return self._pagination_tweets(
            "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia", {
                "userId": user_id,
                "count": 100,
                "includePromotedContent": False,
                "withClientEventToken": False,
                "withBirdwatchNotes": False,
                "withVoice": True,
                "withV2Timeline": True,
            })

    def user_media_legacy(self, screen_name):
        """Return a generator over a user's media via the older query ID.

        Unlike 'user_media()', this request sends no 'features' parameter
        and reads entries from 'user.result.timeline.timeline'.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        gql_variables = {
            "userId": user_id,
            "count": 100,
            "includePromotedContent": False,
            "withSuperFollowsUserFields": True,
            "withBirdwatchPivots": False,
            "withSuperFollowsTweetFields": True,
            "withClientEventToken": False,
            "withBirdwatchNotes": False,
            "withVoice": True,
            "withV2Timeline": False,
            "__fs_interactive_text": False,
            "__fs_dont_mention_me_view_api_enabled": False,
        }
        return self._pagination_tweets(
            "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia",
            gql_variables,
            path=("user", "result", "timeline", "timeline"),
            features=False,
        )

    def user_likes(self, screen_name):
        """Return a generator over the 'Likes' timeline of a user.

        'screen_name' is resolved to a user ID first.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        return self._pagination_tweets(
            "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes", {
                "userId": user_id,
                "count": 100,
                "includePromotedContent": False,
                "withClientEventToken": False,
                "withBirdwatchNotes": False,
                "withVoice": True,
                "withV2Timeline": True,
            })

    def user_bookmarks(self):
        """Return a generator over the 'Bookmarks' timeline.

        Uses a copy of the default pagination features with the bookmark
        timeline flag enabled, and does not stop on responses without
        Tweets ('stop_tweets' is False).
        """
        bookmark_features = dict(self.features_pagination)
        bookmark_features["graphql_timeline_v2_bookmark_timeline"] = True

        return self._pagination_tweets(
            "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks",
            {"count": 100},
            path=("bookmark_timeline_v2", "timeline"),
            stop_tweets=False,
            features=bookmark_features,
        )

    def list_latest_tweets_timeline(self, list_id):
        """Return a generator over the Tweets timeline of list 'list_id'."""
        gql_variables = {"listId": list_id, "count": 100}
        return self._pagination_tweets(
            "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline",
            gql_variables,
            path=("list", "tweets_timeline", "timeline"),
        )

    def search_adaptive(self, query):
        """Return a generator over search results for 'query'.

        Works on a copy of the default parameters, so 'self.params'
        stays untouched.
        """
        search_params = dict(self.params)
        search_params.update({
            "q": query,
            "tweet_search_mode": "live",
            "query_source": "typed_query",
            "pc": "1",
            "spelling_corrections": "1",
        })
        return self._pagination_legacy(
            "/2/search/adaptive.json", search_params)

    def live_event_timeline(self, event_id):
        """Return a generator over the 'recap' timeline of a live event.

        Works on a copy of the default parameters, so 'self.params'
        stays untouched.
        """
        timeline_params = dict(self.params)
        timeline_params.update({
            "timeline_id": "recap",
            "urt": "true",
            "get_annotations": "true",
        })
        return self._pagination_legacy(
            "/2/live_event/timeline/{}.json".format(event_id),
            timeline_params)

    def live_event(self, event_id):
        """Return the live event object for 'event_id'.

        The object is taken from 'twitter_objects.live_events' of the
        response; 'count' is set to "0" for this request.
        """
        event_params = dict(self.params, count="0", urt="true")
        response = self._call(
            "/1.1/live_event/1/{}/timeline.json".format(event_id),
            event_params)
        return response["twitter_objects"]["live_events"][event_id]

    def list_by_rest_id(self, list_id):
        """Return the list object for 'list_id'.

        Raises NotFoundError when a KeyError occurs while fetching the
        response or reading 'data.list' from it.
        """
        gql_variables = {
            "listId": list_id,
            "withSuperFollowsUserFields": True,
        }
        query = {
            "variables": self._json_dumps(gql_variables),
            "features": self._json_dumps(self.features),
        }
        try:
            response = self._call(
                "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId", query)
            return response["data"]["list"]
        except KeyError:
            raise exception.NotFoundError("list")

    def list_members(self, list_id):
        """Return a generator over the member users of list 'list_id'."""
        gql_variables = {
            "listId": list_id,
            "count": 100,
            "withSafetyModeUserFields": True,
        }
        return self._pagination_users(
            "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers",
            gql_variables,
            path=("list", "members_timeline", "timeline"),
        )

    def user_following(self, screen_name):
        """Return a generator over the 'Following' users of a user.

        'screen_name' is resolved to a user ID first.
        """
        user_id = self._user_id_by_screen_name(screen_name)
        return self._pagination_users(
            "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following", {
                "userId": user_id,
                "count": 100,
                "includePromotedContent": False,
            })

    def user_by_rest_id(self, rest_id):
        """Return the user object ('data.user.result') for a user ID."""
        gql_variables = {
            "userId": rest_id,
            "withSafetyModeUserFields": True,
        }
        response = self._call(
            "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId", {
                "variables": self._json_dumps(gql_variables),
                "features": self._json_dumps(self.features),
            })
        return response["data"]["user"]["result"]

    def user_by_screen_name(self, screen_name):
        """Return the user object ('data.user.result') for a screen name."""
        gql_variables = {
            "screen_name": screen_name,
            "withSafetyModeUserFields": True,
        }
        response = self._call(
            "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName", {
                "variables": self._json_dumps(gql_variables),
                "features": self._json_dumps(self.features),
            })
        return response["data"]["user"]["result"]

    def _user_id_by_screen_name(self, screen_name):
        """Look up a user, register it with the extractor, return its ID.

        A 'screen_name' starting with "id:" is treated as a user ID and
        looked up by ID; anything else is looked up by screen name.

        Raises NotFoundError when a KeyError occurs during the lookup. If
        the fetched user object carries an 'unavailable_message', its
        text and the 'reason' field are used as error message.
        """
        # stays an empty tuple when the lookup itself raises KeyError,
        # which keeps the membership test below valid
        user = ()
        try:
            user = (self.user_by_rest_id(screen_name[3:])
                    if screen_name.startswith("id:") else
                    self.user_by_screen_name(screen_name))
            self.extractor._assign_user(user)
            return user["rest_id"]
        except KeyError:
            if "unavailable_message" not in user:
                raise exception.NotFoundError("user")
            raise exception.NotFoundError("{} ({})".format(
                user["unavailable_message"].get("text"),
                user.get("reason")), False)

    @cache(maxage=3600)
    def _guest_token(self):
        """Request a guest token and return it as string.

        The request is sent as POST without prior guest authentication.
        """
        self.extractor.log.info("Requesting guest token")
        response = self._call(
            "/1.1/guest/activate.json", None, method="POST", auth=False)
        return str(response["guest_token"])

    def _authenticate_guest(self):
        """Make sure the current guest token is used for requests.

        Updates the 'x-guest-token' header and the 'gt' cookie when the
        token differs from the one currently stored in the headers.
        """
        token = self._guest_token()
        if token == self.headers["x-guest-token"]:
            return

        self.headers["x-guest-token"] = token
        self.extractor.session.cookies.set(
            "gt", token, domain=self.extractor.cookiedomain)

    def _call(self, endpoint, params, method="GET", auth=True):
        """Send an API request and return the decoded JSON response.

        endpoint: path appended to 'self.root'
        params:   query parameters for the request
        method:   HTTP method
        auth:     when true and no authenticated session is in use,
                  guest authentication is performed before each attempt

        Requests answered with status 429 are retried after waiting.
        Raises AuthorizationError for a 403 response to the search
        endpoint without an authenticated session, and StopExtraction
        for any other response with a status code of 400 or above.
        """
        url = self.root + endpoint

        while True:
            if auth and not self.headers["x-twitter-auth-type"]:
                self._authenticate_guest()

            response = self.extractor.request(
                url, method=method, params=params,
                headers=self.headers, fatal=None)

            # update 'x-csrf-token' header (#1170)
            new_csrf = response.cookies.get("ct0")
            if new_csrf:
                self.headers["x-csrf-token"] = new_csrf

            status = response.status_code

            # success
            if status < 400:
                return response.json()

            # rate limit exceeded
            if status == 429:
                reset = response.headers.get("x-rate-limit-reset")
                # fall back to a fixed delay when no reset time is given
                self.extractor.wait(
                    until=reset, seconds=(None if reset else 60))
                continue

            if (status == 403 and
                    not self.headers["x-twitter-auth-type"] and
                    endpoint == "/2/search/adaptive.json"):
                raise exception.AuthorizationError(
                    "Login required to access search results")

            # error
            try:
                data = response.json()
                errors = ", ".join(
                    error["message"] for error in data["errors"])
            except ValueError:
                # response body is not valid JSON
                errors = response.text
            except Exception:
                errors = data.get("errors", "")

            raise exception.StopExtraction(
                "%s %s (%s)", status, response.reason, errors)

    def _pagination_legacy(self, endpoint, params):
        """Yield Tweet objects from a legacy timeline endpoint.

        Requests 'endpoint' repeatedly, writing the bottom cursor of each
        response into 'params["cursor"]' ('params' is modified in place).
        Tweets and users are looked up in the response's 'globalObjects'.
        Every yielded Tweet gets a "user" entry. A quoted Tweet is
        yielded as a copy right after the Tweet quoting it.

        The generator ends when a response has no instructions, no
        cursor, or is considered empty (see the end of the loop).
        """
        original_retweets = (self.extractor.retweets == "original")
        # entry ID prefixes of bottom cursor entries
        bottom = ("cursor-bottom-", "sq-cursor-bottom")

        while True:
            data = self._call(endpoint, params)

            instructions = data["timeline"]["instructions"]
            if not instructions:
                return

            tweets = data["globalObjects"]["tweets"]
            users = data["globalObjects"]["users"]
            # 'tweet_id' doubles as a "response was not empty" flag for the
            # stop condition below: it is set to True by a bottom cursor
            # with 'stopOnEmptyResponse' disabled and is overwritten with
            # each processed Tweet ID
            tweet_id = cursor = None
            tweet_ids = []
            entries = ()

            # process instructions
            for instr in instructions:
                if "addEntries" in instr:
                    entries = instr["addEntries"]["entries"]
                elif "replaceEntry" in instr:
                    entry = instr["replaceEntry"]["entry"]
                    if entry["entryId"].startswith(bottom):
                        cursor = (entry["content"]["operation"]
                                  ["cursor"]["value"])

            # collect tweet IDs and cursor value
            for entry in entries:
                entry_startswith = entry["entryId"].startswith

                if entry_startswith(("tweet-", "sq-I-t-")):
                    tweet_ids.append(
                        entry["content"]["item"]["content"]["tweet"]["id"])

                elif entry_startswith("homeConversation-"):
                    # IDs are added in reverse order of 'allTweetIds'
                    tweet_ids.extend(
                        entry["content"]["timelineModule"]["metadata"]
                        ["conversationMetadata"]["allTweetIds"][::-1])

                elif entry_startswith(bottom):
                    cursor = entry["content"]["operation"]["cursor"]
                    if not cursor.get("stopOnEmptyResponse", True):
                        # keep going even if there are no tweets
                        tweet_id = True
                    cursor = cursor["value"]

                elif entry_startswith("conversationThread-"):
                    # strip the 6-character "tweet-" prefix to get the ID
                    tweet_ids.extend(
                        item["entryId"][6:]
                        for item in entry["content"]["timelineModule"]["items"]
                        if item["entryId"].startswith("tweet-")
                    )

            # process tweets
            for tweet_id in tweet_ids:
                try:
                    tweet = tweets[tweet_id]
                except KeyError:
                    self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
                    continue

                if "retweeted_status_id_str" in tweet:
                    retweet = tweets.get(tweet["retweeted_status_id_str"])
                    if original_retweets:
                        # yield the retweeted Tweet itself and remember
                        # the ID of the retweet it came from
                        if not retweet:
                            continue
                        retweet["retweeted_status_id_str"] = retweet["id_str"]
                        retweet["_retweet_id_str"] = tweet["id_str"]
                        tweet = retweet
                    elif retweet:
                        # keep the retweet, but attach the original author
                        # and, if missing, the original's media entities
                        tweet["author"] = users[retweet["user_id_str"]]
                        if "extended_entities" in retweet and \
                                "extended_entities" not in tweet:
                            tweet["extended_entities"] = \
                                retweet["extended_entities"]
                tweet["user"] = users[tweet["user_id_str"]]
                yield tweet

                if "quoted_status_id_str" in tweet:
                    quoted = tweets.get(tweet["quoted_status_id_str"])
                    if quoted:
                        # shallow copy: the 'quoted_by' fields are not
                        # written to the object stored in 'tweets'
                        quoted = quoted.copy()
                        quoted["author"] = users[quoted["user_id_str"]]
                        quoted["quoted_by"] = tweet["user"]["screen_name"]
                        quoted["quoted_by_id_str"] = tweet["id_str"]
                        yield quoted

            # stop on empty response
            if not cursor or (not tweets and not tweet_id):
                return
            params["cursor"] = cursor

    def _pagination_tweets(self, endpoint, variables,
                           path=None, stop_tweets=True, features=None):
        """Yield Tweet objects from a GraphQL timeline endpoint.

        endpoint:    GraphQL endpoint path
        variables:   request variables; updated in place with
                     'self.variables' and the current cursor
        path:        keys leading from the response's 'data' object to
                     the object holding 'instructions'; None selects
                     'user.result.timeline_v2.timeline'
        stop_tweets: end pagination when 'tweet' is falsy after a page
                     has been processed
        features:    'features' object to send; None selects
                     'self.features_pagination', any other falsy value
                     omits the parameter

        A quoted Tweet is yielded right after the Tweet quoting it.

        Raises AuthorizationError or StopExtraction when the response
        contains no 'TimelineAddEntries' instruction.
        """
        extr = self.extractor
        variables.update(self.variables)
        original_retweets = (extr.retweets == "original")
        pinned_tweet = extr.pinned

        params = {"variables": None}
        if features is None:
            features = self.features_pagination
        if features:
            params["features"] = self._json_dumps(features)

        while True:
            params["variables"] = self._json_dumps(variables)
            data = self._call(endpoint, params)["data"]

            try:
                if path is None:
                    instructions = (data["user"]["result"]["timeline_v2"]
                                    ["timeline"]["instructions"])
                else:
                    instructions = data
                    for key in path:
                        instructions = instructions[key]
                    instructions = instructions["instructions"]

                for instr in instructions:
                    if instr.get("type") == "TimelineAddEntries":
                        entries = instr["entries"]
                        break
                else:
                    # no 'TimelineAddEntries' instruction in this response
                    raise KeyError()

            except LookupError:
                extr.log.debug(data)

                # try to explain the failure with the help of
                # the user object assigned to the extractor
                user = extr._user_obj
                if user:
                    user = user["legacy"]
                    if user.get("blocked_by"):
                        if self.headers["x-twitter-auth-type"] and \
                                extr.config("logout"):
                            # drop the login and repeat the same request
                            # without authentication
                            extr._cookiefile = None
                            del extr.session.cookies["auth_token"]
                            self.headers["x-twitter-auth-type"] = None
                            extr.log.info("Retrying API request as guest")
                            continue
                        raise exception.AuthorizationError(
                            "{} blocked your account".format(
                                user["screen_name"]))
                    elif user.get("protected"):
                        raise exception.AuthorizationError(
                            "{}'s Tweets are protected".format(
                                user["screen_name"]))

                raise exception.StopExtraction(
                    "Unable to retrieve Tweets from this timeline")

            tweets = []
            # 'tweet' also serves as "page was not empty" flag
            # for the 'stop_tweets' check at the end of the loop
            tweet = cursor = None

            # the pinned entry is only looked for in the first response
            if pinned_tweet:
                pinned_tweet = False
                if instructions[-1]["type"] == "TimelinePinEntry":
                    tweets.append(instructions[-1]["entry"])

            # sort entries into Tweet entries and the bottom cursor
            for entry in entries:
                esw = entry["entryId"].startswith

                if esw("tweet-"):
                    tweets.append(entry)
                elif esw(("homeConversation-",
                          "profile-conversation-",
                          "conversationthread-")):
                    tweets.extend(entry["content"]["items"])
                elif esw("tombstone-"):
                    # give tombstone entries the same shape as Tweet
                    # entries so that the loop below can handle them
                    item = entry["content"]["itemContent"]
                    item["tweet_results"] = \
                        {"result": {"tombstone": item["tombstoneInfo"]}}
                    tweets.append(entry)
                elif esw("cursor-bottom-"):
                    cursor = entry["content"]
                    if "itemContent" in cursor:
                        cursor = cursor["itemContent"]
                    if not cursor.get("stopOnEmptyResponse", True):
                        # keep going even if there are no tweets
                        tweet = True
                    cursor = cursor.get("value")

            for entry in tweets:
                try:
                    tweet = ((entry.get("content") or entry["item"])
                             ["itemContent"]["tweet_results"]["result"])
                    if "tombstone" in tweet:
                        tweet = self._process_tombstone(
                            entry, tweet["tombstone"])
                        if not tweet:
                            continue
                    # unwrap results that nest the Tweet under "tweet"
                    if "tweet" in tweet:
                        tweet = tweet["tweet"]
                    legacy = tweet["legacy"]
                    tweet["sortIndex"] = entry.get("sortIndex")
                except KeyError:
                    extr.log.debug(
                        "Skipping %s (deleted)",
                        (entry.get("entryId") or "").rpartition("-")[2])
                    continue

                if "retweeted_status_result" in legacy:
                    retweet = legacy["retweeted_status_result"]["result"]
                    if "tweet" in retweet:
                        retweet = retweet["tweet"]
                    if original_retweets:
                        # yield the retweeted Tweet itself and remember
                        # the ID of the retweet it came from
                        try:
                            retweet["legacy"]["retweeted_status_id_str"] = \
                                retweet["rest_id"]
                            retweet["_retweet_id_str"] = tweet["rest_id"]
                            tweet = retweet
                        except KeyError:
                            continue
                    else:
                        # keep the retweet, but copy author, media
                        # entities, and withheld info from the original
                        try:
                            legacy["retweeted_status_id_str"] = \
                                retweet["rest_id"]
                            tweet["author"] = \
                                retweet["core"]["user_results"]["result"]

                            rtlegacy = retweet["legacy"]
                            if "extended_entities" in rtlegacy and \
                                    "extended_entities" not in legacy:
                                legacy["extended_entities"] = \
                                    rtlegacy["extended_entities"]
                            if "withheld_scope" in rtlegacy and \
                                    "withheld_scope" not in legacy:
                                legacy["withheld_scope"] = \
                                    rtlegacy["withheld_scope"]
                                legacy["full_text"] = rtlegacy["full_text"]
                        except KeyError:
                            pass

                yield tweet

                if "quoted_status_result" in tweet:
                    try:
                        quoted = tweet["quoted_status_result"]["result"]
                        quoted["legacy"]["quoted_by"] = (
                            tweet["core"]["user_results"]["result"]
                            ["legacy"]["screen_name"])
                        quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
                        quoted["sortIndex"] = entry.get("sortIndex")

                        yield quoted
                    except KeyError:
                        extr.log.debug(
                            "Skipping quote of %s (deleted)",
                            tweet.get("rest_id"))
                        continue

            # NOTE(review): 'tweet' is also falsy here when the last entry
            # was a tombstone for which _process_tombstone() returned
            # nothing, which ends pagination even though earlier entries
            # of this page were yielded - confirm this is intended
            if stop_tweets and not tweet:
                return
            # a missing or repeated cursor marks the end of the timeline
            if not cursor or cursor == variables.get("cursor"):
                return
            variables["cursor"] = cursor

    def _pagination_users(self, endpoint, variables, path=None):
        """Yield user objects from a GraphQL user timeline endpoint.

        endpoint:  GraphQL endpoint path
        variables: request variables; updated in place with
                   'self.variables' and the current cursor
        path:      keys leading from the response's 'data' object to the
                   object holding 'instructions'; None selects
                   'user.result.timeline.timeline'

        Only users with a 'rest_id' are yielded. The generator ends when
        the instructions cannot be found, the timeline is terminated at
        the bottom, or no cursor or entry was seen in a response.
        """
        variables.update(self.variables)
        params = {
            "variables": None,
            "features" : self._json_dumps(self.features_pagination),
        }

        while True:
            params["variables"] = self._json_dumps(variables)
            node = self._call(endpoint, params)["data"]

            try:
                if path is None:
                    node = node["user"]["result"]["timeline"]["timeline"]
                else:
                    for key in path:
                        node = node[key]
                instructions = node["instructions"]
            except KeyError:
                return

            next_cursor = last_entry = None
            terminated = False

            for instr in instructions:
                kind = instr["type"]

                if kind == "TimelineAddEntries":
                    for last_entry in instr["entries"]:
                        entry_id = last_entry["entryId"]

                        if entry_id.startswith("user-"):
                            try:
                                user = (last_entry["content"]["itemContent"]
                                        ["user_results"]["result"])
                            except KeyError:
                                # entry without user data
                                continue
                            if "rest_id" in user:
                                yield user

                        elif entry_id.startswith("cursor-bottom-"):
                            next_cursor = last_entry["content"]["value"]

                elif kind == "TimelineTerminateTimeline":
                    if instr["direction"] == "Bottom":
                        terminated = True

            if terminated or not next_cursor or not last_entry:
                return
            variables["cursor"] = next_cursor

    def _process_tombstone(self, entry, tombstone):
        """Handle a tombstone that takes the place of a Tweet.

        For "Age-restricted" tombstones, the Tweet is fetched with
        '_syndication_tweet()' and returned if syndication is enabled;
        otherwise the tombstone text is logged as a warning once per
        TwitterAPI instance. In all other cases the Tweet is reported as
        skipped in the debug log and None is returned.
        """
        message = (tombstone.get("richText") or tombstone["text"])["text"]
        # the Tweet ID is the part of the entry ID after the last '-'
        tweet_id = entry["entryId"].rpartition("-")[2]

        if message.startswith("Age-restricted"):
            if self._syndication:
                return self._syndication_tweet(tweet_id)
            if self._nsfw_warning:
                self._nsfw_warning = False
                self.extractor.log.warning('"%s"', message)

        self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, message)
        return None

    def _syndication_tweet(self, tweet_id):
        """Fetch a Tweet from the syndication endpoint.

        The response is adjusted in place and returned wrapped in a dict
        with 'rest_id', 'legacy', 'core', and '_retweet_id_str' entries,
        i.e. the keys the GraphQL pagination code reads from a Tweet.
        """
        url = "https://cdn.syndication.twimg.com/tweet-result?id=" + tweet_id
        legacy = self.extractor.request(url).json()

        # fill in user fields missing from the syndication response
        author = legacy["user"]
        author["description"] = ""
        author["entities"] = {"description": {}}
        legacy["user_id_str"] = author["id_str"]

        # a differing ID is treated as retweet: the returned Tweet
        # is the original, 'tweet_id' is the retweet
        retweet_id = None
        if legacy["id_str"] != tweet_id:
            legacy["retweeted_status_id_str"] = legacy["id_str"]
            legacy["id_str"] = retweet_id = tweet_id

        # assume 'conversation_id' is the same as 'id' when the tweet
        # is not a reply
        if not ("conversation_id_str" in legacy or
                "in_reply_to_status_id_str" in legacy):
            legacy["conversation_id_str"] = legacy["id_str"]

        # rewrite 'created_at' of Tweets with an ID below this threshold
        # into the "%a %b %d ..." format
        if int(tweet_id) < 300000000000000:
            created = text.parse_datetime(
                legacy["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ")
            legacy["created_at"] = created.strftime(
                "%a %b %d %H:%M:%S +0000 %Y")

        if "video" in legacy:
            video = legacy["video"]

            def variant_key(variant):
                # number in front of the 'x' in the second-to-last URL
                # path segment (presumably the video width - confirm)
                segment = variant["src"].split("/")[-2]
                return text.parse_int(segment.partition("x")[0])

            # keep only the highest ranked MP4 variant
            best = max(
                (v for v in video["variants"] if v["type"] == "video/mp4"),
                key=variant_key)
            best["url"] = best["src"]
            video["variants"] = (best,)

            legacy["extended_entities"] = {"media": [{
                "video_info"   : video,
                "original_info": {"width" : 0, "height": 0},
            }]}
        elif "photos" in legacy:
            photos = legacy["photos"]
            for photo in photos:
                photo["media_url_https"] = photo["url"]
                photo["original_info"] = {
                    "width" : photo["width"],
                    "height": photo["height"],
                }
            legacy["extended_entities"] = {"media": photos}

        return {
            "rest_id": legacy["id_str"],
            "legacy" : legacy,
            "core"   : {"user_results": {"result": legacy["user"]}},
            "_retweet_id_str": retweet_id,
        }


@cache(maxage=360*86400, keyarg=1)
def _login_impl(extr, username, password):
    """Walk through the onboarding login flow and return session cookies.

    The flow is a chain of POST requests to the 'onboarding/task.json'
    endpoint. Every response carries a 'flow_token' that has to be sent
    along with the next subtask (JS instrumentation -> username ->
    password -> account duplication check).

    Arguments:
        extr: extractor object providing 'log', 'session', 'request()'
            and 'sleep()'
        username: user identifier sent as 'user_identifier'
        password: password sent in the 'LoginEnterPassword' subtask

    Returns:
        A dict mapping cookie names to cookie values, built from
        'extr.session.cookies' after the last subtask succeeded.

    Raises:
        exception.AuthenticationError: when a flow response is not valid
            JSON, has a status code >= 400, or lacks a 'flow_token'.
    """
    import random

    if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
        extr.log.warning(
            "Login with email is no longer possible. "
            "You need to provide your username or phone number instead.")

    def process(response):
        """Return the 'flow_token' of 'response' or raise on failure"""
        try:
            data = response.json()
        except ValueError:
            data = {"errors": ({"message": "Invalid response"},)}
        else:
            if response.status_code < 400:
                try:
                    return data["flow_token"]
                except (KeyError, TypeError):
                    # no usable token in an otherwise "successful"
                    # response: report it as a failed login below
                    # instead of leaking a KeyError/TypeError
                    pass

        errors = []
        reported = data.get("errors") if isinstance(data, dict) else None
        for error in reported or ():
            msg = error.get("message")
            errors.append('"{}"'.format(msg) if msg else "Unknown error")
        extr.log.debug(response.text)
        raise exception.AuthenticationError(", ".join(errors))

    def submit(flow_token, subtask):
        """POST a single subtask input and return the raw response"""
        payload = {
            "flow_token": flow_token,
            "subtask_inputs": [subtask],
        }
        # fatal=None: let process() inspect error responses itself
        return extr.request(
            url, method="POST", headers=headers, json=payload, fatal=None)

    # start from a clean session and obtain guest credentials first
    extr.session.cookies.clear()
    api = TwitterAPI(extr)
    api._authenticate_guest()
    headers = api.headers

    extr.log.info("Logging in as %s", username)

    # all follow-up subtasks are sent to the same endpoint;
    # only the initial request carries the 'flow_name' parameter
    url = "https://api.twitter.com/1.1/onboarding/task.json"

    # init
    data = {
        "input_flow_data": {
            "flow_context": {
                "debug_overrides": {},
                "start_location": {"location": "unknown"},
            },
        },
        "subtask_versions": {
            "action_list": 2,
            "alert_dialog": 1,
            "app_download_cta": 1,
            "check_logged_in_account": 1,
            "choice_selection": 3,
            "contacts_live_sync_permission_prompt": 0,
            "cta": 7,
            "email_verification": 2,
            "end_flow": 1,
            "enter_date": 1,
            "enter_email": 2,
            "enter_password": 5,
            "enter_phone": 2,
            "enter_recaptcha": 1,
            "enter_text": 5,
            "enter_username": 2,
            "generic_urt": 3,
            "in_app_notification": 1,
            "interest_picker": 3,
            "js_instrumentation": 1,
            "menu_dialog": 1,
            "notifications_permission_prompt": 2,
            "open_account": 2,
            "open_home_timeline": 1,
            "open_link": 1,
            "phone_verification": 4,
            "privacy_options": 1,
            "security_key": 3,
            "select_avatar": 4,
            "select_banner": 2,
            "settings_list": 7,
            "show_code": 1,
            "sign_up": 2,
            "sign_up_review": 4,
            "tweet_selection_urt": 1,
            "update_users": 1,
            "upload_media": 1,
            "user_recommendations_list": 4,
            "user_recommendations_urt": 1,
            "wait_spinner": 3,
            "web_modal": 1,
        },
    }
    response = extr.request(
        url + "?flow_name=login", method="POST", headers=headers, json=data)

    # js instrumentation
    response = submit(process(response), {
        "subtask_id": "LoginJsInstrumentationSubtask",
        "js_instrumentation": {
            "response": "{}",
            "link": "next_link",
        },
    })

    # username
    flow_token = process(response)
    extr.sleep(random.uniform(2.0, 4.0), "login (username)")
    response = submit(flow_token, {
        "subtask_id": "LoginEnterUserIdentifierSSO",
        "settings_list": {
            "setting_responses": [
                {
                    "key": "user_identifier",
                    "response_data": {
                        "text_data": {"result": username},
                    },
                },
            ],
            "link": "next_link",
        },
    })

    # password
    flow_token = process(response)
    extr.sleep(random.uniform(2.0, 4.0), "login (password)")
    response = submit(flow_token, {
        "subtask_id": "LoginEnterPassword",
        "enter_password": {
            "password": password,
            "link": "next_link",
        },
    })

    # account duplication check ?
    response = submit(process(response), {
        "subtask_id": "AccountDuplicationCheck",
        "check_logged_in_account": {
            "link": "AccountDuplicationCheck_false",
        },
    })
    # the final token is not needed; this only validates the response
    process(response)

    return {
        cookie.name: cookie.value
        for cookie in extr.session.cookies
    }
-												[twitter] add extractor

											
										
										
											8 years ago
+								# -*- coding: utf-8 -*-
-												fix 'keywords' in extractor tests (#3491)

											
										
										
											2 years ago
+								# Copyright 2016-2023 Mike Fährmann
-												[twitter] add extractor

											
										
										
											8 years ago
+								#
 								# This program is free software; you can redistribute it and/or modify
 								# it under the terms of the GNU General Public License version 2 as
 								# published by the Free Software Foundation.
-												[twitter] force old login page layout (fixes #584, fixes #598)

											
										
										
											5 years ago
+								"""Extractors for https://twitter.com/"""
-												[twitter] add extractor

											
										
										
											8 years ago
 								from .common import Extractor, Message
-												add a general 'generate_csrf_token()' function

											
										
										
											4 years ago
+								from .. import text, util, exception
-												[twitter] don't cache results of 'user_by_screen_name()'

A 'keyarg=1' argument to the memcache decorator would have worked as
well, but keeping the user object in memory isn't useful for the vast
majority of use cases and only wastes space.

(closes #817)

											
										
										
											4 years ago
+								from ..cache import cache
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								import itertools
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								import json
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
+								import re
-												code adjustments according to pep8 nr2

											
										
										
											8 years ago
-												[nitter] add extractors for Nitter instances (#2696)

											
										
										
											2 years ago
+								BASE_PATTERN = r"(?:https?://)?(?:www\.|mobile\.)?(?:[fv]x)?twitter\.com"
-												[twitter] add support for nitter.net URLs in pattern (#890)

Please note that URLs are only "translated", all requests are still
done always via the Twitter API.
											
										
										
											4 years ago
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								class TwitterExtractor(Extractor):
 								    """Base class for twitter extractors"""
-												[twitter] add extractor

											
										
										
											8 years ago
+								    category = "twitter"
-												Revert "[twitter] use '{author[name]' in default directory names"

This reverts commit 9ad3cdc5d8ec71d9700714c5bb5151850d0300b5.

											
										
										
											2 years ago
+								    directory_fmt = ("{category}", "{user[name]}")
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								    filename_fmt = "{tweet_id}_{num}.{extension}"
 								    archive_fmt = "{tweet_id}_{retweet_id}_{num}"
-												[twitter] use a simpler data structure to store cookies in cache

Use a dict with name-value pairs instead of an entire
RequestsCookieJar object.

											
										
										
											5 years ago
+								    cookiedomain = ".twitter.com"
-												[twitter] skip login if 'auth_token' cookie is present

											
										
										
											4 years ago
+								    cookienames = ("auth_token",)
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								    root = "https://twitter.com"
-												[twitter] use "browser": "firefox" by default (#3522)

and reenable TLS 1.2 ciphers

											
										
										
											2 years ago
+								    browser = "firefox"
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								    def __init__(self, match):
-												propagate 'match' to base extractor constructor

											
										
										
											6 years ago
+								        Extractor.__init__(self, match)
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								        self.user = match.group(1)
-												[twitter] rename 'text-only' to 'text-tweets' (#570)

											
										
										
											3 years ago
+								        self.textonly = self.config("text-tweets", False)
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other user's timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								        self.retweets = self.config("retweets", False)
-												[twitter] add 'replies' option (closes #705)

											
										
										
											4 years ago
+								        self.replies = self.config("replies", True)
-												[twitter] add option to extract TwitPic embeds (#579)

											
										
										
											5 years ago
+								        self.twitpic = self.config("twitpic", False)
-												[twitter] add 'pinned' option

											
										
										
											3 years ago
+								        self.pinned = self.config("pinned", False)
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other user's timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								        self.quoted = self.config("quoted", False)
-												[twitter] change default value for 'videos' to 'true'

Every other 'videos' option defaulted to 'true', except Twitter.

											
										
										
											5 years ago
+								        self.videos = self.config("videos", True)
-												[twitter] disable 'cards' by default

											
										
										
											2 years ago
+								        self.cards = self.config("cards", False)
-												[twitter] improve 'cards-blacklist' (#2875)

allow blacklisting domains and 'name:domain',
where 'domain' depends on a card's 'vanity_url' value

											
										
										
											2 years ago
+								        self.cards_blacklist = self.config("cards-blacklist")
-												[twitter] implement 'syndication=extended'

to be able to fetch extended user metadata

											
										
										
											2 years ago
+								        self.syndication = self.config("syndication")
-												[twitter] add 'transform' option

											
										
										
											2 years ago
 								        if not self.config("transform", True):
 								            self._transform_user = util.identity
 								            self._transform_tweet = util.identity
-												[twitter] do not overwrite previously assigned users (#3922)

											
										
										
											1 year ago
+								        self._user = None
 								        self._user_obj = None
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								        self._user_cache = {}
-												[twitter] fix extractor for direct image links (fixes #2030)

											
										
										
											3 years ago
+								        self._init_sizes()
-												[twitter] add experimental 'videos' option (#99)

Enabling this option will detect videos in tweets and output them as
"unsupported" URLs, so that these can then be downloaded with youtube-dl

There are a lot of improvements to be made to the current
implementation, but it works and does what it is supposed to, even if
inefficient as can be ...

											
										
										
											6 years ago
-												[twitter] fix extractor for direct image links (fixes #2030)

											
										
										
											3 years ago
+								    def _init_sizes(self):
-												[twitter] add 'size' option (#1881)

											
										
										
											3 years ago
+								        size = self.config("size")
 								        if size is None:
 								            self._size_image = "orig"
-												[twitter] include '4096x4096' as a default image fallback

(closes #2107, closes #1881)

											
										
										
											3 years ago
+								            self._size_fallback = ("4096x4096", "large", "medium", "small")
-												[twitter] add 'size' option (#1881)

											
										
										
											3 years ago
+								        else:
 								            if isinstance(size, str):
 								                size = size.split(",")
 								            self._size_image = size[0]
 								            self._size_fallback = size[1:]
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								    def items(self):
-												[twitter] add login support (#214)

											
										
										
											6 years ago
+								        self.login()
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        self.api = TwitterAPI(self)
-												[twitter] improve

- update metadata structure
  - combine all user… entries into their own dict
  - let 'user' always specify the Timeline owner
  - add 'author' entry that specifies the original Tweet author
- create directories per post (closes #491)
- fix username issues with /i/web/ URLs

											
										
										
											5 years ago
+								        metadata = self.metadata()
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
-												[twitter] implement 'expand' option (#2665)

											
										
										
											2 years ago
+								        if self.config("expand"):
 								            tweets = self._expand_tweets(self.tweets())
 								            self.tweets = lambda : tweets
-												[twitter] ignore previously seen Tweets (#2712)

occurs primarily for /with_replies results when logged in

											
										
										
											2 years ago
+								        if self.config("unique", True):
 								            seen_tweets = set()
 								        else:
 								            seen_tweets = None
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
+								        if self.twitpic:
 								            self._find_twitpic = re.compile(
-												[twitter] optimize '_extract_twitpic()'

- use findall instead of finditer
- store URLs in a dict to discard duplicates

											
										
										
											1 year ago
+								                r"https?(://twitpic\.com/(?!photos/)\w+)").findall
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								        for tweet in self.tweets():
-												[twitter] restore TwitPic support

											
										
										
											4 years ago
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if "legacy" in tweet:
 								                data = tweet["legacy"]
 								            else:
 								                data = tweet
 								            if not self.retweets and "retweeted_status_id_str" in data:
 								                self.log.debug("Skipping %s (retweet)", data["id_str"])
-												[twitter] add debug messages for all skipped Tweets (#867)

											
										
										
											4 years ago
+								                continue
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if not self.quoted and "quoted_by_id_str" in data:
 								                self.log.debug("Skipping %s (quoted tweet)", data["id_str"])
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								                continue
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if "in_reply_to_user_id_str" in data and (
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								                not self.replies or (
 								                    self.replies == "self" and
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								                    data["user_id_str"] !=
 								                    (self._user_obj["rest_id"] if self._user else
 								                     data["in_reply_to_user_id_str"])
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								                )
 								            ):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                self.log.debug("Skipping %s (reply)", data["id_str"])
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								                continue
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] prioritize tweet type checks (#3439)

Do not consider a tweet seen before applying 'retweet', 'quote' and
'reply' checks. Otherwise the original tweets will also be skipped if
the "derivative" tweets and the original tweets are from the same user.

											
										
										
											2 years ago
+								            if seen_tweets is not None:
 								                if data["id_str"] in seen_tweets:
 								                    self.log.debug(
 								                        "Skipping %s (previously seen)", data["id_str"])
 								                    continue
 								                seen_tweets.add(data["id_str"])
-												[twitter] warn about 'withheld' Tweets and users (#3864)

											
										
										
											1 year ago
+								            if "withheld_scope" in data:
 								                txt = data.get("full_text") or data.get("text") or ""
 								                self.log.warning("'%s' (%s)", txt, data["id_str"])
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								            files = []
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if "extended_entities" in data:
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
+								                self._extract_media(
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                    data, data["extended_entities"]["media"], files)
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								            if "card" in tweet and self.cards:
 								                self._extract_card(tweet, files)
-												[twitter] restore TwitPic support

											
										
										
											4 years ago
+								            if self.twitpic:
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                self._extract_twitpic(data, files)
-												[twitter] add 'text-only' option (#570)

											
										
										
											3 years ago
+								            if not files and not self.textonly:
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								                continue
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            tdata = self._transform_tweet(tweet)
 								            tdata.update(metadata)
-												[twitter] add 'count' metadata field (#2741)

											
										
										
											2 years ago
+								            tdata["count"] = len(files)
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            yield Message.Directory, tdata
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								            for tdata["num"], file in enumerate(files, 1):
 								                file.update(tdata)
 								                url = file.pop("url")
 								                if "extension" not in file:
 								                    text.nameext_from_url(url, file)
 								                yield Message.Url, url, file
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
+								    def _extract_media(self, tweet, entities, files):
 								        for media in entities:
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								            descr = media.get("ext_alt_text")
-												[twitter] update GraphQL endpoint & fix width/height entries

											
										
										
											4 years ago
+								            width = media["original_info"].get("width", 0)
 								            height = media["original_info"].get("height", 0)
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
 								            if "video_info" in media:
 								                if self.videos == "ytdl":
 								                    files.append({
 								                        "url": "ytdl:{}/i/web/status/{}".format(
 								                            self.root, tweet["id_str"]),
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								                        "width"      : width,
 								                        "height"     : height,
 								                        "extension"  : None,
 								                        "description": descr,
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								                    })
 								                elif self.videos:
 								                    video_info = media["video_info"]
 								                    variant = max(
 								                        video_info["variants"],
 								                        key=lambda v: v.get("bitrate", 0),
 								                    )
 								                    files.append({
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								                        "url"        : variant["url"],
 								                        "width"      : width,
 								                        "height"     : height,
 								                        "bitrate"    : variant.get("bitrate", 0),
 								                        "duration"   : video_info.get(
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								                            "duration_millis", 0) / 1000,
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								                        "description": descr,
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								                    })
 								            elif "media_url_https" in media:
 								                url = media["media_url_https"]
-												[twitter] fix new-style '/card_img/' URLs

											
										
										
											2 years ago
+								                if url[-4] == ".":
 								                    base, _, fmt = url.rpartition(".")
 								                    base += "?format=" + fmt + "&name="
 								                else:
 								                    base = url.rpartition("=")[0] + "="
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								                files.append(text.nameext_from_url(url, {
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								                    "url"        : base + self._size_image,
 								                    "width"      : width,
 								                    "height"     : height,
 								                    "_fallback"  : self._image_fallback(base),
 								                    "description": descr,
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								                }))
 								            else:
 								                files.append({"url": media["media_url"]})
-												[twitter] add 'size' option (#1881)

											
										
										
											3 years ago
+								    def _image_fallback(self, base):
 								        for fmt in self._size_fallback:
 								            yield base + fmt
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								    def _extract_card(self, tweet, files):
 								        card = tweet["card"]
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        if "legacy" in card:
 								            card = card["legacy"]
-												[twitter] add 'cards-blacklist' option (#2875)

											
										
										
											2 years ago
 								        name = card["name"].rpartition(":")[2]
-												[twitter] improve 'cards-blacklist' (#2875)

allow blacklisting domains and 'name:domain',
where 'domain' depends on a card's 'vanity_url' value

											
										
										
											2 years ago
+								        bvals = card["binding_values"]
 								        if isinstance(bvals, list):
 								            bvals = {bval["key"]: bval["value"]
 								                     for bval in card["binding_values"]}
 								        cbl = self.cards_blacklist
 								        if cbl:
 								            if name in cbl:
 								                return
 								            if "vanity_url" in bvals:
 								                domain = bvals["vanity_url"]["string_value"]
 								                if domain in cbl or name + ":" + domain in cbl:
 								                    return
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
 								        if name in ("summary", "summary_large_image"):
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								            for prefix in ("photo_image_full_size_",
 								                           "summary_photo_image_",
 								                           "thumbnail_image_"):
 								                for size in ("original", "x_large", "large", "small"):
 								                    key = prefix + size
 								                    if key in bvals:
-												[twitter] ensure card entries have a 'url' (#1868)

											
										
										
											3 years ago
+								                        value = bvals[key].get("image_value")
 								                        if value and "url" in value:
-												[twitter] provide fallback URLs for card images

https://github.com/mikf/gallery-dl/commit/f2e8aedd746bd570a79d0289aaad68a00dbcf9f9#commitcomment-64057751

											
										
										
											3 years ago
+								                            base, sep, size = value["url"].rpartition("&name=")
 								                            if sep:
 								                                base += sep
 								                                value["url"] = base + self._size_image
 								                                value["_fallback"] = self._image_fallback(base)
-												[twitter] ensure card entries have a 'url' (#1868)

											
										
										
											3 years ago
+								                            files.append(value)
 								                            return
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
+								        elif name == "unified_card":
-												replace json.loads with direct calls to JSONDecoder.decode

											
										
										
											2 years ago
+								            data = util.json_loads(bvals["unified_card"]["string_value"])
-												[twitter] general support for unified cards (#2875)

just removing the 'type' check seems to work

											
										
										
											2 years ago
+								            self._extract_media(tweet, data["media_entities"].values(), files)
 								            return
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
-												[twitter] changes to 'cards' option

- change default value to 'true'
- only invoke youtube-dl for cards unsupported by gallery
  when 'cards' is set to "ytdl"

"cards": true   --> only download card images
"cards": "ytdl" --> download card images and
                    use youtube_dl on otherwise unsupported cards

											
										
										
											3 years ago
+								        if self.cards == "ytdl":
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            tweet_id = tweet.get("rest_id") or tweet["id_str"]
 								            url = "ytdl:{}/i/web/status/{}".format(self.root, tweet_id)
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								            files.append({"url": url})
 								    def _extract_twitpic(self, tweet, files):
-												[twitter] optimize '_extract_twitpic()'

- use findall instead of finditer
- store URLs in a dict to discard duplicates

											
										
										
											1 year ago
+								        urls = {}
 								        # collect URLs from entities
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
+								        for url in tweet["entities"].get("urls") or ():
-												[twitter] restore TwitPic support

											
										
										
											4 years ago
+								            url = url["expanded_url"]
-												[twitter] refactor extraction of TwitPic URLs

flattening

											
										
										
											2 years ago
+								            if "//twitpic.com/" not in url or "/photos/" in url:
 								                continue
-												[twitter] improve 'http' -> 'https' replacement

											
										
										
											2 years ago
+								            if url.startswith("http:"):
 								                url = "https" + url[4:]
-												[twitter] optimize '_extract_twitpic()'

- use findall instead of finditer
- store URLs in a dict to discard duplicates

											
										
										
											1 year ago
+								            urls[url] = None
 								        # collect URLs from text
 								        for url in self._find_twitpic(
 								                tweet.get("full_text") or tweet.get("text") or ""):
 								            urls["https" + url] = None
 								        # extract actual URLs
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
+								        for url in urls:
-												[twitter] improve 'http' -> 'https' replacement

											
										
										
											2 years ago
+								            response = self.request(url, fatal=False)
 								            if response.status_code >= 400:
-												[twitter] refactor extraction of TwitPic URLs

flattening

											
										
										
											2 years ago
+								                continue
-												[twitter] improve 'http' -> 'https' replacement

											
										
										
											2 years ago
+								            url = text.extr(response.text, 'name="twitter:image" value="', '"')
-												[twitter] refactor extraction of TwitPic URLs

flattening

											
										
										
											2 years ago
+								            if url:
 								                files.append({"url": url})
-												[twitter] restore TwitPic support

											
										
										
											4 years ago
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								    def _transform_tweet(self, tweet):
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								        if "author" in tweet:
 								            author = tweet["author"]
 								        elif "core" in tweet:
 								            author = tweet["core"]["user_results"]["result"]
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        else:
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								            author = tweet["user"]
 								        author = self._transform_user(author)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] support 'note_tweet's

											
										
										
											2 years ago
+								        if "note_tweet" in tweet:
 								            note = tweet["note_tweet"]["note_tweet_results"]["result"]
 								        else:
 								            note = None
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        if "legacy" in tweet:
 								            tweet = tweet["legacy"]
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								        tweet_id = int(tweet["id_str"])
 								        if tweet_id >= 300000000000000:
 								            date = text.parse_timestamp(
 								                ((tweet_id >> 22) + 1288834974657) // 1000)
 								        else:
 								            date = text.parse_datetime(
 								                tweet["created_at"], "%a %b %d %H:%M:%S %z %Y")
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								        tget = tweet.get
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								        tdata = {
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								            "tweet_id"      : tweet_id,
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            "retweet_id"    : text.parse_int(
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								                tget("retweeted_status_id_str")),
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            "quote_id"      : text.parse_int(
-												[twitter] update 'quote_id' and 'quote_by'

- 'quote_id' is now non-null for quoted Tweets and has the ID of the
  quoting Tweet, instead of the other way round like before
- 'quote_by' is now the 'screen_name' of the quoting user
  (it used to hold what the new 'quote_id' holds now)

											
										
										
											2 years ago
+								                tget("quoted_by_id_str")),
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            "reply_id"      : text.parse_int(
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								                tget("in_reply_to_status_id_str")),
-												[twitter] extract 'conversation_id' metadata (#3839)

											
										
										
											1 year ago
+								            "conversation_id": text.parse_int(
 								                tget("conversation_id_str")),
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								            "date"          : date,
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								            "author"        : author,
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								            "user"          : self._user or author,
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            "lang"          : tweet["lang"],
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								            "favorite_count": tget("favorite_count"),
 								            "quote_count"   : tget("quote_count"),
 								            "reply_count"   : tget("reply_count"),
 								            "retweet_count" : tget("retweet_count"),
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								        }
-												[twitter] support 'note_tweet's

											
										
										
											2 years ago
+								        entities = note["entity_set"] if note else tweet["entities"]
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								        hashtags = entities.get("hashtags")
 								        if hashtags:
 								            tdata["hashtags"] = [t["text"] for t in hashtags]
 								        mentions = entities.get("user_mentions")
 								        if mentions:
 								            tdata["mentions"] = [{
 								                "id": text.parse_int(u["id_str"]),
 								                "name": u["screen_name"],
 								                "nick": u["name"],
 								            } for u in mentions]
-												[twitter] support 'note_tweet's

											
										
										
											2 years ago
+								        content = text.unescape(
 								            note["text"] if note else tget("full_text") or tget("text") or "")
-												[twitter] resolve t.co URLs in 'content' (#1532)

											
										
										
											3 years ago
+								        urls = entities.get("urls")
 								        if urls:
 								            for url in urls:
 								                content = content.replace(url["url"], url["expanded_url"])
-												[twitter] strip useless t.co links (#1532)

The 'full_text' of Tweets with media content usually ends with a t.co
link to itself. This commit removes those.

											
										
										
											3 years ago
+								        txt, _, tco = content.rpartition(" ")
 								        tdata["content"] = txt if tco.startswith("https://t.co/") else content
-												[twitter] resolve t.co URLs in 'content' (#1532)

											
										
										
											3 years ago
-												[twitter] add 'reply_to' metadata to replies

											
										
										
											4 years ago
+								        if "in_reply_to_screen_name" in tweet:
 								            tdata["reply_to"] = tweet["in_reply_to_screen_name"]
-												[twitter] update 'quote_id' and 'quote_by'

- 'quote_id' is now non-null for quoted Tweets and has the ID of the
  quoting Tweet, instead of the other way round like before
- 'quote_by' is now the 'screen_name' of the quoting user
  (it used to hold what the new 'quote_id' holds now)

											
										
										
											2 years ago
+								        if "quoted_by" in tweet:
 								            tdata["quote_by"] = tweet["quoted_by"]
-												[twitter] add 'reply_to' metadata to replies

											
										
										
											4 years ago
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								        return tdata
 								    def _transform_user(self, user):
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								        uid = user.get("rest_id") or user["id_str"]
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								        try:
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								            return self._user_cache[uid]
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								        except KeyError:
 								            pass
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        if "legacy" in user:
 								            user = user["legacy"]
-												[twitter] implement 'syndication=extended'

to be able to fetch extended user metadata

											
										
										
											2 years ago
+								        elif "statuses_count" not in user and self.syndication == "extended":
-												[twitter] apply suggestions from code review

Co-authored-by: Mike Fährmann <mike_faehrmann@web.de>

											
										
										
											2 years ago
+								            # try to fetch extended user data
-												[twitter] implement 'syndication=extended'

to be able to fetch extended user metadata

											
										
										
											2 years ago
+								            user = self.api.user_by_screen_name(user["screen_name"])["legacy"]
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
 								        uget = user.get
-												[twitter] warn about 'withheld' Tweets and users (#3864)

											
										
										
											1 year ago
+								        if uget("withheld_scope"):
 								            self.log.warning("'%s'", uget("description"))
-												[twitter] add 'url' to user objects (#1532, #1787)

											
										
										
											3 years ago
-												[twitter] warn about 'withheld' Tweets and users (#3864)

											
										
										
											1 year ago
+								        entities = user["entities"]
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								        self._user_cache[uid] = udata = {
 								            "id"              : text.parse_int(uid),
 								            "name"            : user["screen_name"],
 								            "nick"            : user["name"],
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								            "location"        : uget("location"),
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								            "date"            : text.parse_datetime(
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								                uget("created_at"), "%a %b %d %H:%M:%S %z %Y"),
 								            "verified"        : uget("verified", False),
 								            "profile_banner"  : uget("profile_banner_url", ""),
 								            "profile_image"   : uget(
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								                "profile_image_url_https", "").replace("_normal.", "."),
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								            "favourites_count": uget("favourites_count"),
 								            "followers_count" : uget("followers_count"),
 								            "friends_count"   : uget("friends_count"),
 								            "listed_count"    : uget("listed_count"),
 								            "media_count"     : uget("media_count"),
 								            "statuses_count"  : uget("statuses_count"),
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								        }
-												[twitter] add 'url' to user objects (#1532, #1787)

											
										
										
											3 years ago
-												[twitter] expand t.co links in user descriptions (#1532, #1787)

											
										
										
											3 years ago
+								        descr = user["description"]
 								        urls = entities["description"].get("urls")
 								        if urls:
 								            for url in urls:
 								                descr = descr.replace(url["url"], url["expanded_url"])
 								        udata["description"] = descr
-												[twitter] add 'url' to user objects (#1532, #1787)

											
										
										
											3 years ago
+								        if "url" in entities:
-												[twitter] fix 'url' extraction for users without 'expanded_url'

(#1532, #1787)

											
										
										
											3 years ago
+								            url = entities["url"]["urls"][0]
 								            udata["url"] = url.get("expanded_url") or url.get("url")
-												[twitter] add 'url' to user objects (#1532, #1787)

											
										
										
											3 years ago
-												[twitter] slightly improve '_transform_user()'

											
										
										
											3 years ago
+								        return udata
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
-												[twitter] simplify 'user' assignment

											
										
										
											2 years ago
+								    def _assign_user(self, user):
-												[twitter] improve 480bc34e

only check for double user assignment where necessary

											
										
										
											1 year ago
+								        self._user_obj = user
 								        self._user = self._transform_user(user)
-												[twitter] simplify 'user' assignment

											
										
										
											2 years ago
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
+								    def _users_result(self, users):
-												[twitter] allow specifying a custom format for user results

(#1337)

											
										
										
											4 years ago
+								        userfmt = self.config("users")
 								        if not userfmt or userfmt == "timeline":
 								            cls = TwitterTimelineExtractor
 								            fmt = (self.root + "/i/user/{rest_id}").format_map
 								        elif userfmt == "media":
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
+								            cls = TwitterMediaExtractor
-												[twitter] allow specifying a custom format for user results

(#1337)

											
										
										
											4 years ago
+								            fmt = (self.root + "/id:{rest_id}/media").format_map
-												[twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL,
  like with '/media' (https://twitter.com/USER/tweets)
  although there should be no need to ever do that

											
										
										
											2 years ago
+								        elif userfmt == "tweets":
 								            cls = TwitterTweetsExtractor
 								            fmt = (self.root + "/id:{rest_id}/tweets").format_map
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
+								        else:
-												[twitter] allow specifying a custom format for user results

(#1337)

											
										
										
											4 years ago
+								            cls = None
 								            fmt = userfmt.format_map
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
 								        for user in users:
 								            user["_extractor"] = cls
-												[twitter] allow specifying a custom format for user results

(#1337)

											
										
										
											4 years ago
+								            yield Message.Queue, fmt(user), user
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
-												[twitter] implement 'expand' option (#2665)

											
										
										
											2 years ago
+								    def _expand_tweets(self, tweets):
 								        seen = set()
 								        for tweet in tweets:
-												[twitter] fix crash when using 'expand' and 'syndication'

caused by KeyError: 'conversation_id_str'

											
										
										
											2 years ago
+								            obj = tweet["legacy"] if "legacy" in tweet else tweet
 								            cid = obj.get("conversation_id_str")
 								            if not cid:
 								                tid = obj["id_str"]
 								                self.log.warning(
 								                    "Unable to expand %s (no 'conversation_id')", tid)
 								                continue
 								            if cid in seen:
 								                self.log.debug(
 								                    "Skipping expansion of %s (previously seen)", cid)
 								                continue
 								            seen.add(cid)
 								            try:
 								                yield from self.api.tweet_detail(cid)
 								            except Exception:
 								                yield tweet
-												[twitter] implement 'expand' option (#2665)

											
										
										
											2 years ago
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								    def _make_tweet(self, user, url, id_str):
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
+								        return {
 								            "id_str": id_str,
 								            "lang": None,
 								            "user": user,
 								            "entities": {},
 								            "extended_entities": {
 								                "media": [
 								                    {
 								                        "original_info": {},
 								                        "media_url": url,
 								                    },
 								                ],
 								            },
 								        }
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								    def metadata(self):
 								        """Return general metadata"""
-												[twitter] improve

- update metadata structure
  - combine all user… entries into their own dict
  - let 'user' always specify the Timeline owner
  - add 'author' entry that specifies the original Tweet author
- create directories per post (closes #491)
- fix username issues with /i/web/ URLs

											
										
										
											5 years ago
+								        return {}
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
 								    def tweets(self):
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								        """Yield all relevant tweet objects"""
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
-												[twitter] add login support (#214)

											
										
										
											6 years ago
+								    def login(self):
-												[twitter] skip login if 'auth_token' cookie is present

											
										
										
											4 years ago
+								        if not self._check_cookies(self.cookienames):
 								            username, password = self._get_auth_info()
 								            if username:
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								                self._update_cookies(_login_impl(self, username, password))
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								class TwitterTimelineExtractor(TwitterExtractor):
-												[twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL,
  like with '/media' (https://twitter.com/USER/tweets)
  although there should be no need to ever do that

											
										
										
											2 years ago
+								    """Extractor for a Twitter user timeline"""
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								    subcategory = "timeline"
-												[twitter] match '/i/user/ID' URLs

											
										
										
											4 years ago
+								    pattern = (BASE_PATTERN + r"/(?!search)(?:([^/?#]+)/?(?:$|[?#])"
 								               r"|i(?:/user/|ntent/user\?user_id=)(\d+))")
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								    test = (
 								        ("https://twitter.com/supernaturepics", {
 								            "range": "1-40",
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								        }),
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								        # suspended account (#2216)
-												[twitter] fix using user IDs for suspended accounts

											
										
										
											2 years ago
+								        ("https://twitter.com/OptionalTypo", {
 								            "exception": exception.NotFoundError,
 								        }),
 								        # suspended account user ID
 								        ("https://twitter.com/id:772949683521978368", {
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								            "exception": exception.NotFoundError,
 								        }),
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								        ("https://mobile.twitter.com/supernaturepics?p=i"),
-												[twitter] support specifying users by ID (#980)

by using 'id:…' as their screen name, i.e.
https://www.twitter.com/id:2976459548/media
instead of
https://twitter.com/supernaturepics/media

The user ID can, for example, be obtained from the output of
$ gallery-dl -j --range 1 https://twitter.com/<screen-name>

											
										
										
											4 years ago
+								        ("https://www.twitter.com/id:2976459548"),
-												[twitter] match '/i/user/ID' URLs

											
										
										
											4 years ago
+								        ("https://twitter.com/i/user/2976459548"),
-												[twitter] support '/intent/user?user_id=…' URLs (#980)

											
										
										
											4 years ago
+								        ("https://twitter.com/intent/user?user_id=2976459548"),
-												recognize vxtwitter URLs (#2621)


											
										
										
											2 years ago
+								        ("https://fxtwitter.com/supernaturepics"),
 								        ("https://vxtwitter.com/supernaturepics"),
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								    )
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
-												[twitter] support '/intent/user?user_id=…' URLs (#980)

											
										
										
											4 years ago
+								    def __init__(self, match):
 								        TwitterExtractor.__init__(self, match)
-												[twitter] match '/i/user/ID' URLs

											
										
										
											4 years ago
+								        user_id = match.group(2)
 								        if user_id:
 								            self.user = "id:" + user_id
-												[twitter] support '/intent/user?user_id=…' URLs (#980)

											
										
										
											4 years ago
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								    def tweets(self):
-												[twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL,
  like with '/media' (https://twitter.com/USER/tweets)
  although there should be no need to ever do that

											
										
										
											2 years ago
+								        # yield initial batch of (media) tweets
-												[twitter] use twMediaDownloader strategy for user URLs

- use media timeline + search for default user URLs like
  https://twitter.com/SCREEN_NAME
- fetches all/most media for the type of twitter URL that most users
  use with gallery-dl
- can be disabled by setting 'strategy' to any truthy value,
  like "timeline"

											
										
										
											2 years ago
+								        tweet = None
-												[twitter] implement 'strategy' option (#2712)

to be able to better control what Tweets get used and returned
for twitter.com/USER URLs.

											
										
										
											2 years ago
+								        for tweet in self._select_tweet_source()(self.user):
-												[twitter] use twMediaDownloader strategy for user URLs

- use media timeline + search for default user URLs like
  https://twitter.com/SCREEN_NAME
- fetches all/most media for the type of twitter URL that most users
  use with gallery-dl
- can be disabled by setting 'strategy' to any truthy value,
  like "timeline"

											
										
										
											2 years ago
+								            yield tweet
 								        if tweet is None:
 								            return
-												[twitter] improve strategy for user URLs (#2665)

- use '/with_replies' when appropriate
- consider 'text-tweets'
- build search query as necessary

											
										
										
											2 years ago
+								        # build search query
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								        query = "from:{} max_id:{}".format(
 								            self._user["name"], tweet["rest_id"])
-												[twitter] improve strategy for user URLs (#2665)

- use '/with_replies' when appropriate
- consider 'text-tweets'
- build search query as necessary

											
										
										
											2 years ago
+								        if self.retweets:
 								            query += " include:retweets include:nativeretweets"
-												[twitter] fall back to unfiltered search (#2766)

											
										
										
											2 years ago
-												[twitter] improve strategy for user URLs (#2665)

- use '/with_replies' when appropriate
- consider 'text-tweets'
- build search query as necessary

											
										
										
											2 years ago
+								        if not self.textonly:
-												[twitter] fall back to unfiltered search (#2766)

											
										
										
											2 years ago
+								            # try to search for media-only tweets
 								            tweet = None
-												[twitter] use filter:links (#2766)

											
										
										
											2 years ago
+								            for tweet in self.api.search_adaptive(query + " filter:links"):
-												[twitter] fall back to unfiltered search (#2766)

											
										
										
											2 years ago
+								                yield tweet
 								            if tweet is not None:
 								                return
-												[twitter] improve strategy for user URLs (#2665)

- use '/with_replies' when appropriate
- consider 'text-tweets'
- build search query as necessary

											
										
										
											2 years ago
-												[twitter] fall back to unfiltered search (#2766)

											
										
										
											2 years ago
+								        # yield unfiltered search results
-												[twitter] improve strategy for user URLs (#2665)

- use '/with_replies' when appropriate
- consider 'text-tweets'
- build search query as necessary

											
										
										
											2 years ago
+								        yield from self.api.search_adaptive(query)
-												[twitter] use twMediaDownloader strategy for user URLs

- use media timeline + search for default user URLs like
  https://twitter.com/SCREEN_NAME
- fetches all/most media for the type of twitter URL that most users
  use with gallery-dl
- can be disabled by setting 'strategy' to any truthy value,
  like "timeline"

											
										
										
											2 years ago
-												[twitter] implement 'strategy' option (#2712)

to be able to better control what Tweets get used and returned
for twitter.com/USER URLs.

											
										
										
											2 years ago
+								    def _select_tweet_source(self):
 								        # Choose the API method that supplies the initial batch of Tweets,
 								        # based on the 'strategy' option. The bound method is returned
 								        # uncalled so the caller can invoke it with a user.
 								        strategy = self.config("strategy")
 								        if strategy is None or strategy == "auto":
 								            # default: Tweets timeline when retweets or text-only Tweets
 								            # are enabled, otherwise the Media timeline
 								            if self.retweets or self.textonly:
 								                return self.api.user_tweets
 								            else:
 								                return self.api.user_media
 								        if strategy == "tweets":
 								            return self.api.user_tweets
 								        if strategy == "with_replies":
 								            return self.api.user_tweets_and_replies
 								        # any other value falls back to the Media timeline
 								        return self.api.user_media
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
-												[twitter] improve results for regular user URLs

- continuation of 3346f58a
- use media timeline results (or tweet timeline if retweets are enabled)
  plus search results starting from the last tweet id of the first
  timeline, similar to how Twitter Media Downloader operates
- the old behavior can be forced by appending '/tweets' to a user URL,
  like with '/media' (https://twitter.com/USER/tweets)
  although there should be no need to ever do that

											
										
										
											2 years ago
+								class TwitterTweetsExtractor(TwitterExtractor):
 								    """Extractor for Tweets from a user's Tweets timeline"""
 								    subcategory = "tweets"
 								    # group 1 captures the user component of the path;
 								    # '(?!search)' rejects components that start with 'search' and
 								    # '(?!\w)' rejects paths where 'tweets' continues as a longer word
 								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/tweets(?!\w)"
 								    # sample URLs, optionally paired with a dict of expected results
 								    # (presumably consumed by the project's test runner; verify)
 								    test = (
 								        ("https://twitter.com/supernaturepics/tweets", {
 								            "range": "1-40",
 								            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
 								        }),
 								        ("https://mobile.twitter.com/supernaturepics/tweets#t"),
 								        ("https://www.twitter.com/id:2976459548/tweets"),
 								    )
 								    def tweets(self):
 								        # delegate to the API's Tweets timeline for the user from the URL
 								        return self.api.user_tweets(self.user)
-												[twitter] support '/with_replies' URLs (closes #1833)

											
										
										
											3 years ago
+								class TwitterRepliesExtractor(TwitterExtractor):
 								    """Extractor for Tweets from a user's timeline including replies"""
 								    subcategory = "replies"
 								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/with_replies(?!\w)"
 								    test = (
 								        ("https://twitter.com/supernaturepics/with_replies", {
 								            "range": "1-40",
 								            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
 								        }),
 								        ("https://mobile.twitter.com/supernaturepics/with_replies#t"),
 								        ("https://www.twitter.com/id:2976459548/with_replies"),
 								    )
 								    def tweets(self):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        return self.api.user_tweets_and_replies(self.user)
-												[twitter] support '/with_replies' URLs (closes #1833)

											
										
										
											3 years ago
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								class TwitterMediaExtractor(TwitterExtractor):
-												[twitter] support '/with_replies' URLs (closes #1833)

											
										
										
											3 years ago
+								    """Extractor for Tweets from a user's Media timeline"""
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
+								    subcategory = "media"
-												remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"

											
										
										
											4 years ago
+								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/media(?!\w)"
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								    test = (
 								        ("https://twitter.com/supernaturepics/media", {
 								            "range": "1-40",
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "url": "c570ac1aae38ed1463be726cc46f31cac3d82a40",
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								        }),
 								        ("https://mobile.twitter.com/supernaturepics/media#t"),
-												[twitter] support specifying users by ID (#980)

by using 'id:…' as their screen name, i.e.
https://www.twitter.com/id:2976459548/media
instead of
https://twitter.com/supernaturepics/media

The user ID can, for example, be obtained from the output of
$ gallery-dl -j --range 1 https://twitter.com/<screen-name>

											
										
										
											4 years ago
+								        ("https://www.twitter.com/id:2976459548/media"),
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								    )
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
 								    def tweets(self):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        return self.api.user_media(self.user)
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
-												[twitter] small improvements to search extractor

- put search results in separate directories
- set 'max_position' to '-1' for first request
  -> prevent duplicate results
- add a test
- flake8

											
										
										
											5 years ago
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
+								class TwitterLikesExtractor(TwitterExtractor):
 								    """Extractor for liked tweets"""
 								    subcategory = "likes"
-												remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"

											
										
										
											4 years ago
+								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/likes(?!\w)"
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
+								    test = ("https://twitter.com/supernaturepics/likes",)
-												[twitter] add 'user_likes' metadata field for liked tweets

i.e. the 'screen_name' of the user whose liked tweets get extracted.

Ideally this would replace 'user' or at least be in the same format,
but that would break backwards compatibility or be impossible/too
complicated thanks to API result differences.

(#1421)

											
										
										
											4 years ago
+								    def metadata(self):
 								        # expose the user component captured from the input URL
 								        # under the 'user_likes' key
 								        return {"user_likes": self.user}
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
+								    def tweets(self):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        return self.api.user_likes(self.user)
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
-												[twitter] add 'date_liked' metadata for liked Tweets (#3816)

											
										
										
											1 year ago
+								    def _transform_tweet(self, tweet):
 								        # run the base-class transformation, then add a 'date_liked' entry
 								        tdata = TwitterExtractor._transform_tweet(self, tweet)
 								        # 'sortIndex' shifted right by 20 bits is treated as milliseconds
 								        # and reduced to whole seconds before parsing.
 								        # NOTE(review): presumably the upper bits of 'sortIndex' encode
 								        # the time of the like; confirm against actual API responses
 								        tdata["date_liked"] = text.parse_timestamp(
 								            (int(tweet["sortIndex"]) >> 20) // 1000)
 								        return tdata
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
 								class TwitterBookmarkExtractor(TwitterExtractor):
 								    """Extractor for bookmarked tweets"""
 								    subcategory = "bookmark"
-												[twitter] add support for nitter.net URLs in pattern (#890)

Please note that URLs are only "translated", all requests are still
done always via the Twitter API.
											
										
										
											4 years ago
+								    pattern = BASE_PATTERN + r"/i/bookmarks()"
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
+								    test = ("https://twitter.com/i/bookmarks",)
 								    def tweets(self):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        return self.api.user_bookmarks()
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
-												[twitter] add 'date_bookmarked' metadata (#3816)

											
										
										
											1 year ago
+								    def _transform_tweet(self, tweet):
 								        # run the base-class transformation, then add a
 								        # 'date_bookmarked' entry
 								        tdata = TwitterExtractor._transform_tweet(self, tweet)
 								        # 'sortIndex' shifted right by 20 bits is treated as milliseconds
 								        # and reduced to whole seconds before parsing.
 								        # NOTE(review): presumably the upper bits of 'sortIndex' encode
 								        # the time of the bookmark; confirm against actual API responses
 								        tdata["date_bookmarked"] = text.parse_timestamp(
 								            (int(tweet["sortIndex"]) >> 20) // 1000)
 								        return tdata
-												[twitter] add extractor for liked tweets (closes #837)

You need to be logged in to get access to anyone's liked tweets,
it seems.

											
										
										
											4 years ago
-												[twitter] add 'list' extractor (#1096)

											
										
										
											4 years ago
+								class TwitterListExtractor(TwitterExtractor):
 								    """Extractor for Twitter lists"""
 								    subcategory = "list"
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								    pattern = BASE_PATTERN + r"/i/lists/(\d+)/?$"
-												[twitter] add 'list' extractor (#1096)

											
										
										
											4 years ago
+								    test = ("https://twitter.com/i/lists/784214683683127296", {
 								        "range": "1-40",
 								        "count": 40,
 								        "archive": False,
 								    })
 								    def tweets(self):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        return self.api.list_latest_tweets_timeline(self.user)
-												[twitter] add 'list' extractor (#1096)

											
										
										
											4 years ago
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								class TwitterListMembersExtractor(TwitterExtractor):
 								    """Extractor for members of a Twitter list"""
 								    subcategory = "list-members"
 								    pattern = BASE_PATTERN + r"/i/lists/(\d+)/members"
-												[twitter] update query hashes

											
										
										
											1 year ago
+								    test = ("https://twitter.com/i/lists/784214683683127296/members", {
 								        "pattern": TwitterTimelineExtractor.pattern,
 								        "range": "1-40",
 								        "count": 40,
 								    })
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
 								    def items(self):
 								        self.login()
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
+								        return self._users_result(TwitterAPI(self).list_members(self.user))
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
+								class TwitterFollowingExtractor(TwitterExtractor):
 								    """Extractor for followed users"""
 								    subcategory = "following"
 								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/following(?!\w)"
 								    test = (
 								        ("https://twitter.com/supernaturepics/following"),
 								        ("https://www.twitter.com/id:2976459548/following"),
 								    )
 								    def items(self):
 								        self.login()
-												[twitter] implement 'users' option (#1337)

											
										
										
											4 years ago
+								        return self._users_result(TwitterAPI(self).user_following(self.user))
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
-												Add search downloading to twitter.py (#448)

Adds the functionality to download search results on twitter.com/search. Since twitter only allows downloading of up to 3,200 of a user's most recent tweets, you will be unable to download old images from users with a lot of tweets. To bypass this, you can use the twitter search to get the tweets from the sections in time you were stopped at. An example search would be "from:user since:2015-01-01 until:2016-01-01 filter:images". The URL you would use will look something like this https://twitter.com/search?f=tweets&q=from%3Asupernaturepics%20since%3A2015-01-01%20until%3A2016-01-01%20filter%3Aimages&src=typd&lang=en

The _tweets_from_api function had to be changed because it would not get the next page of results using the last "data-tweet-id". It would return the same JSON but with a "min_position" string added. Using this string for the "max_position" param from the second page onwards correctly returned the next pages. This change does not interfere with how the other extractors work as far as I know. The 2 regex patterns in the extractors had to be changed to not match the search URL.
											
										
										
											5 years ago
class TwitterSearchExtractor(TwitterExtractor):
    """Extractor for Twitter search results"""
    subcategory = "search"
    # Group 1 captures the raw (still URL-encoded) value of the 'q'
    # parameter; any parameters in front of it are skipped.
    pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
    test = ("https://twitter.com/search?q=nature", {
        "range": "1-40",
        "count": 40,
        "archive": False,
    })

    def metadata(self):
        """Return the unquoted search query under the 'search' key"""
        # 'self.user' holds the pattern's first group, i.e. the raw query
        search_query = text.unquote(self.user)
        return {"search": search_query}

    def tweets(self):
        """Return the search results for the query from the URL

        When the query contains a 'from:NAME' filter, that user is looked
        up and passed to '_assign_user()' before the search is started.
        """
        # '+' stands for a space in query strings; replace it before
        # unquoting so that an encoded '%2B' is not affected
        query = text.unquote(self.user.replace("+", " "))

        screen_name = None
        for token in query.split():
            token = token.strip("()")
            if not token.startswith("from:"):
                continue
            if screen_name:
                # a second 'from:' filter: no single user to assign
                screen_name = None
                break
            screen_name = token[5:]  # text after 'from:'

        if screen_name is not None:
            try:
                account = self.api.user_by_screen_name(screen_name)
                self._assign_user(account)
            except KeyError:
                # best-effort only: a failed lookup must not stop the
                # search (presumably a nonexistent user - see #2226)
                pass

        return self.api.search_adaptive(query)
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
-												[twitter] add 'hashtag' extractor (#3783)

											
										
										
											2 years ago
class TwitterHashtagExtractor(TwitterExtractor):
    """Extractor for Twitter hashtags"""
    subcategory = "hashtag"
    # Group 1 is the hashtag name without the leading '#'
    pattern = BASE_PATTERN + r"/hashtag/([^/?#]+)"
    test = ("https://twitter.com/hashtag/nature", {
        "pattern": TwitterSearchExtractor.pattern,
        "url": "3571c3a53b7647ea35517041fdc17f77ec5b2cb9",
    })

    def items(self):
        """Queue a search URL for '#<hashtag>' instead of fetching Tweets"""
        # '%23' is the percent-encoded '#' in front of the hashtag name
        url_fmt = "{}/search?q=%23{}"
        search_url = url_fmt.format(self.root, self.user)
        # name the search extractor as the handler for the queued URL
        queue_data = {"_extractor": TwitterSearchExtractor}
        yield Message.Queue, search_url, queue_data
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
class TwitterEventExtractor(TwitterExtractor):
    """Extractor for Tweets from a Twitter Event"""
    subcategory = "event"
    # Events get their own directory built from the 'event' metadata
    directory_fmt = ("{category}", "Events",
                     "{event[id]} {event[short_title]}")
    # Group 1 is the numeric event ID (stored in 'self.user')
    pattern = BASE_PATTERN + r"/i/events/(\d+)"
    test = ("https://twitter.com/i/events/1484669206993903616", {
        "range": "1-20",
        "count": ">=1",
    })

    def metadata(self):
        """Return the event data under the 'event' key"""
        event_id = self.user
        event = self.api.live_event(event_id)
        return {"event": event}

    def tweets(self):
        """Return the Tweets of the event's timeline"""
        event_id = self.user
        return self.api.live_event_timeline(event_id)
-												[twitter] small improvements to search extractor

- put search results in separate directories
- set 'max_position' to '-1' for first request
  -> prevent duplicate results
- add a test
- flake8

											
										
										
											5 years ago
-												[twitter] add extractor for media-tweet timelines (#96)

For example "https://twitter.com/PicturesEarth/media".
They are different from normal timelines in that they do not contain
any (re)tweets from other users and feature all media the user ever
posted, including responses to other tweets.

											
										
										
											6 years ago
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								class TwitterTweetExtractor(TwitterExtractor):
-												[twitter] changes and improvements

- rename User- to TimelineExtractor
- rename 'userid' to 'user_id' to conform to the other ..._id values
- adjust archive_fmt to deal with retweets
- emulate browser behavior for API calls

											
										
										
											6 years ago
+								    """Extractor for images from individual tweets"""
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								    subcategory = "tweet"
-												remove '&' from URL patterns

'/?&#' -> '/?#' and '?&#' -> '?#'

According to https://www.ietf.org/rfc/rfc3986.txt, URLs are
"organized hierarchically" by using "the slash ("/"), question
mark ("?"), and number sign ("#") characters to delimit components"

											
										
										
											4 years ago
+								    pattern = BASE_PATTERN + r"/([^/?#]+|i/web)/status/(\d+)"
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											6 years ago
+								    test = (
-												[twitter] replace unit test URLs

https://twitter.com/PicturesEarth was deleted

											
										
										
											5 years ago
+								        ("https://twitter.com/supernaturepics/status/604341487988576256", {
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "url": "88a40f7d25529c2501c46f2218f9e0de9aa634b4",
-												[twitter] replace unit test URLs

https://twitter.com/PicturesEarth was deleted

											
										
										
											5 years ago
+								            "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
-												[twitter] ignore "Promoted Tweets"

											
										
										
											7 years ago
+								        }),
-												[twitter] extract 'date' metadata (#224)

											
										
										
											5 years ago
+								        # 4 images
-												[twitter] ignore "Promoted Tweets"

											
										
										
											7 years ago
+								        ("https://twitter.com/perrypumas/status/894001459754180609", {
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "url": "3a2a43dc5fb79dd5432c701d8e55e87c4e551f47",
-												[twitter] extract 'date' metadata (#224)

											
										
										
											5 years ago
+								        }),
 								        # video
 								        ("https://twitter.com/perrypumas/status/1065692031626829824", {
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            "pattern": r"https://video.twimg.com/ext_tw_video/.+\.mp4\?tag=5",
-												[twitter] ignore "Promoted Tweets"

											
										
										
											7 years ago
+								        }),
-												[twitter] improve 'content' formatting; add option (#338)

- include emoticons
- leave newlines intact
- remove pic.twitter.com/ links at the end

											
										
										
											5 years ago
+								        # content with emoji, newlines, hashtags (#338)
-												update extractor test results

- don't run Instagram tests on Travis anymore
- replace Twitter test because timeline was made private
- update Hiperdex domain to '.com' (again ...)

											
										
										
											4 years ago
+								        ("https://twitter.com/playpokemon/status/1263832915173048321", {
-												[twitter] metadata cleanup #2

- remove useless clutter by creating new tweet-data dicts instead of
  reusing the original Tweet objects
- rename fields to how they were named before
  ('id_str' -> 'tweet_id', etc.)
- only include 'author' if it would differ from 'user'
- restore 'archive_fmt'

											
										
										
											4 years ago
+								            "keyword": {"content": (
-												update extractor test results

- don't run Instagram tests on Travis anymore
- replace Twitter test because timeline was made private
- update Hiperdex domain to '.com' (again ...)

											
										
										
											4 years ago
+								                r"re:Gear up for #PokemonSwordShieldEX with special Mystery "
 								                "Gifts! \n\nYou’ll be able to receive four Galarian form "
 								                "Pokémon with Hidden Abilities, plus some very useful items. "
 								                "It’s our \\(Mystery\\) Gift to you, Trainers! \n\n❓🎁➡️ "
-												update test results

- twitter:

    Don't test the whole kwdict, only the actual content, since the
    keyword hash changes whenever that user changes his display name.

- khinsider:

    Download host changed

											
										
										
											5 years ago
+								            )},
-												[twitter] improve 'content' formatting; add option (#338)

- include emoticons
- leave newlines intact
- remove pic.twitter.com/ links at the end

											
										
										
											5 years ago
+								        }),
-												[twitter] update tests

											
										
										
											4 years ago
+								        # Reply to deleted tweet (#403, #838)
 								        ("https://twitter.com/i/web/status/1170041925560258560", {
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "pattern": r"https://pbs.twimg.com/media/EDzS7VrU0AAFL4_",
-												[twitter] small improvements

- handle reply tweets (#403)
- unset cookies in Tweet extractor to "force" the legacy interface

											
										
										
											5 years ago
+								        }),
-												[twitter] add 'replies' option (closes #705)

											
										
										
											4 years ago
+								        # 'replies' option (#705)
-												[twitter] update tests

											
										
										
											4 years ago
+								        ("https://twitter.com/i/web/status/1170041925560258560", {
-												[twitter] add 'replies' option (closes #705)

											
										
										
											4 years ago
+								            "options": (("replies", False),),
 								            "count": 0,
 								        }),
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies' to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								        # 'replies' to self (#1254)
 								        ("https://twitter.com/i/web/status/1424882930803908612", {
 								            "options": (("replies", "self"),),
 								            "count": 4,
-												[twitter] expand t.co links in user descriptions (#1532, #1787)

											
										
										
											3 years ago
+								            "keyword": {"user": {
 								                "description": "re:business email-- rhettaro.bloom@gmail.com "
 								                               "patreon- http://patreon.com/Princecanary",
 								                "url": "http://princecanary.tumblr.com",
 								            }},
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies' to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								        }),
 								        ("https://twitter.com/i/web/status/1424898916156284928", {
 								            "options": (("replies", "self"),),
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								            "count": 1,
-												[twitter] extend 'replies' option (#1254)

Allow setting 'replies' to '"self"' to only download from self-replies.

											
										
										
											3 years ago
+								        }),
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other users' timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								        # "quoted" option (#854)
-												[twitter] add option to filter media from quoted tweets (#854)

											
										
										
											4 years ago
+								        ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other users' timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								            "options": (("quoted", True),),
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "pattern": r"https://pbs\.twimg\.com/media/Ea[KG].+=jpg",
-												[twitter] add option to filter media from quoted tweets (#854)

											
										
										
											4 years ago
+								            "count": 8,
 								        }),
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other users' timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								        # quoted tweet (#526, #854)
-												[twitter] add option to filter media from quoted tweets (#854)

											
										
										
											4 years ago
+								        ("https://twitter.com/StobiesGalaxy/status/1270755918330896395", {
-												[twitter] update image URL format (#1145)

use
'/<name>?format=<fmt>&name=<size>'
instead of the potentially deprecated
'/<name>.<fmt>:<size>'

but keep all of them as fallback URLs

											
										
										
											4 years ago
+								            "pattern": r"https://pbs\.twimg\.com/media/EaK.+=jpg",
-												[twitter] add option to filter media from quoted tweets (#854)

											
										
										
											4 years ago
+								            "count": 4,
-												[twitter] handle quoted tweets (#526)

… and categorize them as retweets

											
										
										
											5 years ago
+								        }),
-												[twitter] do not overwrite previously assigned users (#3922)

											
										
										
											1 year ago
+								        # different 'user' and 'author' in quoted Tweet (#3922)
 								        ("https://twitter.com/web/status/1644907989109751810", {
 								            "keyword": {
 								                "author": {"id": 321629993         , "name": "Cakes_Comics"},
 								                "user"  : {"id": 718928225360080897, "name": "StobiesGalaxy"},
 								            },
 								        }),
-												[twitter] add option to extract TwitPic embeds (#579)

											
										
										
											5 years ago
+								        # TwitPic embeds (#579)
 								        ("https://twitter.com/i/web/status/112900228289540096", {
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            "options": (("twitpic", True), ("cards", False)),
-												[twitter] add option to extract TwitPic embeds (#579)

											
										
										
											5 years ago
+								            "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.jpg",
-												[twitter] extract TwitPic URLs in text (#3792)

also ignore previously seen URLs

											
										
										
											2 years ago
+								            "count": 2,  # 1 duplicate
 								        }),
 								        # TwitPic URL not in 'urls' (#3792)
 								        ("https://twitter.com/shimoigusaP/status/8138669971", {
 								            "options": (("twitpic", True),),
 								            "pattern": r"https://\w+.cloudfront.net/photos/large/\d+.png",
 								            "count": 1,
-												[twitter] add option to extract TwitPic embeds (#579)

											
										
										
											5 years ago
+								        }),
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								        # Twitter card (#1005)
 								        ("https://twitter.com/billboard/status/1306599586602135555", {
 								            "options": (("cards", True),),
-												[twitter] update GraphQL endpoint & fix width/height entries

											
										
										
											4 years ago
+								            "pattern": r"https://pbs.twimg.com/card_img/\d+/",
-												[twitter] support media from Cards (#1005, #937)

Can be enabled with 'extractor.twitter.cards', but for now disabled by
default because cards can redirect to rather large videos from YouTube
or Twitch.

											
										
										
											4 years ago
+								        }),
-												[twitter] general support for unified cards (#2875)

just removing the 'type' check seems to work

											
										
										
											2 years ago
+								        # unified_card image_website (#2875)
-												[twitter] support "image_website" unified cards (#2875)

											
										
										
											2 years ago
+								        ("https://twitter.com/i/web/status/1561674543323910144", {
 								            "options": (("cards", True),),
 								            "pattern": r"https://pbs\.twimg\.com/media/F.+=jpg",
 								        }),
-												[twitter] general support for unified cards (#2875)

just removing the 'type' check seems to work

											
										
										
											2 years ago
+								        # unified_card image_carousel_website
-												[twitter] support "image_carousel_website" unified cards

											
										
										
											3 years ago
+								        ("https://twitter.com/doax_vv_staff/status/1479438945662685184", {
 								            "options": (("cards", True),),
 								            "pattern": r"https://pbs\.twimg\.com/media/F.+=png",
 								            "count": 6,
 								        }),
-												[twitter] general support for unified cards (#2875)

just removing the 'type' check seems to work

											
										
										
											2 years ago
+								        # unified_card video_website (#2875)
 								        ("https://twitter.com/bang_dream_1242/status/1561548715348746241", {
 								            "options": (("cards", True),),
 								            "pattern": r"https://video\.twimg\.com/amplify_video"
 								                       r"/1560607284333449216/vid/720x720/\w+\.mp4",
 								        }),
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								        # unified_card without type
 								        ("https://twitter.com/i/web/status/1466183847628865544", {
 								            "count": 0,
 								        }),
-												[twitter] improve 'cards-blacklist' (#2875)

allow blacklisting domains and 'name:domain',
where 'domain' depends on a card's 'vanity_url' value

											
										
										
											2 years ago
+								        # 'cards-blacklist' option
 								        ("https://twitter.com/i/web/status/1571141912295243776", {
 								            "options": (("cards", "ytdl"),
 								                        ("cards-blacklist", ("twitch.tv",))),
 								            "count": 0,
 								        }),
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								        # retweet
 								        ("https://twitter.com/jessica_3978/status/1296304589591810048", {
 								            "options": (("retweets", True),),
 								            "count": 2,
 								            "keyword": {
 								                "tweet_id"  : 1296304589591810048,
 								                "retweet_id": 1296296016002547713,
 								                "date"      : "dt:2020-08-20 04:34:32",
 								            },
 								        }),
-												[twitter] extend 'retweets' option (closes #1026)

Setting 'retweets' to '"original"' will use metadata from the
original retweeted Tweets, and not from the Retweet entry.

											
										
										
											4 years ago
+								        # original retweets (#1026)
 								        ("https://twitter.com/jessica_3978/status/1296304589591810048", {
 								            "options": (("retweets", "original"),),
 								            "count": 2,
 								            "keyword": {
-												[twitter] set 'retweet_id' for original retweets (#1481)

											
										
										
											3 years ago
+								                "tweet_id"  : 1296296016002547713,
 								                "retweet_id": 1296296016002547713,
 								                "date"      : "dt:2020-08-20 04:00:28",
-												[twitter] extend 'retweets' option (closes #1026)

Setting 'retweets' to '"original"' will use metadata from the
original retweeted Tweets, and not from the Retweet entry.

											
										
										
											4 years ago
+								            },
 								        }),
-												[twitter] fix pagination for conversation tweets

a relic from the switch to GraphQL API

											
										
										
											2 years ago
+								        # all Tweets from a 'conversation' (#1319)
 								        ("https://twitter.com/supernaturepics/status/604341487988576256", {
-												[twitter] add option to download all media from a conversation

(fixes #1319)

											
										
										
											4 years ago
+								            "options": (("conversations", True),),
-												[twitter] fix pagination for conversation tweets

a relic from the switch to GraphQL API

											
										
										
											2 years ago
+								            "count": 5,
-												[twitter] add option to download all media from a conversation

(fixes #1319)

											
										
										
											4 years ago
+								        }),
-												[twitter] add missing retweet media entities (fixes #1555)

from the original tweets

											
										
										
											3 years ago
+								        # retweet with missing media entities (#1555)
 								        ("https://twitter.com/morino_ya/status/1392763691599237121", {
-												[twitter] change some defaults

- 'retweets' option: true -> false
- 'quoted' option  : true -> false

  i.e. disable downloading tweets from other users' timelines by default

- search directory:
    '["{category}", "Search", "{search}"]' ->
    '["{category}", "{user[name]}"]'

  i.e. change it to the same as other twitter extractors (#1308)

											
										
										
											3 years ago
+								            "options": (("retweets", True),),
-												update extractor test results

											
										
										
											2 years ago
+								            "count": 0,  # private
-												[twitter] add missing retweet media entities (fixes #1555)

from the original tweets

											
										
										
											3 years ago
+								        }),
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								        # deleted quote tweet (#2225)
 								        ("https://twitter.com/i/web/status/1460044411165888515", {
 								            "count": 0,
 								        }),
-												[twitter] handle Tweets with "softIntervention" entries

or other such things where the actual Tweet data is one level deeper
than usual

											
										
										
											3 years ago
+								        # "Misleading" content
 								        ("https://twitter.com/i/web/status/1486373748911575046", {
 								            "count": 4,
 								        }),
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's syndication API

											
										
										
											3 years ago
+								        # age-restricted (#2354)
 								        ("https://twitter.com/mightbecursed/status/1492954264909479936", {
 								            "options": (("syndication", True),),
-												fix 'keywords' in extractor tests (#3491)

											
										
										
											2 years ago
+								            "keyword": {"date": "dt:2022-02-13 20:10:09"},
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's syndication API

											
										
										
											3 years ago
+								            "count": 1,
 								        }),
-												[twitter] extract alt texts as 'description' (closes #2617)

											
										
										
											2 years ago
+								        # media alt texts / descriptions (#2617)
 								        ("https://twitter.com/my0nruri/status/1528379296041299968", {
 								            "keyword": {"description": "oc"}
 								        }),
-												[twitter] fix new-style '/card_img/' URLs

											
										
										
											2 years ago
+								        # '?format=...&name=...'-style URLs
 								        ("https://twitter.com/poco_dandy/status/1150646424461176832", {
 								            "options": (("cards", True),),
-												update extractor test results

											
										
										
											2 years ago
+								            "pattern": r"https://pbs.twimg.com/card_img/157\d+/[\w-]+"
-												[twitter] fix new-style '/card_img/' URLs

											
										
										
											2 years ago
+								                       r"\?format=(jpg|png)&name=orig$",
 								            "range": "1-2",
 								        }),
-												[twitter] support 'note_tweet's

											
										
										
											2 years ago
+								        # note tweet with long 'content'
 								        ("https://twitter.com/i/web/status/1629193457112686592", {
 								            "keyword": {
 								                "content": """\
 								BREAKING - DEADLY LIES: Independent researchers at Texas A&M University have \
 								just contradicted federal government regulators, saying that toxic air \
 								pollutants in East Palestine, Ohio, could pose long-term risks. \n\nThe \
 								Washington Post writes, "Three weeks after the toxic train derailment in \
 								Ohio, an analysis of Environmental Protection Agency data has found nine air \
 								pollutants at levels that could raise long-term health concerns in and around \
 								East Palestine, according to an independent analysis. \n\n\"The analysis by \
 								Texas A&M University seems to contradict statements by state and federal \
 								regulators that air near the crash site is completely safe, despite residents \
 								complaining about rashes, breathing problems and other health effects." \
 								Your reaction.""",
 								            },
 								        }),
-												simplify extractor constants

- single strings for URL patterns
- tuples instead of lists for 'directory_fmt' and 'test'
- single-tuple tests where applicable

											
										
										
											6 years ago
+								    )
-												[twitter] add extractor

											
										
										
											8 years ago
    def __init__(self, match):
        """Initialize the extractor from a matched Tweet URL.

        Runs the shared TwitterExtractor setup and stores the second
        capture group of 'match' as 'self.tweet_id'.
        """
        TwitterExtractor.__init__(self, match)
        # NOTE(review): group 2 is presumably the Tweet ID captured by this
        # extractor's URL pattern; the pattern is not visible in this chunk.
        self.tweet_id = match.group(2)
-												[twitter] add extractor

											
										
										
											8 years ago
-												[twitter] add support for user-timelines (closes #96)

also adds a 'retweets' option to filter retweeted content

											
										
										
											6 years ago
+								    def tweets(self):
-												[twitter] add option to download all media from a conversation

(fixes #1319)

											
										
										
											4 years ago
+								        if self.config("conversations", False):
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								            return self._tweets_conversation(self.tweet_id)
 								        else:
 								            return self._tweets_single(self.tweet_id)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								    def _tweets_single(self, tweet_id):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        tweets = []
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        for tweet in self.api.tweet_detail(tweet_id):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if tweet["rest_id"] == tweet_id or \
 								                    tweet.get("_retweet_id_str") == tweet_id:
-												[twitter] improve 480bc34e

only check for double user assignment where necessary

											
										
										
											1 year ago
+								                if self._user_obj is None:
 								                    self._assign_user(tweet["core"]["user_results"]["result"])
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                tweets.append(tweet)
 								                tweet_id = tweet["legacy"].get("quoted_status_id_str")
 								                if not tweet_id:
 								                    break
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        return tweets
-												[twitter] handle API rate limits (#526)

											
										
										
											5 years ago
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								    def _tweets_conversation(self, tweet_id):
 								        tweets = self.api.tweet_detail(tweet_id)
 								        buffer = []
 								        for tweet in tweets:
 								            buffer.append(tweet)
 								            if tweet["rest_id"] == tweet_id or \
 								                    tweet.get("_retweet_id_str") == tweet_id:
-												[twitter] simplify 'user' assignment

											
										
										
											2 years ago
+								                self._assign_user(tweet["core"]["user_results"]["result"])
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								                break
 								        return itertools.chain(buffer, tweets)
-												[twitter] handle API rate limits (#526)

											
										
										
											5 years ago
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
class TwitterAvatarExtractor(TwitterExtractor):
    # Matches '<user>/photo' URLs and produces at most one Tweet-like
    # object built from the user's profile image.
    subcategory = "avatar"
    filename_fmt = "avatar {date}.{extension}"
    archive_fmt = "AV_{user[id]}_{date}"
    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/photo"
    test = (
        ("https://twitter.com/supernaturepics/photo", {
            "pattern": r"https://pbs\.twimg\.com/profile_images"
                       r"/554585280938659841/FLVAlX18\.jpeg",
            "keyword": {
                "date": "dt:2015-01-12 10:26:49",
                "extension": "jpeg",
                "filename": "FLVAlX18",
                "tweet_id": 554585280938659841,
            },
        }),
        ("https://twitter.com/User16/photo", {
            "count": 0,
        }),
    )

    def tweets(self):
        """Return a 1-tuple with the avatar 'Tweet', or () for the default
        profile image."""
        # NOTE(review): this lookup presumably fills 'self._user_obj' as a
        # side effect; its implementation is not visible in this chunk.
        self.api._user_id_by_screen_name(self.user)
        profile = self._user_obj

        avatar = profile["legacy"]["profile_image_url_https"]
        placeholder = ("https://abs.twimg.com/sticky"
                       "/default_profile_images/default_profile_normal.png")
        if avatar == placeholder:
            return ()

        # drop the '_normal' size suffix from the file name
        avatar = avatar.replace("_normal.", ".")
        # the second-to-last path segment is used as the ID string
        segments = avatar.rsplit("/", 2)
        return (self._make_tweet(profile, avatar, segments[1]),)
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
 								class TwitterBackgroundExtractor(TwitterExtractor):
 								    subcategory = "background"
 								    filename_fmt = "background {date}.{extension}"
 								    archive_fmt = "BG_{user[id]}_{date}"
 								    pattern = BASE_PATTERN + r"/(?!search)([^/?#]+)/header_photo"
 								    test = (
 								        ("https://twitter.com/supernaturepics/header_photo", {
 								            "pattern": r"https://pbs\.twimg\.com/profile_banners"
 								                       r"/2976459548/1421058583",
 								            "keyword": {
 								                "date": "dt:2015-01-12 10:29:43",
 								                "filename": "1421058583",
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								                "tweet_id": 554586009367478272,
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
+								            },
 								        }),
 								        ("https://twitter.com/User16/header_photo", {
 								            "count": 0,
 								        }),
 								    )
 								    def tweets(self):
 								        self.api._user_id_by_screen_name(self.user)
-												misc fixes

- fix typo (#3399)
- remove double assignment
- [bunkr] update things I forgot in 6b6f886d
- [soundgasm] adjust 'archive_fmt' (#3388)

											
										
										
											2 years ago
+								        user = self._user_obj
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
 								        try:
 								            url = user["legacy"]["profile_banner_url"]
 								            _, timestamp = url.rsplit("/", 1)
 								        except (KeyError, ValueError):
 								            return ()
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								        id_str = str((int(timestamp) * 1000 - 1288834974657) << 22)
 								        return (self._make_tweet(user, url, id_str),)
-												[twitter] add 'avatar' and 'background' extractors (#349, #3023)

											
										
										
											2 years ago
-												[twitter] add extractor for direct image links (closes #1417)

											
										
										
											4 years ago
class TwitterImageExtractor(Extractor):
    # Handles direct 'pbs.twimg.com/media/...' image links.
    # Derives from the generic Extractor and borrows the size/fallback
    # helpers from TwitterExtractor by calling them unbound.
    category = "twitter"
    subcategory = "image"
    pattern = r"https?://pbs\.twimg\.com/media/([\w-]+)(?:\?format=|\.)(\w+)"
    test = (
        ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG?format=jpg&name=orig", {
            "options": (("size", "4096x4096,orig"),),
            "url": "cb3042a6f6826923da98f0d2b66c427e9385114c",
        }),
        ("https://pbs.twimg.com/media/EqcpviCVoAAG-QG.jpg:orig"),
    )

    def __init__(self, match):
        """Store media ID and format from 'match' and set up image sizes."""
        Extractor.__init__(self, match)
        media_id, media_fmt = match.groups()
        self.id = media_id
        self.fmt = media_fmt
        # sets 'self._size_image' and 'self._size_fallback'
        TwitterExtractor._init_sizes(self)

    def items(self):
        """Yield one Directory and one Url message for the image."""
        template = "https://pbs.twimg.com/media/{}?format={}&name="
        base = template.format(self.id, self.fmt)

        metadata = {
            "filename": self.id,
            "extension": self.fmt,
            "_fallback": TwitterExtractor._image_fallback(self, base),
        }

        yield Message.Directory, metadata
        yield Message.Url, base + self._size_image, metadata
-												[twitter] add extractor for direct image links (closes #1417)

											
										
										
											4 years ago
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								class TwitterAPI():
 								    def __init__(self, extractor):
 								        self.extractor = extractor
-												[twitter] update API calls

- use 'https://twitter.com/i/api' for all requests
  except '/guest/activate.json'
- update (default) URL parameters
- update GraphQL endpoints

											
										
										
											4 years ago
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								        self.root = "https://api.twitter.com"
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								        self._nsfw_warning = True
 								        self._syndication = self.extractor.syndication
 								        self._json_dumps = json.JSONEncoder(separators=(",", ":")).encode
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								        cookies = extractor.session.cookies
 								        cookiedomain = extractor.cookiedomain
 								        csrf = extractor.config("csrf")
 								        if csrf is None or csrf == "cookies":
 								            csrf_token = cookies.get("ct0", domain=cookiedomain)
 								        else:
 								            csrf_token = None
 								        if not csrf_token:
 								            csrf_token = util.generate_token()
 								            cookies.set("ct0", csrf_token, domain=cookiedomain)
 								        auth_token = cookies.get("auth_token", domain=cookiedomain)
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								        self.headers = {
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "Accept": "*/*",
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
 								                             "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
 								                             "4FA33AGWWjCpTnA",
 								            "x-guest-token": None,
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								            "x-twitter-auth-type": "OAuth2Session" if auth_token else None,
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            "x-twitter-client-language": "en",
 								            "x-twitter-active-user": "yes",
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								            "x-csrf-token": csrf_token,
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "Origin": "https://twitter.com",
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            "Referer": "https://twitter.com/",
 								        }
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								        self.params = {
 								            "include_profile_interstitial_type": "1",
 								            "include_blocking": "1",
 								            "include_blocked_by": "1",
 								            "include_followed_by": "1",
 								            "include_want_retweets": "1",
 								            "include_mute_edge": "1",
 								            "include_can_dm": "1",
 								            "include_can_media_tag": "1",
 								            "include_ext_has_nft_avatar": "1",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "include_ext_is_blue_verified": "1",
 								            "include_ext_verified_type": "1",
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "skip_status": "1",
 								            "cards_platform": "Web-12",
 								            "include_cards": "1",
 								            "include_ext_alt_text": "true",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "include_ext_limited_action_results": "false",
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "include_quote_count": "true",
 								            "include_reply_count": "1",
 								            "tweet_mode": "extended",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "include_ext_collab_control": "true",
 								            "include_ext_views": "true",
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "include_entities": "true",
 								            "include_user_entities": "true",
 								            "include_ext_media_color": "true",
 								            "include_ext_media_availability": "true",
 								            "include_ext_sensitive_media_warning": "true",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "include_ext_trusted_friends_metadata": "true",
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "send_error_codes": "true",
 								            "simple_quoted_tweet": "true",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "q": None,
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "count": "100",
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "query_source": None,
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								            "cursor": None,
-												[twitter] remove 'tweet_search_mode' from search parameters (#3522)

and update API root and general query parameters

											
										
										
											2 years ago
+								            "pc": None,
 								            "spelling_corrections": None,
 								            "include_ext_edit_control": "true",
 								            "ext": "mediaStats,highlightedLabel,hasNftAvatar,voiceInfo,"
 								                   "enrichments,superFollowMetadata,unmentionInfo,editControl,"
 								                   "collab_control,vibe",
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								        }
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        self.variables = {
 								            "withDownvotePerspective": False,
 								            "withReactionsMetadata": False,
 								            "withReactionsPerspective": False,
 								        }
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								        self.features = {
-												[twitter] update query hashes

											
										
										
											1 year ago
+								            "blue_business_profile_image_shape_enabled": False,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
 								            "responsive_web_graphql_exclude_directive_enabled": True,
 								            "verified_phone_label_enabled": False,
 								            "responsive_web_graphql_skip_user_profile_"
 								            "image_extensions_enabled": False,
 								            "responsive_web_graphql_timeline_navigation_enabled": True,
 								        }
 								        self.features_pagination = {
-												[twitter] allow setting custom features per API endpoint

											
										
										
											1 year ago
+								            "blue_business_profile_image_shape_enabled": False,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "responsive_web_twitter_blue_verified_badge_is_enabled": True,
 								            "responsive_web_graphql_exclude_directive_enabled": True,
 								            "verified_phone_label_enabled": False,
 								            "responsive_web_graphql_timeline_navigation_enabled": True,
 								            "responsive_web_graphql_skip_user_profile_"
 								            "image_extensions_enabled": False,
 								            "tweetypie_unmention_optimization_enabled": True,
 								            "vibe_api_enabled": True,
 								            "responsive_web_edit_tweet_api_enabled": True,
 								            "graphql_is_translatable_rweb_tweet_is_translatable_enabled": True,
 								            "view_counts_everywhere_api_enabled": True,
 								            "longform_notetweets_consumption_enabled": True,
 								            "tweet_awards_web_tipping_enabled": False,
 								            "freedom_of_speech_not_reach_fetch_enabled": False,
 								            "standardized_nudges_misinfo": True,
 								            "tweet_with_visibility_results_prefer_gql_"
 								            "limited_actions_policy_enabled": False,
 								            "interactive_text_enabled": True,
 								            "responsive_web_text_conversations_enabled": False,
 								            "longform_notetweets_richtext_consumption_enabled": False,
 								            "responsive_web_enhance_cards_enabled": False,
 								        }
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
 								    def tweet_detail(self, tweet_id):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/AV_lPTkN6Fc6LgerQpK8Zg/TweetDetail"
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        variables = {
 								            "focalTweetId": tweet_id,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "referrer": "profile",
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            "with_rux_injections": False,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "includePromotedContent": True,
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            "withCommunity": True,
 								            "withQuickPromoteEligibilityTweetFields": True,
 								            "withBirdwatchNotes": False,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "withSuperFollowsUserFields": True,
 								            "withSuperFollowsTweetFields": True,
 								            "withVoice": True,
 								            "withV2Timeline": True,
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        }
 								        return self._pagination_tweets(
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            endpoint, variables, ("threaded_conversation_with_injections_v2",))
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
 								    def user_tweets(self, screen_name):
 								        """Return the paginated 'UserTweets' results for a user"""
 								        user_id = self._user_id_by_screen_name(screen_name)
 								        query = {
 								            "userId": user_id,
 								            "count": 100,
 								            "includePromotedContent": True,
 								            "withQuickPromoteEligibilityTweetFields": True,
 								            "withVoice": True,
 								            "withV2Timeline": True,
 								        }
 								        return self._pagination_tweets(
 								            "/graphql/BeHK76TOCY3P8nO-FWocjA/UserTweets", query)
 								    def user_tweets_and_replies(self, screen_name):
 								        """Return the paginated 'UserTweetsAndReplies' results for a user"""
 								        user_id = self._user_id_by_screen_name(screen_name)
 								        query = {
 								            "userId": user_id,
 								            "count": 100,
 								            "includePromotedContent": True,
 								            "withCommunity": True,
 								            "withVoice": True,
 								            "withV2Timeline": True,
 								        }
 								        return self._pagination_tweets(
 								            "/graphql/eZVlZu_1gwb6hMUDXBnZoQ/UserTweetsAndReplies", query)
 								    def user_media(self, screen_name):
 								        """Return the paginated 'UserMedia' results for a user"""
 								        user_id = self._user_id_by_screen_name(screen_name)
 								        query = {
 								            "userId": user_id,
 								            "count": 100,
 								            "includePromotedContent": False,
 								            "withClientEventToken": False,
 								            "withBirdwatchNotes": False,
 								            "withVoice": True,
 								            "withV2Timeline": True,
 								        }
 								        return self._pagination_tweets(
 								            "/graphql/d_ONZLUHGCsErBCriRsLXg/UserMedia", query)
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								    def user_media_legacy(self, screen_name):
 								        endpoint = "/graphql/nRybED9kRbN-TOWioHq1ng/UserMedia"
 								        variables = {
 								            "userId": self._user_id_by_screen_name(screen_name),
 								            "count": 100,
 								            "includePromotedContent": False,
 								            "withSuperFollowsUserFields": True,
 								            "withBirdwatchPivots": False,
 								            "withSuperFollowsTweetFields": True,
 								            "withClientEventToken": False,
 								            "withBirdwatchNotes": False,
 								            "withVoice": True,
 								            "withV2Timeline": False,
 								            "__fs_interactive_text": False,
 								            "__fs_dont_mention_me_view_api_enabled": False,
 								        }
 								        return self._pagination_tweets(
 								            endpoint, variables, ("user", "result", "timeline", "timeline"),
 								            features=False)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								    def user_likes(self, screen_name):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/fN4-E0MjFJ9Cn7IYConL7g/Likes"
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        variables = {
 								            "userId": self._user_id_by_screen_name(screen_name),
 								            "count": 100,
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								            "includePromotedContent": False,
 								            "withClientEventToken": False,
 								            "withBirdwatchNotes": False,
 								            "withVoice": True,
 								            "withV2Timeline": True,
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        }
 								        return self._pagination_tweets(endpoint, variables)
 								    def user_bookmarks(self):
 								        """Return the paginated 'Bookmarks' results"""
 								        query = {"count": 100}
 								        # work on a copy so 'features_pagination' itself stays unmodified
 								        bookmark_features = self.features_pagination.copy()
 								        bookmark_features["graphql_timeline_v2_bookmark_timeline"] = True
 								        path = ("bookmark_timeline_v2", "timeline")
 								        return self._pagination_tweets(
 								            "/graphql/RV1g3b8n_SGOHwkqKYSCFw/Bookmarks", query, path, False,
 								            features=bookmark_features)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
 								    def list_latest_tweets_timeline(self, list_id):
 								        """Return the paginated 'ListLatestTweetsTimeline' results"""
 								        query = {
 								            "listId": list_id,
 								            "count": 100,
 								        }
 								        path = ("list", "tweets_timeline", "timeline")
 								        return self._pagination_tweets(
 								            "/graphql/5DAiJG3bD77SiWEs4xViBw/ListLatestTweetsTimeline",
 								            query, path)
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								    def search_adaptive(self, query):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        endpoint = "/2/search/adaptive.json"
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								        params = self.params.copy()
 								        params["q"] = query
-												[twitter] fix search (#3536)

- partially revert 18fe4b334db0fbfa21fe932b73a2e64ebdd456a8
- properly search for cursor when processing 'replaceEntry'

											
										
										
											2 years ago
+								        params["tweet_search_mode"] = "live"
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								        params["query_source"] = "typed_query"
 								        params["pc"] = "1"
 								        params["spelling_corrections"] = "1"
 								        return self._pagination_legacy(endpoint, params)
 								    def live_event_timeline(self, event_id):
 								        """Return the paginated 'recap' timeline of a live event"""
 								        params = self.params.copy()
 								        timeline_params = (
 								            ("timeline_id", "recap"),
 								            ("urt", "true"),
 								            ("get_annotations", "true"),
 								        )
 								        for key, value in timeline_params:
 								            params[key] = value
 								        return self._pagination_legacy(
 								            "/2/live_event/timeline/{}.json".format(event_id), params)
 								    def live_event(self, event_id):
 								        """Return the 'live_events' entry stored under 'event_id'"""
 								        endpoint = "/1.1/live_event/1/{}/timeline.json".format(event_id)
 								        params = self.params.copy()
 								        for key, value in (("count", "0"), ("urt", "true")):
 								            params[key] = value
 								        data = self._call(endpoint, params)
 								        live_events = data["twitter_objects"]["live_events"]
 								        return live_events[event_id]
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] add 'list' extractor (#1096)

											
										
										
											4 years ago
+								    def list_by_rest_id(self, list_id):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/D0EoyrDcct2MEqC-LnPzFg/ListByRestId"
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								        params = {
 								            "variables": self._json_dumps({
 								                "listId": list_id,
 								                "withSuperFollowsUserFields": True,
 								            }),
 								            "features": self._json_dumps(self.features),
 								        }
-												[twitter] add 'list' extractor (#1096)

											
										
										
											4 years ago
+								        try:
 								            return self._call(endpoint, params)["data"]["list"]
 								        except KeyError:
 								            raise exception.NotFoundError("list")
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
+								    def list_members(self, list_id):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/tzsIIbGUH9RyFCVmtO2W2w/ListMembers"
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
+								        variables = {
 								            "listId": list_id,
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            "count": 100,
 								            "withSafetyModeUserFields": True,
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
+								        }
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        return self._pagination_users(
 								            endpoint, variables, ("list", "members_timeline", "timeline"))
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
 								    def user_following(self, screen_name):
 								        """Return the paginated 'Following' results for a user"""
 								        user_id = self._user_id_by_screen_name(screen_name)
 								        query = {
 								            "userId": user_id,
 								            "count": 100,
 								            "includePromotedContent": False,
 								        }
 								        return self._pagination_users(
 								            "/graphql/FaBzCqZXuQCb4PhB0RHqHw/Following", query)
-												[twitter] add extractor for followed users (#1337)

https://twitter.com/USER/following or
https://twitter.com/id:USERID/following

											
										
										
											4 years ago
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								    def user_by_rest_id(self, rest_id):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/S2BkcAyFMG--jef2N6Dgzw/UserByRestId"
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								        params = {
 								            "variables": self._json_dumps({
 								                "userId": rest_id,
 								                "withSafetyModeUserFields": True,
 								            }),
 								            "features": self._json_dumps(self.features),
 								        }
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								        return self._call(endpoint, params)["data"]["user"]["result"]
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								    def user_by_screen_name(self, screen_name):
-												[twitter] update query hashes

											
										
										
											1 year ago
+								        endpoint = "/graphql/k26ASEiniqy4eXMdknTSoQ/UserByScreenName"
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								        params = {
 								            "variables": self._json_dumps({
 								                "screen_name": screen_name,
 								                "withSafetyModeUserFields": True,
 								            }),
 								            "features": self._json_dumps(self.features),
 								        }
-												[twitter] improve error handling

- handle accounts without 'rest_id'
- handle timelines with empty 'instructions'

											
										
										
											3 years ago
+								        return self._call(endpoint, params)["data"]["user"]["result"]
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] support specifying users by ID (#980)

by using 'id:…' as their screen name, i.e.
https://www.twitter.com/id:2976459548/media
instead of
https://twitter.com/supernaturepics/media

The user ID can, for example, be obtained from the output of
$ gallery-dl -j --range 1 https://twitter.com/<screen-name>

											
										
										
											4 years ago
+								    def _user_id_by_screen_name(self, screen_name):
-												[twitter] fix using user IDs for suspended accounts

											
										
										
											2 years ago
+								        user = ()
 								        try:
 								            if screen_name.startswith("id:"):
 								                user = self.user_by_rest_id(screen_name[3:])
 								            else:
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								                user = self.user_by_screen_name(screen_name)
-												[twitter] fix using user IDs for suspended accounts

											
										
										
											2 years ago
+								            self.extractor._assign_user(user)
 								            return user["rest_id"]
 								        except KeyError:
 								            if "unavailable_message" in user:
 								                raise exception.NotFoundError("{} ({})".format(
 								                    user["unavailable_message"].get("text"),
 								                    user.get("reason")), False)
 								            else:
 								                raise exception.NotFoundError("user")
-												[twitter] support specifying users by ID (#980)

by using 'id:…' as their screen name, i.e.
https://www.twitter.com/id:2976459548/media
instead of
https://twitter.com/supernaturepics/media

The user ID can, for example, be obtained from the output of
$ gallery-dl -j --range 1 https://twitter.com/<screen-name>

											
										
										
											4 years ago
-												[twitter] move '_guest_token()' into TwitterAPI class

											
										
										
											4 years ago
+								    @cache(maxage=3600)
 								    def _guest_token(self):
-												[twitter] update API calls

- use 'https://twitter.com/i/api' for all requests
  except '/guest/activate.json'
- update (default) URL parameters
- update GraphQL endpoints

											
										
										
											4 years ago
+								        endpoint = "/1.1/guest/activate.json"
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								        self.extractor.log.info("Requesting guest token")
 								        return str(self._call(endpoint, None, "POST", False)["guest_token"])
-												[twitter] move '_guest_token()' into TwitterAPI class

											
										
										
											4 years ago
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    def _authenticate_guest(self):
 								        guest_token = self._guest_token()
 								        if guest_token != self.headers["x-guest-token"]:
 								            self.headers["x-guest-token"] = guest_token
 								            self.extractor.session.cookies.set(
 								                "gt", guest_token, domain=self.extractor.cookiedomain)
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								    def _call(self, endpoint, params, method="GET", auth=True):
 								        url = self.root + endpoint
-												[twitter] improve error message formatting

											
										
										
											4 years ago
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								        while True:
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								            if not self.headers["x-twitter-auth-type"] and auth:
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								                self._authenticate_guest()
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								            response = self.extractor.request(
-												[twitter] refresh guest tokens (#3445, #3458)

											
										
										
											2 years ago
+								                url, method=method, params=params,
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								                headers=self.headers, fatal=None)
 								            # update 'x-csrf-token' header (#1170)
 								            csrf_token = response.cookies.get("ct0")
 								            if csrf_token:
 								                self.headers["x-csrf-token"] = csrf_token
 								            if response.status_code < 400:
-												[twitter] add option to log out when blocked (#1719)

											
										
										
											3 years ago
+								                # success
-												[twitter] fix handling of 429 responses (fixes #2339)

Twitter doesn't return a valid JSON response for 429 errors anymore.

											
										
										
											3 years ago
+								                return response.json()
-												[twitter] add option to log out when blocked (#1719)

											
										
										
											3 years ago
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								            if response.status_code == 429:
-												[twitter] add option to log out when blocked (#1719)

											
										
										
											3 years ago
+								                # rate limit exceeded
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								                until = response.headers.get("x-rate-limit-reset")
 								                seconds = None if until else 60
 								                self.extractor.wait(until=until, seconds=seconds)
 								                continue
-												[twitter] add option to log out when blocked (#1719)

											
										
										
											3 years ago
-												[twitter] better error message for guest searches (#3942)

											
										
										
											1 year ago
+								            if response.status_code == 403 and \
 								                    not self.headers["x-twitter-auth-type"] and \
 								                    endpoint == "/2/search/adaptive.json":
 								                raise exception.AuthorizationError(
 								                    "Login required to access search results")
-												[twitter] add option to log out when blocked (#1719)

											
										
										
											3 years ago
+								            # error
-												[twitter] fix handling of 429 responses (fixes #2339)

Twitter doesn't return a valid JSON response for 429 errors anymore.

											
										
										
											3 years ago
+								            try:
 								                data = response.json()
 								                errors = ", ".join(e["message"] for e in data["errors"])
 								            except ValueError:
 								                errors = response.text
 								            except Exception:
 								                errors = data.get("errors", "")
-												[twitter] improve and fix retry after hitting rate limit

- replace recursive call with infinite loop
- fix function arguments for recursive call

											
										
										
											4 years ago
+								            raise exception.StopExtraction(
-												[twitter] distinguish between fatal & nonfatal errors (#2020)

only show a warning for nonfatal errors
and do not raise a StopExtraction exception

											
										
										
											3 years ago
+								                "%s %s (%s)", response.status_code, response.reason, errors)
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								    def _pagination_legacy(self, endpoint, params):
-												[twitter] extend 'retweets' option (closes #1026)

Setting 'retweets' to '"original"' will use metadata from the
original retweeted Tweets, and not from the Retweet entry.

											
										
										
											4 years ago
+								        original_retweets = (self.extractor.retweets == "original")
-												[twitter] fix search (#3536)

- partially revert 18fe4b334db0fbfa21fe932b73a2e64ebdd456a8
- properly search for cursor when processing 'replaceEntry'

											
										
										
											2 years ago
+								        bottom = ("cursor-bottom-", "sq-cursor-bottom")
-												[twitter] add 'bookmark' extractor (closes #625)

											
										
										
											5 years ago
 								        while True:
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            data = self._call(endpoint, params)
-												[twitter] improve pagination

											
										
										
											4 years ago
-												[twitter] "fix" search pagination (#3536, #3534)

- properly process instructions
- do not expect a predetermined instruction order

											
										
										
											2 years ago
+								            instructions = data["timeline"]["instructions"]
 								            if not instructions:
-												[twitter] improve pagination

											
										
										
											4 years ago
+								                return
-												[twitter] update 'search' pagination (#544)

Only stop when list of all returned Tweets is empty
instead of when no valid Tweet was found.

											
										
										
											2 years ago
-												[twitter] add 'bookmark' extractor (closes #625)

											
										
										
											5 years ago
+								            tweets = data["globalObjects"]["tweets"]
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            users = data["globalObjects"]["users"]
-												[twitter] update 'search' pagination (#544)

Only stop when list of all returned Tweets is empty
instead of when no valid Tweet was found.

											
										
										
											2 years ago
+								            tweet_id = cursor = None
 								            tweet_ids = []
-												[twitter] "fix" search pagination (#3536, #3534)

- properly process instructions
- do not expect a predetermined instruction order

											
										
										
											2 years ago
+								            entries = ()
 								            # process instructions
 								            for instr in instructions:
 								                if "addEntries" in instr:
 								                    entries = instr["addEntries"]["entries"]
 								                elif "replaceEntry" in instr:
 								                    entry = instr["replaceEntry"]["entry"]
-												[twitter] fix search (#3536)

- partially revert 18fe4b334db0fbfa21fe932b73a2e64ebdd456a8
- properly search for cursor when processing 'replaceEntry'

											
										
										
											2 years ago
+								                    if entry["entryId"].startswith(bottom):
-												[twitter] "fix" search pagination (#3536, #3534)

- properly process instructions
- do not expect a predetermined instruction order

											
										
										
											2 years ago
+								                        cursor = (entry["content"]["operation"]
 								                                  ["cursor"]["value"])
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								            # collect tweet IDs and cursor value
-												[twitter] "fix" search pagination (#3536, #3534)

- properly process instructions
- do not expect a predetermined instruction order

											
										
										
											2 years ago
+								            for entry in entries:
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                entry_startswith = entry["entryId"].startswith
 								                if entry_startswith(("tweet-", "sq-I-t-")):
 								                    tweet_ids.append(
 								                        entry["content"]["item"]["content"]["tweet"]["id"])
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                elif entry_startswith("homeConversation-"):
 								                    tweet_ids.extend(
 								                        entry["content"]["timelineModule"]["metadata"]
 								                        ["conversationMetadata"]["allTweetIds"][::-1])
-												[twitter] fix search (#3536)

- partially revert 18fe4b334db0fbfa21fe932b73a2e64ebdd456a8
- properly search for cursor when processing 'replaceEntry'

											
										
										
											2 years ago
+								                elif entry_startswith(bottom):
-												[twitter] improve pagination

											
										
										
											4 years ago
+								                    cursor = entry["content"]["operation"]["cursor"]
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								                    if not cursor.get("stopOnEmptyResponse", True):
-												[twitter] improve pagination

											
										
										
											4 years ago
+								                        # keep going even if there are no tweets
-												[twitter] update 'search' pagination (#544)

Only stop when list of all returned Tweets is empty
instead of when no valid Tweet was found.

											
										
										
											2 years ago
+								                        tweet_id = True
-												[twitter] improve pagination

											
										
										
											4 years ago
+								                    cursor = cursor["value"]
-												[twitter] add option to download all media from a conversation

(fixes #1319)

											
										
										
											4 years ago
+								                elif entry_startswith("conversationThread-"):
 								                    tweet_ids.extend(
 								                        item["entryId"][6:]
 								                        for item in entry["content"]["timelineModule"]["items"]
 								                        if item["entryId"].startswith("tweet-")
 								                    )
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								            # process tweets
 								            for tweet_id in tweet_ids:
 								                try:
 								                    tweet = tweets[tweet_id]
 								                except KeyError:
 								                    self.extractor.log.debug("Skipping %s (deleted)", tweet_id)
 								                    continue
 								                if "retweeted_status_id_str" in tweet:
 								                    retweet = tweets.get(tweet["retweeted_status_id_str"])
 								                    if original_retweets:
 								                        if not retweet:
 								                            continue
-												[twitter] set 'retweet_id' for original retweets (#1481)

											
										
										
											3 years ago
+								                        retweet["retweeted_status_id_str"] = retweet["id_str"]
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                        retweet["_retweet_id_str"] = tweet["id_str"]
 								                        tweet = retweet
 								                    elif retweet:
 								                        tweet["author"] = users[retweet["user_id_str"]]
-												[twitter] add missing retweet media entities (fixes #1555)

from the original tweets

											
										
										
											3 years ago
+								                        if "extended_entities" in retweet and \
 								                                "extended_entities" not in tweet:
 								                            tweet["extended_entities"] = \
 								                                retweet["extended_entities"]
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                tweet["user"] = users[tweet["user_id_str"]]
 								                yield tweet
 								                if "quoted_status_id_str" in tweet:
 								                    quoted = tweets.get(tweet["quoted_status_id_str"])
 								                    if quoted:
-												[twitter] fix issue when filtering quote tweets (#1792)

When a user quotes his own Tweet and that Tweet gets filtered by
'"quoted": false', it could also get filtered when it appeared later
as regular Tweet.

											
										
										
											3 years ago
+								                        quoted = quoted.copy()
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                        quoted["author"] = users[quoted["user_id_str"]]
-												[twitter] update 'quote_id' and 'quote_by'

- 'quote_id' is now non-null for quoted Tweets and has the ID of the
  quoting Tweet, instead the other way round like before
- 'quote_by' is now the 'screen_name' of the quoting user
  (was the same the new 'quote_id' is now)

											
										
										
											2 years ago
+								                        quoted["quoted_by"] = tweet["user"]["screen_name"]
-												[twitter] add 'quote_by' metadata field (#1481)

Only present for tweets quoted by another tweet.
Represents the tweet_id of said tweet quoting this one.

											
										
										
											3 years ago
+								                        quoted["quoted_by_id_str"] = tweet["id_str"]
-												[twitter] fetch tweets from  'homeConversation' entries

When logged in, some entries returned by Twitter's API are so called
'homeConversation's (they would be regular tweet entries otherwise.)

Those weren't picked up before and resulted in missing files compared
to accessing a timeline as guest.

('/media' timelines and search results were not affected)

											
										
										
											4 years ago
+								                        yield quoted
-												[twitter] "fix" search pagination (#3536, #3534)

- properly process instructions
- do not expect a predetermined instruction order

											
										
										
											2 years ago
+								            # stop on empty response
-												[twitter] update 'search' pagination (#544)

Only stop when list of all returned Tweets is empty
instead of when no valid Tweet was found.

											
										
										
											2 years ago
+								            if not cursor or (not tweets and not tweet_id):
-												[twitter] add 'bookmark' extractor (closes #625)

											
										
										
											5 years ago
+								                return
-												[twitter] rewrite; use new interface (#740, #806)

Everything except logging in with username & password and TwitPic
embeds should be working again.

Metadata per Tweet is massively different than before (mostly raw API
responses - might need some cleaning up) and the default 'archive_fmt'
changed.

											
										
										
											4 years ago
+								            params["cursor"] = cursor
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
-												[twitter] update bookmarks pagination (#3172)

Do not stop when there aren't any tweets in a batch,
but only when the same cursor value appears twice in a row.

											
										
										
											2 years ago
+								    def _pagination_tweets(self, endpoint, variables,
-												[twitter] allow setting custom features per API endpoint

											
										
										
											1 year ago
+								                           path=None, stop_tweets=True, features=None):
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								        extr = self.extractor
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        variables.update(self.variables)
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								        original_retweets = (extr.retweets == "original")
 								        pinned_tweet = extr.pinned
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								        params = {"variables": None}
-												[twitter] allow setting custom features per API endpoint

											
										
										
											1 year ago
+								        if features is None:
 								            features = self.features_pagination
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								        if features:
-												[twitter] allow setting custom features per API endpoint

											
										
										
											1 year ago
+								            params["features"] = self._json_dumps(features)
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								        while True:
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								            params["variables"] = self._json_dumps(variables)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            data = self._call(endpoint, params)["data"]
-												[twitter] improve error handling

											
										
										
											3 years ago
+								            try:
 								                if path is None:
-												[twitter] update API endpoints and parameters

											
										
										
											2 years ago
+								                    instructions = (data["user"]["result"]["timeline_v2"]
-												[twitter] improve error handling

											
										
										
											3 years ago
+								                                    ["timeline"]["instructions"])
 								                else:
-												[twitter] restore errors for protected timelines etc (fixes #2237)

											
										
										
											3 years ago
+								                    instructions = data
-												[twitter] improve error handling

											
										
										
											3 years ago
+								                    for key in path:
-												[twitter] restore errors for protected timelines etc (fixes #2237)

											
										
										
											3 years ago
+								                        instructions = instructions[key]
 								                    instructions = instructions["instructions"]
-												[twitter] improve error handling

- handle accounts without 'rest_id'
- handle timelines with empty 'instructions'

											
										
										
											3 years ago
-												[twitter] fix extraction (#2275)

											
										
										
											3 years ago
+								                for instr in instructions:
 								                    if instr.get("type") == "TimelineAddEntries":
 								                        entries = instr["entries"]
 								                        break
 								                else:
 								                    raise KeyError()
-												combine KeyError & IndexError to common base class LookupError

											
										
										
											3 years ago
+								            except LookupError:
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								                extr.log.debug(data)
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								                user = extr._user_obj
 								                if user:
 								                    user = user["legacy"]
 								                    if user.get("blocked_by"):
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								                        if self.headers["x-twitter-auth-type"] and \
 								                                extr.config("logout"):
 								                            extr._cookiefile = None
 								                            del extr.session.cookies["auth_token"]
 								                            self.headers["x-twitter-auth-type"] = None
 								                            extr.log.info("Retrying API request as guest")
 								                            continue
 								                        raise exception.AuthorizationError(
 								                            "{} blocked your account".format(
 								                                user["screen_name"]))
 								                    elif user.get("protected"):
 								                        raise exception.AuthorizationError(
 								                            "{}'s Tweets are protected".format(
 								                                user["screen_name"]))
 								                raise exception.StopExtraction(
 								                    "Unable to retrieve Tweets from this timeline")
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] improve error handling

- handle accounts without 'rest_id'
- handle timelines with empty 'instructions'

											
										
										
											3 years ago
+								            tweets = []
 								            tweet = cursor = None
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            if pinned_tweet:
 								                pinned_tweet = False
 								                if instructions[-1]["type"] == "TimelinePinEntry":
-												[twitter] fix pinned tweets (#2216)

caused by the changes in dffa440edef9be1e169ef1e2d6bc0a492493ffce

											
										
										
											3 years ago
+								                    tweets.append(instructions[-1]["entry"])
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] improve error handling

- handle accounts without 'rest_id'
- handle timelines with empty 'instructions'

											
										
										
											3 years ago
+								            for entry in entries:
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                esw = entry["entryId"].startswith
 								                if esw("tweet-"):
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                    tweets.append(entry)
-												[twitter] support 'profile-conversation' entries (#3938)

											
										
										
											1 year ago
+								                elif esw(("homeConversation-",
 								                          "profile-conversation-",
 								                          "conversationthread-")):
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                    tweets.extend(entry["content"]["items"])
-												[twitter] warn about age-restricted Tweets (#2354)

											
										
										
											3 years ago
+								                elif esw("tombstone-"):
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								                    item = entry["content"]["itemContent"]
 								                    item["tweet_results"] = \
 								                        {"result": {"tombstone": item["tombstoneInfo"]}}
 								                    tweets.append(entry)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                elif esw("cursor-bottom-"):
 								                    cursor = entry["content"]
-												[twitter] fix pagination for conversion tweets

a relic from the switch to GraphQL API

											
										
										
											2 years ago
+								                    if "itemContent" in cursor:
 								                        cursor = cursor["itemContent"]
-												[twitter] add 'event' extractor (closes #2109)

											
										
										
											3 years ago
+								                    if not cursor.get("stopOnEmptyResponse", True):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which were not used by Twitter since
summer 2021, are going to finally be phased out it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                        # keep going even if there are no tweets
 								                        tweet = True
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                    cursor = cursor.get("value")
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								            for entry in tweets:
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                try:
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                    tweet = ((entry.get("content") or entry["item"])
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                             ["itemContent"]["tweet_results"]["result"])
-												[twitter] warn about age-restricted Tweets (#2354)

											
										
										
											3 years ago
+								                    if "tombstone" in tweet:
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								                        tweet = self._process_tombstone(
 								                            entry, tweet["tombstone"])
 								                        if not tweet:
 								                            continue
-												[twitter] handle Tweets with "softIntervention" entries

or other such things where the actual Tweet data is one level deeper
than usual

											
										
										
											3 years ago
+								                    if "tweet" in tweet:
 								                        tweet = tweet["tweet"]
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                    legacy = tweet["legacy"]
-												[twitter] fix regression from 160335ad

Tweets from 'homeConversation' or 'conversationthread' entries do not
contain a 'sortIndex' field. Accessing it raises a KeyError and would
erroneously get them labeled as 'deleted'.

											
										
										
											1 year ago
+								                    tweet["sortIndex"] = entry.get("sortIndex")
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                except KeyError:
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								                    extr.log.debug(
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                        "Skipping %s (deleted)",
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                        (entry.get("entryId") or "").rpartition("-")[2])
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                    continue
 								                if "retweeted_status_result" in legacy:
 								                    retweet = legacy["retweeted_status_result"]["result"]
-												[twitter] fix some 'original' retweets not downloading (#3744)

											
										
										
											2 years ago
+								                    if "tweet" in retweet:
 								                        retweet = retweet["tweet"]
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                    if original_retweets:
-												[twitter] fix deleted/invalid retweets (#2225)

											
										
										
											3 years ago
+								                        try:
 								                            retweet["legacy"]["retweeted_status_id_str"] = \
 								                                retweet["rest_id"]
 								                            retweet["_retweet_id_str"] = tweet["rest_id"]
 								                            tweet = retweet
 								                        except KeyError:
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                            continue
-												[twitter] fix deleted/invalid retweets (#2225)

											
										
										
											3 years ago
+								                    else:
 								                        try:
 								                            legacy["retweeted_status_id_str"] = \
 								                                retweet["rest_id"]
-												[twitter] update 'user' and 'author' fields

- 'author' is always the user who authored a tweet
- 'user' is always the user specified in the input URL
  or equal to 'author' when the former is not given

											
										
										
											2 years ago
+								                            tweet["author"] = \
-												[twitter] fix deleted/invalid retweets (#2225)

											
										
										
											3 years ago
+								                                retweet["core"]["user_results"]["result"]
-												[twitter] warn about 'withheld' Tweets and users (#3864)

											
										
										
											1 year ago
 								                            rtlegacy = retweet["legacy"]
 								                            if "extended_entities" in rtlegacy and \
-												[twitter] fix deleted/invalid retweets (#2225)

											
										
										
											3 years ago
+								                                    "extended_entities" not in legacy:
 								                                legacy["extended_entities"] = \
-												[twitter] warn about 'withheld' Tweets and users (#3864)

											
										
										
											1 year ago
+								                                    rtlegacy["extended_entities"]
 								                            if "withheld_scope" in rtlegacy and \
 								                                    "withheld_scope" not in legacy:
 								                                legacy["withheld_scope"] = \
 								                                    rtlegacy["withheld_scope"]
 								                                legacy["full_text"] = rtlegacy["full_text"]
-												[twitter] fix deleted/invalid retweets (#2225)

											
										
										
											3 years ago
+								                        except KeyError:
 								                            pass
-												[twitter] improve handling of deleted tweets (#2212)

											
										
										
											3 years ago
+								                yield tweet
 								                if "quoted_status_result" in tweet:
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                    try:
 								                        quoted = tweet["quoted_status_result"]["result"]
-												[twitter] update 'quote_id' and 'quote_by'

- 'quote_id' is now non-null for quoted Tweets and holds the ID of the
  quoting Tweet, instead of the other way round as before
- 'quote_by' is now the 'screen_name' of the quoting user
  (it previously held the value that the new 'quote_id' holds now)

											
										
										
											2 years ago
+								                        quoted["legacy"]["quoted_by"] = (
 								                            tweet["core"]["user_results"]["result"]
 								                            ["legacy"]["screen_name"])
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                        quoted["legacy"]["quoted_by_id_str"] = tweet["rest_id"]
-												[twitter] fix regression from 160335ad

Tweets from 'homeConversation' or 'conversationthread' entries do not
contain a 'sortIndex' field. Accessing it raises a KeyError and would
erroneously get them labeled as 'deleted'.

											
										
										
											1 year ago
+								                        quoted["sortIndex"] = entry.get("sortIndex")
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                        yield quoted
 								                    except KeyError:
-												[twitter] restore 'logout' functionality (#1719)

											
										
										
											3 years ago
+								                        extr.log.debug(
-												[twitter] fix several errors (#2212, #2216, #2225)

- fix Tweets with deleted quotes
- fix suspended Tweets without 'legacy' entry
- fix unified_cards without 'type'

											
										
										
											3 years ago
+								                            "Skipping quote of %s (deleted)",
 								                            tweet.get("rest_id"))
 								                        continue
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which Twitter has not used since
summer 2021, are finally going to be phased out, it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
-												[twitter] update bookmarks pagination (#3172)

Do not stop when there aren't any tweets in a batch,
but only when the same cursor value appears twice in a row.

											
										
										
											2 years ago
+								            if stop_tweets and not tweet:
 								                return
 								            if not cursor or cursor == variables.get("cursor"):
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which Twitter has not used since
summer 2021, are finally going to be phased out, it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								                return
 								            variables["cursor"] = cursor
 								    def _pagination_users(self, endpoint, variables, path=None):
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
+								        variables.update(self.variables)
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								        params = {"variables": None,
 								                  "features" : self._json_dumps(self.features_pagination)}
-												[twitter] simplify

- use dict with common GraphQL variables
- reduce 'variables' size with custom JSON encoder instance
- centralise TwitterAPI() creation

											
										
										
											3 years ago
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								        while True:
 								            cursor = entry = stop = None
-												[twitter] fall back to legacy /media endpoint when not logged in

											
										
										
											2 years ago
+								            params["variables"] = self._json_dumps(variables)
-												[twitter] update to GraphQL API (#2212)

The old REST API endpoints, which Twitter has not used since
summer 2021, are finally going to be phased out, it seems, with
'/2/timeline/profile/USERID.json' being the first one.

Only Twitter's search doesn't have a GraphQL interface yet.

											
										
										
											3 years ago
+								            data = self._call(endpoint, params)["data"]
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
-												[twitter] improve error handling

											
										
										
											3 years ago
+								            try:
 								                if path is None:
 								                    instructions = (data["user"]["result"]["timeline"]
 								                                    ["timeline"]["instructions"])
 								                else:
 								                    for key in path:
 								                        data = data[key]
 								                    instructions = data["instructions"]
 								            except KeyError:
 								                return
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
 								            for instr in instructions:
 								                if instr["type"] == "TimelineAddEntries":
 								                    for entry in instr["entries"]:
 								                        if entry["entryId"].startswith("user-"):
-												[twitter] ignore invalid user entries (#2850)

											
										
										
											2 years ago
+								                            try:
 								                                user = (entry["content"]["itemContent"]
 								                                        ["user_results"]["result"])
 								                            except KeyError:
 								                                pass
 								                            else:
 								                                if "rest_id" in user:
 								                                    yield user
-												[twitter] add 'list-members' extractor (closes #1096)

											
										
										
											4 years ago
+								                        elif entry["entryId"].startswith("cursor-bottom-"):
 								                            cursor = entry["content"]["value"]
 								                elif instr["type"] == "TimelineTerminateTimeline":
 								                    if instr["direction"] == "Bottom":
 								                        stop = True
 								            if stop or not cursor or not entry:
 								                return
 								            variables["cursor"] = cursor
-												[twitter] warn about age-restricted Tweets (#2354)

											
										
										
											3 years ago
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								    def _process_tombstone(self, entry, tombstone):
-												[twitter] warn about age-restricted Tweets (#2354)

											
										
										
											3 years ago
+								        text = (tombstone.get("richText") or tombstone["text"])["text"]
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								        tweet_id = entry["entryId"].rpartition("-")[2]
 								        if text.startswith("Age-restricted"):
 								            if self._syndication:
 								                return self._syndication_tweet(tweet_id)
 								            elif self._nsfw_warning:
 								                self._nsfw_warning = False
 								                self.extractor.log.warning('"%s"', text)
 								        self.extractor.log.debug("Skipping %s (\"%s\")", tweet_id, text)
 								    def _syndication_tweet(self, tweet_id):
-												[twitter] update URL for syndication API (#3160)

Twitter changed the URL format to access tweet data through their syndication API.
											
										
										
											2 years ago
+								        base_url = "https://cdn.syndication.twimg.com/tweet-result?id="
 								        tweet = self.extractor.request(base_url + tweet_id).json()
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
 								        tweet["user"]["description"] = ""
 								        tweet["user"]["entities"] = {"description": {}}
-												[twitter] fix various syndication issues

- handle retweets
- fix videos without dimensions in URL (3e942a58)
- fix '"retweets": "self"' filter (#2499)

											
										
										
											2 years ago
+								        tweet["user_id_str"] = tweet["user"]["id_str"]
 								        if tweet["id_str"] != tweet_id:
 								            tweet["retweeted_status_id_str"] = tweet["id_str"]
 								            tweet["id_str"] = retweet_id = tweet_id
 								        else:
 								            retweet_id = None
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
-												[twitter] assume 'conversation_id' when using syndication

not possible to expand replies at the moment

											
										
										
											2 years ago
+								        # assume 'conversation_id' is the same as 'id' when the tweet
 								        # is not a reply
 								        if "conversation_id_str" not in tweet and \
 								                "in_reply_to_status_id_str" not in tweet:
 								            tweet["conversation_id_str"] = tweet["id_str"]
-												[twitter] calculate 'date' from Tweet IDs

20 times faster than parsing 'created_at'

											
										
										
											1 year ago
+								        if int(tweet_id) < 300000000000000:
 								            tweet["created_at"] = text.parse_datetime(
 								                tweet["created_at"], "%Y-%m-%dT%H:%M:%S.%fZ").strftime(
 								                "%a %b %d %H:%M:%S +0000 %Y")
-												[twitter] provide proper 'date' for syndication results (#2920)

											
										
										
											2 years ago
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								        if "video" in tweet:
 								            video = tweet["video"]
-												[twitter] improve syndication video selection (#2354)

- ignore .m3u8 manifests
- always select largest format

											
										
										
											2 years ago
+								            video["variants"] = (max(
 								                (v for v in video["variants"] if v["type"] == "video/mp4"),
-												[twitter] fix various syndication issues

- handle retweets
- fix videos without dimensions in URL (3e942a58)
- fix '"retweets": "self"' filter (#2499)

											
										
										
											2 years ago
+								                key=lambda v: text.parse_int(
 								                    v["src"].split("/")[-2].partition("x")[0])
-												[twitter] improve syndication video selection (#2354)

- ignore .m3u8 manifests
- always select largest format

											
										
										
											2 years ago
+								            ),)
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								            video["variants"][0]["url"] = video["variants"][0]["src"]
 								            tweet["extended_entities"] = {"media": [{
 								                "video_info"   : video,
 								                "original_info": {"width" : 0, "height": 0},
 								            }]}
 								        elif "photos" in tweet:
 								            for p in tweet["photos"]:
 								                p["media_url_https"] = p["url"]
 								                p["original_info"] = {
 								                    "width" : p["width"],
 								                    "height": p["height"],
 								                }
 								            tweet["extended_entities"] = {"media": tweet["photos"]}
 								        return {
 								            "rest_id": tweet["id_str"],
 								            "legacy" : tweet,
-												[twitter] implement constant 'user' for tweet URLs

											
										
										
											2 years ago
+								            "core"   : {"user_results": {"result": tweet["user"]}},
-												[twitter] fix various syndication issues

- handle retweets
- fix videos without dimensions in URL (3e942a58)
- fix '"retweets": "self"' filter (#2499)

											
										
										
											2 years ago
+								            "_retweet_id_str": retweet_id,
-												[twitter] add 'syndication' option (#2354)

to fetch age-restricted content using Twitter's  syndication API

											
										
										
											3 years ago
+								        }
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
 								@cache(maxage=360*86400, keyarg=1)
 								def _login_impl(extr, username, password):
 								    import re
 								    import random
 								    if re.fullmatch(r"[\w.%+-]+@[\w.-]+\.\w{2,}", username):
 								        extr.log.warning(
 								            "Login with email is no longer possible. "
 								            "You need to provide your username or phone number instead.")
 								    def process(response):
 								        """Return the 'flow_token' of a login step or raise on failure.
 								        A response with a JSON body and a status code below 400 yields
 								        its 'flow_token'. Any other response is logged at debug level
 								        and reported as an AuthenticationError built from the messages
 								        found in its 'errors' list.
 								        """
 								        def describe(item):
 								            # quote the server-supplied message, if there is one
 								            message = item.get("message")
 								            if message:
 								                return '"{}"'.format(message)
 								            return "Unknown error"
 								        try:
 								            payload = response.json()
 								        except ValueError:
 								            # body is not JSON: substitute a synthetic error entry
 								            payload = {"errors": ({"message": "Invalid response"},)}
 								        else:
 								            if response.status_code < 400:
 								                return payload["flow_token"]
 								        reasons = [describe(item) for item in payload.get("errors") or ()]
 								        extr.log.debug(response.text)
 								        raise exception.AuthenticationError(", ".join(reasons))
 								    extr.session.cookies.clear()
 								    api = TwitterAPI(extr)
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    api._authenticate_guest()
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    headers = api.headers
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
 								    extr.log.info("Logging in as %s", username)
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
 								    # init
 								    data = {
 								        "input_flow_data": {
 								            "flow_context": {
 								                "debug_overrides": {},
 								                "start_location": {"location": "unknown"},
 								            },
 								        },
 								        "subtask_versions": {
 								            "action_list": 2,
 								            "alert_dialog": 1,
 								            "app_download_cta": 1,
 								            "check_logged_in_account": 1,
 								            "choice_selection": 3,
 								            "contacts_live_sync_permission_prompt": 0,
 								            "cta": 7,
 								            "email_verification": 2,
 								            "end_flow": 1,
 								            "enter_date": 1,
 								            "enter_email": 2,
 								            "enter_password": 5,
 								            "enter_phone": 2,
 								            "enter_recaptcha": 1,
 								            "enter_text": 5,
 								            "enter_username": 2,
 								            "generic_urt": 3,
 								            "in_app_notification": 1,
 								            "interest_picker": 3,
 								            "js_instrumentation": 1,
 								            "menu_dialog": 1,
 								            "notifications_permission_prompt": 2,
 								            "open_account": 2,
 								            "open_home_timeline": 1,
 								            "open_link": 1,
 								            "phone_verification": 4,
 								            "privacy_options": 1,
 								            "security_key": 3,
 								            "select_avatar": 4,
 								            "select_banner": 2,
 								            "settings_list": 7,
 								            "show_code": 1,
 								            "sign_up": 2,
 								            "sign_up_review": 4,
 								            "tweet_selection_urt": 1,
 								            "update_users": 1,
 								            "upload_media": 1,
 								            "user_recommendations_list": 4,
 								            "user_recommendations_urt": 1,
 								            "wait_spinner": 3,
 								            "web_modal": 1,
 								        },
 								    }
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    url = "https://api.twitter.com/1.1/onboarding/task.json?flow_name=login"
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    response = extr.request(url, method="POST", headers=headers, json=data)
 								    data = {
 								        "flow_token": process(response),
 								        "subtask_inputs": [
 								            {
 								                "subtask_id": "LoginJsInstrumentationSubtask",
 								                "js_instrumentation": {
 								                    "response": "{}",
 								                    "link": "next_link",
 								                },
 								            },
 								        ],
 								    }
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    url = "https://api.twitter.com/1.1/onboarding/task.json"
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    response = extr.request(
 								        url, method="POST", headers=headers, json=data, fatal=None)
 								    # username
 								    data = {
 								        "flow_token": process(response),
 								        "subtask_inputs": [
 								            {
 								                "subtask_id": "LoginEnterUserIdentifierSSO",
 								                "settings_list": {
 								                    "setting_responses": [
 								                        {
 								                            "key": "user_identifier",
 								                            "response_data": {
 								                                "text_data": {"result": username},
 								                            },
 								                        },
 								                    ],
 								                    "link": "next_link",
 								                },
 								            },
 								        ],
 								    }
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    #  url = "https://api.twitter.com/1.1/onboarding/task.json"
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    extr.sleep(random.uniform(2.0, 4.0), "login (username)")
 								    response = extr.request(
 								        url, method="POST", headers=headers, json=data, fatal=None)
 								    # password
 								    data = {
 								        "flow_token": process(response),
 								        "subtask_inputs": [
 								            {
 								                "subtask_id": "LoginEnterPassword",
 								                "enter_password": {
 								                    "password": password,
 								                    "link": "next_link",
 								                },
 								            },
 								        ],
 								    }
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    #  url = "https://api.twitter.com/1.1/onboarding/task.json"
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    extr.sleep(random.uniform(2.0, 4.0), "login (password)")
 								    response = extr.request(
 								        url, method="POST", headers=headers, json=data, fatal=None)
 								    # account duplication check ?
 								    data = {
 								        "flow_token": process(response),
 								        "subtask_inputs": [
 								            {
 								                "subtask_id": "AccountDuplicationCheck",
 								                "check_logged_in_account": {
 								                    "link": "AccountDuplicationCheck_false",
 								                },
 								            },
 								        ],
 								    }
-												[twitter] fix login after 32b03433

											
										
										
											2 years ago
+								    #  url = "https://api.twitter.com/1.1/onboarding/task.json"
-												[twitter] fix login (#3220)

Using an email as 'username' seems to no longer be possible,
as Twitter will always additionally ask for username or phone number
when providing an email address as 'username'.

											
										
										
											2 years ago
+								    response = extr.request(
 								        url, method="POST", headers=headers, json=data, fatal=None)
 								    process(response)
 								    return {
 								        cookie.name: cookie.value
 								        for cookie in extr.session.cookies
 								    }