gallery-dl/gallery_dl/extractor/twitter.py

# -*- coding: utf-8 -*-

# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extract images from https://twitter.com/"""

from .common import Extractor, Message
from .. import text, exception
from ..cache import cache


class TwitterExtractor(Extractor):
    """Base class for twitter extractors"""
    category = "twitter"
    directory_fmt = ("{category}", "{user}")
    filename_fmt = "{tweet_id}_{num}.{extension}"
    archive_fmt = "{tweet_id}_{retweet_id}_{num}"
    root = "https://twitter.com"
    # size suffixes appended to each image URL by items(), in the order
    # in which the resulting candidate URLs are listed
    sizes = (":orig", ":large", ":medium", ":small")

    def __init__(self, match):
        """Store the user name and read the 'retweets'/'videos' options"""
        Extractor.__init__(self, match)
        # first capture group of the matching URL pattern
        self.user = match.group(1)
        self.retweets = self.config("retweets", True)
        self.videos = self.config("videos", False)

    def items(self):
        """Yield a Version, a Directory and one message per media item

        Every image found in a tweet is emitted as a Urllist message
        holding one URL per entry in 'sizes'. If the 'videos' option is
        enabled, tweets containing the '-videoContainer' marker
        additionally produce a Url message with a 'ytdl:' URL.
        """
        self.login()
        yield Message.Version, 1
        yield Message.Directory, self.metadata()

        for tweet in self.tweets():
            data = self._data_from_tweet(tweet)

            # skip retweets when the 'retweets' option is disabled
            if not self.retweets and data["retweet_id"]:
                continue

            images = text.extract_iter(
                tweet, 'data-image-url="', '"')
            # the loop target stores the 1-based image index directly
            # in the metadata dict that is yielded with each message
            for data["num"], url in enumerate(images, 1):
                text.nameext_from_url(url, data)
                urls = [url + size for size in self.sizes]
                yield Message.Urllist, urls, data

            if self.videos and "-videoContainer" in tweet:
                data["num"] = 1
                # URL of the tweet itself, marked with a 'ytdl:' prefix
                # NOTE(review): presumably consumed by a youtube-dl
                # based downloader -- confirm
                url = "ytdl:{}/{}/status/{}".format(
                    self.root, data["user"], data["tweet_id"])
                yield Message.Url, url, data

    def metadata(self):
        """Return general metadata"""
        return {"user": self.user}

    def tweets(self):
        """Yield HTML content of all relevant tweets"""

    def login(self):
        """Log in and apply the session cookies if a username is set"""
        username, password = self._get_auth_info()
        if username:
            self._update_cookies(self._login_impl(username, password))

    # maxage is 360 days expressed in seconds
    # NOTE(review): keyarg=1 presumably keys the cache on 'username'
    @cache(maxage=360*24*3600, keyarg=1)
    def _login_impl(self, username, password):
        """Submit the login form and return the session's cookies

        Raises exception.AuthenticationError if the response URL of the
        login request contains '/error'.
        """
        self.log.info("Logging in as %s", username)

        page = self.request(self.root + "/login").text
        pos = page.index('name="authenticity_token"')
        # start looking for 'value="' 80 characters before the 'name'
        # attribute
        # NOTE(review): presumably because the token value precedes the
        # name inside the same <input> element -- confirm
        token = text.extract(page, 'value="', '"', pos-80)[0]

        url = self.root + "/sessions"
        data = {
            "session[username_or_email]": username,
            "session[password]"         : password,
            "authenticity_token"        : token,
            "ui_metrics"                : '{"rf":{},"s":""}',
            "scribe_log"                : "",
            "redirect_after_login"      : "",
            "remember_me"               : "1",
        }
        response = self.request(url, method="POST", data=data)

        if "/error" in response.url:
            raise exception.AuthenticationError()
        return self.session.cookies

    @staticmethod
    def _data_from_tweet(tweet):
        """Build the metadata dict for the HTML of a single tweet"""
        # NOTE(review): extr() appears to continue from the end of its
        # previous match, so the lookups below presumably have to keep
        # the order of the attributes in the markup -- confirm against
        # text.extract_from()
        extr = text.extract_from(tweet)
        return {
            "tweet_id"  : text.parse_int(extr('data-tweet-id="'  , '"')),
            "retweet_id": text.parse_int(extr('data-retweet-id="', '"')),
            "retweeter" : extr('data-retweeter="'  , '"'),
            "user"      : extr('data-screen-name="', '"'),
            "username"  : extr('data-name="'       , '"'),
            "user_id"   : text.parse_int(extr('data-user-id="'   , '"')),
            "date"      : text.parse_timestamp(extr('data-time="', '"')),
            "content"   : text.unescape(text.remove_html(extr(
                '<div class="js-tweet-text-container">', '\n</div>'
            ))).replace(" @ ", " @").replace(" # ", " #"),
        }

    def _tweets_from_api(self, url):
        """Yield the HTML of every tweet returned by the endpoint 'url'

        Requests 'url' repeatedly, passing the ID of the last tweet of a
        page as 'max_position' of the next request. Stops when the
        response reports no more items, when a page contains no tweets,
        or when the last tweet ID is not smaller than the previous
        'max_position'.
        """
        params = {
            "include_available_features": "1",
            "include_entities": "1",
            "reset_error_state": "false",
            "lang": "en",
        }
        headers = {
            "X-Requested-With": "XMLHttpRequest",
            "X-Twitter-Active-User": "yes",
            "Referer": "{}/{}".format(self.root, self.user)
        }

        while True:
            data = self.request(url, params=params, headers=headers).json()
            if "inner" in data:
                data = data["inner"]

            # reset per page, so an empty page is detectable below and
            # 'tweet' is always bound after the loop
            tweet = None
            for tweet in text.extract_iter(
                    data["items_html"], '<div class="tweet ', '\n</li>'):
                yield tweet

            # a page without tweets provides no new position to continue
            # from, regardless of what 'has_more_items' claims
            if tweet is None or not data["has_more_items"]:
                return

            position = text.parse_int(text.extract(
                tweet, 'data-tweet-id="', '"')[0])
            # stop if the position did not move towards older tweets;
            # otherwise the same page would be requested again and again
            if "max_position" in params and position >= params["max_position"]:
                return
            params["max_position"] = position


class TwitterTimelineExtractor(TwitterExtractor):
    """Extractor for all images from a user's timeline"""
    subcategory = "timeline"
    # the single capture group is the user name; only URLs that end
    # right after it (with an optional trailing slash) are matched
    pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
               r"/([^/?&#]+)/?$")
    test = ("https://twitter.com/supernaturepics", {
        "range": "1-40",
        "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
        "keyword": "d07e8d2dd4ece0dc93e068579f8fb75d83d16767",
    })

    def tweets(self):
        """Return the tweets of the user's timeline endpoint"""
        endpoint = "{}/i/profiles/show/{}/timeline/tweets"
        return self._tweets_from_api(endpoint.format(self.root, self.user))


class TwitterMediaExtractor(TwitterExtractor):
    """Extractor for all images from a user's Media Tweets"""
    subcategory = "media"
    # the single capture group is the user name; '/media' must not be
    # followed by another word character
    pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
               r"/([^/?&#]+)/media(?!\w)")
    test = ("https://twitter.com/supernaturepics/media", {
        "range": "1-40",
        "url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",
    })

    def tweets(self):
        """Return the tweets of the user's media timeline endpoint"""
        endpoint = "{}/i/profiles/show/{}/media_timeline"
        return self._tweets_from_api(endpoint.format(self.root, self.user))


class TwitterTweetExtractor(TwitterExtractor):
    """Extractor for images from individual tweets"""
    subcategory = "tweet"
    # group 1 is the user name, group 2 the numeric status ID
    pattern = (r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com"
               r"/([^/?&#]+)/status/(\d+)")
    test = (
        ("https://twitter.com/supernaturepics/status/604341487988576256", {
            "url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",
            "keyword": "d6149c5734f2e91d29a99600592e04b349daaedb",
            "content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",
        }),
        # 4 images
        ("https://twitter.com/perrypumas/status/894001459754180609", {
            "url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",
            "keyword": "cc9860f46ec0d0f19da2232281544b85d573eb13",
        }),
        # video
        ("https://twitter.com/perrypumas/status/1065692031626829824", {
            "options": (("videos", True),),
            "pattern": r"ytdl:https://twitter.com/perrypumas/status/\d+",
        }),
    )

    def __init__(self, match):
        """Additionally store the status ID from the matched URL"""
        TwitterExtractor.__init__(self, match)
        self.tweet_id = match.group(2)

    def metadata(self):
        """Return the user name and tweet ID taken from the URL"""
        info = {"user": self.user}
        info["tweet_id"] = self.tweet_id
        return info

    def tweets(self):
        """Return a 1-tuple holding the HTML of the requested tweet"""
        status_url = "{}/{}/status/{}".format(
            self.root, self.user, self.tweet_id)
        html = self.request(status_url).text
        # cut the page down to the part between the opening tweet <div>
        # and the start of its stats container
        tweet = text.extract(
            html, '<div class="tweet ', 'class="js-tweet-stats-container')[0]
        return (tweet,)
[twitter] add extractor 8 years ago			`# -- coding: utf-8 --`

simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`# Copyright 2016-2019 Mike Fährmann`
[twitter] add extractor 8 years ago			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

			`"""Extract images from https://twitter.com/"""`

			`from .common import Extractor, Message`
[twitter] add login support (#214) 6 years ago			`from .. import text, exception`
			`from ..cache import cache`
[twitter] add extractor 8 years ago
code adjustments according to pep8 nr2 8 years ago
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`class TwitterExtractor(Extractor):`
			`"""Base class for twitter extractors"""`
[twitter] add extractor 8 years ago			`category = "twitter"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`directory_fmt = ("{category}", "{user}")`
change keyword names to valid Python identifiers This commit mostly replaces all minus-signs ('-') in keyword names with underscores ('_') to allow them to be used in filter-expressions. For example 'gallery-id' got renamed to 'gallery_id'. (It is theoretically possible to access any variable, regardless of its name, with 'locals()["NAME"]', but that seems a bit too convoluted if just 'NAME' could be enough) 7 years ago			`filename_fmt = "{tweet_id}_{num}.{extension}"`
[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`archive_fmt = "{tweet_id}_{retweet_id}_{num}"`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`root = "https://twitter.com"`
[twitter] add fallback URLs (#237) 5 years ago			`sizes = (":orig", ":large", ":medium", ":small")`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`def __init__(self, match):`
propagate 'match' to base extractor constructor 6 years ago			`Extractor.__init__(self, match)`
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`self.user = match.group(1)`
			`self.retweets = self.config("retweets", True)`
[twitter] add experimental 'videos' option (#99) Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if inefficient as can be ... 6 years ago			`self.videos = self.config("videos", False)`

[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`def items(self):`
[twitter] add login support (#214) 6 years ago			`self.login()`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`yield Message.Version, 1`
			`yield Message.Directory, self.metadata()`

			`for tweet in self.tweets():`
			`data = self._data_from_tweet(tweet)`
[twitter] extract 'content' metadata (closes #333) 5 years ago
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`if not self.retweets and data["retweet_id"]:`
			`continue`

[twitter] add experimental 'videos' option (#99) Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if inefficient as can be ... 6 years ago			`images = text.extract_iter(`
			`tweet, 'data-image-url="', '"')`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`for data["num"], url in enumerate(images, 1):`
			`text.nameext_from_url(url, data)`
[twitter] add fallback URLs (#237) 5 years ago			`urls = [url + size for size in self.sizes]`
			`yield Message.Urllist, urls, data`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
[twitter] add experimental 'videos' option (#99) Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if inefficient as can be ... 6 years ago			`if self.videos and "-videoContainer" in tweet:`
implement youtube-dl downloader module URLs starting with 'ytdl:' will now be handled by youtube-dl. There is probably a lot to fix and improve, but the basic use case works. TODO: - format selection and ytdl options in general - better filename/path handling - ytdl support for "unsupported URLs" - ... 6 years ago			`data["num"] = 1`
			`url = "ytdl:{}/{}/status/{}".format(`
[twitter] add experimental 'videos' option (#99) Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if inefficient as can be ... 6 years ago			`self.root, data["user"], data["tweet_id"])`
implement youtube-dl downloader module URLs starting with 'ytdl:' will now be handled by youtube-dl. There is probably a lot to fix and improve, but the basic use case works. TODO: - format selection and ytdl options in general - better filename/path handling - ytdl support for "unsupported URLs" - ... 6 years ago			`yield Message.Url, url, data`
[twitter] add experimental 'videos' option (#99) Enabling this option will detect videos in tweets and output them as "unsupported" URLs, so that these can then be downloaded with youtube-dl There are a lot of improvements to be made to the current implementation, but it works and does what it is supposed to, even if inefficient as can be ... 6 years ago
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`def metadata(self):`
			`"""Return general metadata"""`
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`return {"user": self.user}`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
			`def tweets(self):`
			`"""Yield HTML content of all relevant tweets"""`

[twitter] add login support (#214) 6 years ago			`def login(self):`
			`username, password = self._get_auth_info()`
			`if username:`
			`self._update_cookies(self._login_impl(username, password))`

			`@cache(maxage=360243600, keyarg=1)`
			`def _login_impl(self, username, password):`
			`self.log.info("Logging in as %s", username)`

			`page = self.request(self.root + "/login").text`
			`pos = page.index('name="authenticity_token"')`
			`token = text.extract(page, 'value="', '"', pos-80)[0]`

			`url = self.root + "/sessions"`
			`data = {`
			`"session[username_or_email]": username,`
			`"session[password]" : password,`
			`"authenticity_token" : token,`
			`"ui_metrics" : '{"rf":{},"s":""}',`
			`"scribe_log" : "",`
			`"redirect_after_login" : "",`
			`"remember_me" : "1",`
			`}`
			`response = self.request(url, method="POST", data=data)`

			`if "/error" in response.url:`
			`raise exception.AuthenticationError()`
			`return self.session.cookies`

[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`@staticmethod`
			`def _data_from_tweet(tweet):`
use 'text.extract_from()' in a few places 6 years ago			`extr = text.extract_from(tweet)`
			`return {`
			`"tweet_id" : text.parse_int(extr('data-tweet-id="' , '"')),`
			`"retweet_id": text.parse_int(extr('data-retweet-id="', '"')),`
			`"retweeter" : extr('data-retweeter="' , '"'),`
			`"user" : extr('data-screen-name="', '"'),`
			`"username" : extr('data-name="' , '"'),`
			`"user_id" : text.parse_int(extr('data-user-id="' , '"')),`
[twitter] extract 'date' metadata (#224) 5 years ago			`"date" : text.parse_timestamp(extr('data-time="', '"')),`
[twitter] extract 'content' metadata (closes #333) 5 years ago			`"content" : text.unescape(text.remove_html(extr(`
			`'<div class="js-tweet-text-container">', '\n</div>'`
			`))).replace(" @ ", " @").replace(" # ", " #"),`
use 'text.extract_from()' in a few places 6 years ago			`}`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`def _tweets_from_api(self, url):`
[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`params = {`
			`"include_available_features": "1",`
			`"include_entities": "1",`
			`"reset_error_state": "false",`
			`"lang": "en",`
			`}`
			`headers = {`
			`"X-Requested-With": "XMLHttpRequest",`
			`"X-Twitter-Active-User": "yes",`
			`"Referer": "{}/{}".format(self.root, self.user)`
			`}`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
			`while True:`
[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`data = self.request(url, params=params, headers=headers).json()`
[twitter] unpack API responses when logged in (closes #123) 6 years ago			`if "inner" in data:`
			`data = data["inner"]`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago
			`for tweet in text.extract_iter(`
[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`data["items_html"], '<div class="tweet ', '\n</li>'):`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`yield tweet`

[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`if not data["has_more_items"]:`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`return`
[twitter] fix pagination end condition Some timelines would cause an endless loop because 'has_more_items' is always True, even if it would return the same list of tweets over and over again. 5 years ago
			`position = text.parse_int(text.extract(`
			`tweet, 'data-tweet-id="', '"')[0])`
			`if "max_position" in params and position >= params["max_position"]:`
			`return`
			`params["max_position"] = position`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago

[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`class TwitterTimelineExtractor(TwitterExtractor):`
			`"""Extractor for all images from a user's timeline"""`
			`subcategory = "timeline"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`pattern = (r"(?:https?://)?(?:www\.\|mobile\.)?twitter\.com"`
			`r"/([^/?&#]+)/?$")`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`test = ("https://twitter.com/supernaturepics", {`
fix 'range' tests and update a few test results 6 years ago			`"range": "1-40",`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",`
[twitter] extract 'content' metadata (closes #333) 5 years ago			`"keyword": "d07e8d2dd4ece0dc93e068579f8fb75d83d16767",`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`})`
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago
			`def tweets(self):`
			`url = "{}/i/profiles/show/{}/timeline/tweets".format(`
			`self.root, self.user)`
			`return self._tweets_from_api(url)`


			`class TwitterMediaExtractor(TwitterExtractor):`
			`"""Extractor for all images from a user's Media Tweets"""`
			`subcategory = "media"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`pattern = (r"(?:https?://)?(?:www\.\|mobile\.)?twitter\.com"`
			`r"/([^/?&#]+)/media(?!\w)")`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`test = ("https://twitter.com/supernaturepics/media", {`
fix 'range' tests and update a few test results 6 years ago			`"range": "1-40",`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`"url": "0106229d408f4111d9a52c8fd2ad687f64842aa4",`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`})`
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago
			`def tweets(self):`
			`url = "{}/i/profiles/show/{}/media_timeline".format(`
			`self.root, self.user)`
			`return self._tweets_from_api(url)`


[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`class TwitterTweetExtractor(TwitterExtractor):`
[twitter] changes and improvements - rename User- to TimelineExtractor - rename 'userid' to 'user_id' to conform to the other ..._id values - adjust archive_fmt to deal with retweets - emulate browser behavior for API calls 6 years ago			`"""Extractor for images from individual tweets"""`
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`subcategory = "tweet"`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`pattern = (r"(?:https?://)?(?:www\.\|mobile\.)?twitter\.com"`
			`r"/([^/?&#]+)/status/(\d+)")`
			`test = (`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`("https://twitter.com/supernaturepics/status/604341487988576256", {`
			`"url": "0e801d2f98142dd87c3630ded9e4be4a4d63b580",`
[twitter] extract 'content' metadata (closes #333) 5 years ago			`"keyword": "d6149c5734f2e91d29a99600592e04b349daaedb",`
[twitter] replace unit test URLs https://twitter.com/PicturesEarth was deleted 5 years ago			`"content": "ab05e1d8d21f8d43496df284d31e8b362cd3bcab",`
[twitter] ignore "Promoted Tweets" 7 years ago			`}),`
[twitter] extract 'date' metadata (#224) 5 years ago			`# 4 images`
[twitter] ignore "Promoted Tweets" 7 years ago			`("https://twitter.com/perrypumas/status/894001459754180609", {`
			`"url": "c8a262a9698cb733fb27870f5a8f75faf77d79f6",`
[twitter] extract 'content' metadata (closes #333) 5 years ago			`"keyword": "cc9860f46ec0d0f19da2232281544b85d573eb13",`
[twitter] extract 'date' metadata (#224) 5 years ago			`}),`
			`# video`
			`("https://twitter.com/perrypumas/status/1065692031626829824", {`
			`"options": (("videos", True),),`
			`"pattern": r"ytdl:https://twitter.com/perrypumas/status/\d+",`
[twitter] ignore "Promoted Tweets" 7 years ago			`}),`
simplify extractor constants - single strings for URL patterns - tuples instead of lists for 'directory_fmt' and 'test' - single-tuple tests where applicable 6 years ago			`)`
[twitter] add extractor 8 years ago
			`def __init__(self, match):`
[twitter] add extractor for media-tweet timelines (#96) For example "https://twitter.com/PicturesEarth/media". They are different from normal timelines in that they do not contain any (re)tweets from other users and feature all media the user ever posted, including responses to other tweets. 6 years ago			`TwitterExtractor.__init__(self, match)`
			`self.tweet_id = match.group(2)`
[twitter] add extractor 8 years ago
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`def metadata(self):`
			`return {"user": self.user, "tweet_id": self.tweet_id}`
[twitter] add extractor 8 years ago
[twitter] add support for user-timelines (closes #96) also adds a 'retweets' option to filter retweeted content 6 years ago			`def tweets(self):`
			`url = "{}/{}/status/{}".format(self.root, self.user, self.tweet_id)`
			`page = self.request(url).text`
			`return (text.extract(`
[twitter] extract 'content' metadata (closes #333) 5 years ago			`page, '<div class="tweet ', 'class="js-tweet-stats-container')[0],)`