From 32df8d06fef77f4408222c0ab89643fcff6372fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 5 Mar 2020 22:55:26 +0100 Subject: [PATCH] [twitter] add 'bookmark' extractor (closes #625) --- docs/supportedsites.rst | 3 +- gallery_dl/extractor/twitter.py | 75 +++++++++++++++++++++++++++++++++ 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 1fb38b1b..6fa66b38 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -117,7 +117,7 @@ SmugMug https://www.smugmug.com/ |smugmug-C| The /b/ Archive https://thebarchive.com/ Threads Tsumino https://www.tsumino.com/ Galleries, Search Results Optional Tumblr https://www.tumblr.com/ Likes, Posts, Tag-Searches, User Profiles Optional (OAuth) -Twitter https://twitter.com/ Media Timelines, Search Results, Timelines, Tweets Optional +Twitter https://twitter.com/ |twitter-C| Optional VSCO https://vsco.co/ Collections, individual Images, User Profiles Wallhaven https://wallhaven.cc/ individual Images, Search Results |wallhaven-A| Warosu https://warosu.org/ Threads @@ -151,5 +151,6 @@ Turboimagehost https://www.turboimagehost.com/ individual Images .. |pixiv-C| replace:: Favorites, Follows, pixiv.me Links, Rankings, Search Results, User Profiles, individual Images .. |reddit-C| replace:: individual Images, Submissions, Subreddits, User Profiles .. |smugmug-C| replace:: Albums, individual Images, Images from Users and Folders +.. |twitter-C| replace:: Bookmarks, Media Timelines, Search Results, Timelines, Tweets .. |wallhaven-A| replace:: Optional (`API Key `__) .. 
class TwitterBookmarkExtractor(TwitterExtractor):
    """Extractor for bookmarked tweets"""
    subcategory = "bookmark"
    # NOTE(review): the trailing empty group '()' gives the pattern a (blank)
    # first capture group -- presumably because the base class reads
    # 'match.group(1)'; confirm against TwitterExtractor.__init__.
    pattern = r"(?:https?://)?(?:www\.|mobile\.)?twitter\.com/i/bookmarks()"
    test = ("https://twitter.com/i/bookmarks",)

    def items(self):
        """Yield a 'Message.Queue' entry for every bookmarked tweet

        Logs in, then pages through the bookmark timeline endpoint.
        Each tweet object gets '_extractor' set to TwitterTweetExtractor
        and is queued together with its '/i/web/status/<id>' URL.

        Raises:
            exception.AuthorizationError: no logged-in session is available
            exception.StopExtraction: the API answered with a status
                code >= 400 (the response text is used as message)
        """
        self.login()
        if not self.logged_in:
            raise exception.AuthorizationError("Login required")
        # Drop the expiry timestamp of every session cookie.
        # NOTE(review): presumably so that cached login cookies are still
        # sent with the API requests below -- confirm.
        for cookie in self.session.cookies:
            cookie.expires = None

        url = "https://api.twitter.com/2/timeline/bookmark.json"
        params = {
            "include_profile_interstitial_type": "1",
            "include_blocking": "1",
            "include_blocked_by": "1",
            "include_followed_by": "1",
            "include_want_retweets": "1",
            "include_mute_edge": "1",
            "include_can_dm": "1",
            "include_can_media_tag": "1",
            "skip_status": "1",
            "cards_platform": "Web-12",
            "include_cards": "1",
            "include_composer_source": "true",
            "include_ext_alt_text": "true",
            "include_reply_count": "1",
            "tweet_mode": "extended",
            "include_entities": "true",
            "include_user_entities": "true",
            "include_ext_media_color": "true",
            "include_ext_media_availability": "true",
            "send_error_codes": "true",
            "simple_quoted_tweets": "true",
            "count": "100",
            # filled in with the 'cursor-bottom-' value of the previous page
            "cursor": None,
            # Query values get percent-encoded when the request is built,
            # so the comma has to be given unencoded here; a literal
            # '%2C' would be sent as '%252C'.
            "ext": "mediaStats,cameraMoment",
        }
        headers = {
            "authorization": "Bearer AAAAAAAAAAAAAAAAAAAAANRILgAAAAAAnNwIzUejR"
                             "COuH5E6I8xnZz4puTs%3D1Zv7ttfk8LF81IUq16cHjhLTvJu"
                             "4FA33AGWWjCpTnA",
            "Origin": self.root,
            "Referer": self.root + "/i/bookmarks",
            "x-csrf-token": self.session.cookies.get("ct0"),
            "x-twitter-active-user": "yes",
            "x-twitter-auth-type": "Auth2Session",
            "x-twitter-client-language": "en",
        }

        while True:
            # fatal=False: HTTP errors are handled right below, so that
            # the response body can be used as error message
            response = self.request(
                url, params=params, headers=headers, fatal=False)
            if response.status_code >= 400:
                raise exception.StopExtraction(response.text)
            data = response.json()
            tweets = data["globalObjects"]["tweets"]

            if not tweets:
                return
            for tweet_id, tweet_data in tweets.items():
                tweet_url = "{}/i/web/status/{}".format(self.root, tweet_id)
                tweet_data["_extractor"] = TwitterTweetExtractor
                yield Message.Queue, tweet_url, tweet_data

            # Stop when there is no next cursor or it did not advance;
            # otherwise the same page would be requested (and its tweets
            # yielded) over and over again.
            cursor = self._bottom_cursor(data)
            if not cursor or cursor == params["cursor"]:
                return
            params["cursor"] = cursor

    @staticmethod
    def _bottom_cursor(data):
        """Return the first 'cursor-bottom-' value in 'data', or None"""
        for instr in data["timeline"]["instructions"]:
            if "addEntries" not in instr:
                continue
            for entry in instr["addEntries"]["entries"]:
                if entry["entryId"].startswith("cursor-bottom-"):
                    return entry["content"]["operation"]["cursor"]["value"]
        return None