|
|
@ -495,7 +495,7 @@ class TwitterFollowingExtractor(TwitterExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TwitterSearchExtractor(TwitterExtractor):
|
|
|
|
class TwitterSearchExtractor(TwitterExtractor):
|
|
|
|
"""Extractor for all images from a search timeline"""
|
|
|
|
"""Extractor for Twitter search results"""
|
|
|
|
subcategory = "search"
|
|
|
|
subcategory = "search"
|
|
|
|
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
|
|
|
|
pattern = BASE_PATTERN + r"/search/?\?(?:[^&#]+&)*q=([^&#]+)"
|
|
|
|
test = ("https://twitter.com/search?q=nature", {
|
|
|
|
test = ("https://twitter.com/search?q=nature", {
|
|
|
@ -508,7 +508,24 @@ class TwitterSearchExtractor(TwitterExtractor):
|
|
|
|
return {"search": text.unquote(self.user)}
|
|
|
|
return {"search": text.unquote(self.user)}
|
|
|
|
|
|
|
|
|
|
|
|
def tweets(self):
|
|
|
|
def tweets(self):
|
|
|
|
return TwitterAPI(self).search(text.unquote(self.user))
|
|
|
|
return TwitterAPI(self).search_adaptive(text.unquote(self.user))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TwitterEventExtractor(TwitterExtractor):
    """Extractor for the Tweets shown on a Twitter Event page"""
    subcategory = "event"
    # Matches ".../i/events/<digits>" and captures the digits.
    pattern = BASE_PATTERN + r"/i/events/(\d+)"
    test = (
        "https://twitter.com/i/events/1484669206993903616",
        {"range": "1-20", "count": ">5"},
    )

    def metadata(self):
        """Create the API client and return the event's metadata.

        The client is kept on ``self.api`` because ``tweets()`` reads it from
        there; ``tweets()`` therefore only works after this method has run.
        """
        # NOTE(review): self.user is handed to the API as the event ID;
        # presumably it holds the group captured by `pattern` -- confirm
        # against the TwitterExtractor base class.
        api = self.api = TwitterAPI(self)
        event = api.live_event(self.user)
        return {"event": event}

    def tweets(self):
        """Return the event's timeline from the client made in metadata()"""
        event_id = self.user
        return self.api.live_event_timeline(event_id)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class TwitterTweetExtractor(TwitterExtractor):
|
|
|
|
class TwitterTweetExtractor(TwitterExtractor):
|
|
|
@ -684,6 +701,35 @@ class TwitterAPI():
|
|
|
|
"x-csrf-token": None,
|
|
|
|
"x-csrf-token": None,
|
|
|
|
"Referer": "https://twitter.com/",
|
|
|
|
"Referer": "https://twitter.com/",
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
self.params = {
|
|
|
|
|
|
|
|
"include_profile_interstitial_type": "1",
|
|
|
|
|
|
|
|
"include_blocking": "1",
|
|
|
|
|
|
|
|
"include_blocked_by": "1",
|
|
|
|
|
|
|
|
"include_followed_by": "1",
|
|
|
|
|
|
|
|
"include_want_retweets": "1",
|
|
|
|
|
|
|
|
"include_mute_edge": "1",
|
|
|
|
|
|
|
|
"include_can_dm": "1",
|
|
|
|
|
|
|
|
"include_can_media_tag": "1",
|
|
|
|
|
|
|
|
"include_ext_has_nft_avatar": "1",
|
|
|
|
|
|
|
|
"skip_status": "1",
|
|
|
|
|
|
|
|
"cards_platform": "Web-12",
|
|
|
|
|
|
|
|
"include_cards": "1",
|
|
|
|
|
|
|
|
"include_ext_alt_text": "true",
|
|
|
|
|
|
|
|
"include_quote_count": "true",
|
|
|
|
|
|
|
|
"include_reply_count": "1",
|
|
|
|
|
|
|
|
"tweet_mode": "extended",
|
|
|
|
|
|
|
|
"include_entities": "true",
|
|
|
|
|
|
|
|
"include_user_entities": "true",
|
|
|
|
|
|
|
|
"include_ext_media_color": "true",
|
|
|
|
|
|
|
|
"include_ext_media_availability": "true",
|
|
|
|
|
|
|
|
"include_ext_sensitive_media_warning": "true",
|
|
|
|
|
|
|
|
"send_error_codes": "true",
|
|
|
|
|
|
|
|
"simple_quoted_tweet": "true",
|
|
|
|
|
|
|
|
"count": "100",
|
|
|
|
|
|
|
|
"cursor": None,
|
|
|
|
|
|
|
|
"ext": "mediaStats,highlightedLabel,hasNftAvatar,"
|
|
|
|
|
|
|
|
"voiceInfo,superFollowMetadata",
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
cookies = extractor.session.cookies
|
|
|
|
cookies = extractor.session.cookies
|
|
|
|
cookiedomain = extractor.cookiedomain
|
|
|
|
cookiedomain = extractor.cookiedomain
|
|
|
@ -844,40 +890,31 @@ class TwitterAPI():
|
|
|
|
return self._pagination_tweets(
|
|
|
|
return self._pagination_tweets(
|
|
|
|
endpoint, variables, ("list", "tweets_timeline", "timeline"))
|
|
|
|
endpoint, variables, ("list", "tweets_timeline", "timeline"))
|
|
|
|
|
|
|
|
|
|
|
|
def search(self, query):
|
|
|
|
def search_adaptive(self, query):
|
|
|
|
endpoint = "/2/search/adaptive.json"
|
|
|
|
endpoint = "/2/search/adaptive.json"
|
|
|
|
params = {
|
|
|
|
params = self.params.copy()
|
|
|
|
"include_profile_interstitial_type": "1",
|
|
|
|
params["q"] = query
|
|
|
|
"include_blocking": "1",
|
|
|
|
params["tweet_search_mode"] = "live"
|
|
|
|
"include_blocked_by": "1",
|
|
|
|
params["query_source"] = "typed_query"
|
|
|
|
"include_followed_by": "1",
|
|
|
|
params["pc"] = "1"
|
|
|
|
"include_want_retweets": "1",
|
|
|
|
params["spelling_corrections"] = "1"
|
|
|
|
"include_mute_edge": "1",
|
|
|
|
return self._pagination_legacy(endpoint, params)
|
|
|
|
"include_can_dm": "1",
|
|
|
|
|
|
|
|
"include_can_media_tag": "1",
|
|
|
|
def live_event_timeline(self, event_id):
|
|
|
|
"skip_status": "1",
|
|
|
|
endpoint = "/2/live_event/timeline/{}.json".format(event_id)
|
|
|
|
"cards_platform": "Web-12",
|
|
|
|
params = self.params.copy()
|
|
|
|
"include_cards": "1",
|
|
|
|
params["timeline_id"] = "recap"
|
|
|
|
"include_ext_alt_text": "true",
|
|
|
|
params["urt"] = "true"
|
|
|
|
"include_quote_count": "true",
|
|
|
|
params["get_annotations"] = "true"
|
|
|
|
"include_reply_count": "1",
|
|
|
|
return self._pagination_legacy(endpoint, params)
|
|
|
|
"tweet_mode": "extended",
|
|
|
|
|
|
|
|
"include_entities": "true",
|
|
|
|
def live_event(self, event_id):
|
|
|
|
"include_user_entities": "true",
|
|
|
|
endpoint = "/1.1/live_event/1/{}/timeline.json".format(event_id)
|
|
|
|
"include_ext_media_color": "true",
|
|
|
|
params = self.params.copy()
|
|
|
|
"include_ext_media_availability": "true",
|
|
|
|
params["count"] = "0"
|
|
|
|
"send_error_codes": "true",
|
|
|
|
params["urt"] = "true"
|
|
|
|
"simple_quoted_tweet": "true",
|
|
|
|
return (self._call(endpoint, params)
|
|
|
|
"count": "100",
|
|
|
|
["twitter_objects"]["live_events"][event_id])
|
|
|
|
"cursor": None,
|
|
|
|
|
|
|
|
"ext": "mediaStats,highlightedLabel",
|
|
|
|
|
|
|
|
"q": query,
|
|
|
|
|
|
|
|
"tweet_search_mode": "live",
|
|
|
|
|
|
|
|
"query_source": "typed_query",
|
|
|
|
|
|
|
|
"pc": "1",
|
|
|
|
|
|
|
|
"spelling_corrections": "1",
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
return self._pagination_search(endpoint, params)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def list_by_rest_id(self, list_id):
|
|
|
|
def list_by_rest_id(self, list_id):
|
|
|
|
endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId"
|
|
|
|
endpoint = "/graphql/BWEhzAk7k8TwbU4lKH2dpw/ListByRestId"
|
|
|
@ -1004,7 +1041,7 @@ class TwitterAPI():
|
|
|
|
raise exception.StopExtraction(
|
|
|
|
raise exception.StopExtraction(
|
|
|
|
"%s %s (%s)", response.status_code, response.reason, errors)
|
|
|
|
"%s %s (%s)", response.status_code, response.reason, errors)
|
|
|
|
|
|
|
|
|
|
|
|
def _pagination_search(self, endpoint, params=None):
|
|
|
|
def _pagination_legacy(self, endpoint, params):
|
|
|
|
original_retweets = (self.extractor.retweets == "original")
|
|
|
|
original_retweets = (self.extractor.retweets == "original")
|
|
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
while True:
|
|
|
@ -1033,7 +1070,7 @@ class TwitterAPI():
|
|
|
|
|
|
|
|
|
|
|
|
elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
|
|
|
|
elif entry_startswith(("cursor-bottom-", "sq-cursor-bottom")):
|
|
|
|
cursor = entry["content"]["operation"]["cursor"]
|
|
|
|
cursor = entry["content"]["operation"]["cursor"]
|
|
|
|
if not cursor.get("stopOnEmptyResponse"):
|
|
|
|
if not cursor.get("stopOnEmptyResponse", True):
|
|
|
|
# keep going even if there are no tweets
|
|
|
|
# keep going even if there are no tweets
|
|
|
|
tweet = True
|
|
|
|
tweet = True
|
|
|
|
cursor = cursor["value"]
|
|
|
|
cursor = cursor["value"]
|
|
|
@ -1123,7 +1160,7 @@ class TwitterAPI():
|
|
|
|
tweets.extend(entry["content"]["items"])
|
|
|
|
tweets.extend(entry["content"]["items"])
|
|
|
|
elif esw("cursor-bottom-"):
|
|
|
|
elif esw("cursor-bottom-"):
|
|
|
|
cursor = entry["content"]
|
|
|
|
cursor = entry["content"]
|
|
|
|
if not cursor.get("stopOnEmptyResponse"):
|
|
|
|
if not cursor.get("stopOnEmptyResponse", True):
|
|
|
|
# keep going even if there are no tweets
|
|
|
|
# keep going even if there are no tweets
|
|
|
|
tweet = True
|
|
|
|
tweet = True
|
|
|
|
cursor = cursor.get("value")
|
|
|
|
cursor = cursor.get("value")
|
|
|
@ -1133,7 +1170,6 @@ class TwitterAPI():
|
|
|
|
tweet = ((tweet.get("content") or tweet["item"])
|
|
|
|
tweet = ((tweet.get("content") or tweet["item"])
|
|
|
|
["itemContent"]["tweet_results"]["result"])
|
|
|
|
["itemContent"]["tweet_results"]["result"])
|
|
|
|
except KeyError:
|
|
|
|
except KeyError:
|
|
|
|
print(tweet["entryId"])
|
|
|
|
|
|
|
|
self.extractor.log.debug(
|
|
|
|
self.extractor.log.debug(
|
|
|
|
"Skipping %s (deleted)",
|
|
|
|
"Skipping %s (deleted)",
|
|
|
|
tweet["entryId"].rpartition("-")[2])
|
|
|
|
tweet["entryId"].rpartition("-")[2])
|
|
|
|