[twitter] wait for rate limit reset before 429 error (#5532)

pull/5591/head
Mike Fährmann 4 months ago
parent 3b6f306a8b
commit 33b07c4603
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -12,6 +12,7 @@ from .common import Extractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache, memcache from ..cache import cache, memcache
import itertools import itertools
import random
import json import json
import re import re
@ -1300,6 +1301,11 @@ class TwitterAPI():
if csrf_token: if csrf_token:
self.headers["x-csrf-token"] = csrf_token self.headers["x-csrf-token"] = csrf_token
remaining = int(response.headers.get("x-rate-limit-remaining", 6))
if remaining < 6 and remaining <= random.randrange(1, 6):
self._handle_ratelimit(response)
continue
try: try:
data = response.json() data = response.json()
except ValueError: except ValueError:
@ -1353,13 +1359,7 @@ class TwitterAPI():
not self.headers["x-twitter-auth-type"]: not self.headers["x-twitter-auth-type"]:
raise exception.AuthorizationError("Login required") raise exception.AuthorizationError("Login required")
elif response.status_code == 429: elif response.status_code == 429:
# rate limit exceeded self._handle_ratelimit(response)
if self.extractor.config("ratelimit") == "abort":
raise exception.StopExtraction("Rate limit exceeded")
until = response.headers.get("x-rate-limit-reset")
seconds = None if until else 60
self.extractor.wait(until=until, seconds=seconds)
continue continue
# error # error
@ -1702,6 +1702,13 @@ class TwitterAPI():
return return
variables["cursor"] = cursor variables["cursor"] = cursor
def _handle_ratelimit(self, response):
    """Handle an API response that signals an (imminent) rate limit.

    If the extractor's "ratelimit" option is set to "abort", extraction
    is stopped by raising exception.StopExtraction. Otherwise the
    extractor is asked to wait: the response's "x-rate-limit-reset"
    header value is forwarded as 'until', and a fixed 60 second delay
    is requested only when that header is missing or empty.
    """
    extractor = self.extractor

    # user opted out of waiting for the limit to reset
    if extractor.config("ratelimit") == "abort":
        raise exception.StopExtraction("Rate limit exceeded")

    reset = response.headers.get("x-rate-limit-reset")
    if reset:
        # reset value known: let wait() derive the delay from it
        extractor.wait(until=reset, seconds=None)
    else:
        # no usable reset header: fall back to a fixed delay
        extractor.wait(until=reset, seconds=60)
def _process_tombstone(self, entry, tombstone): def _process_tombstone(self, entry, tombstone):
text = (tombstone.get("richText") or tombstone["text"])["text"] text = (tombstone.get("richText") or tombstone["text"])["text"]
tweet_id = entry["entryId"].rpartition("-")[2] tweet_id = entry["entryId"].rpartition("-")[2]
@ -1716,7 +1723,6 @@ class TwitterAPI():
@cache(maxage=365*86400, keyarg=1) @cache(maxage=365*86400, keyarg=1)
def _login_impl(extr, username, password): def _login_impl(extr, username, password):
import random
def process(data, params=None): def process(data, params=None):
response = extr.request( response = extr.request(

Loading…
Cancel
Save