|
|
@@ -43,6 +43,7 @@ class Extractor():
|
|
|
|
browser = None
|
|
|
|
browser = None
|
|
|
|
request_interval = 0.0
|
|
|
|
request_interval = 0.0
|
|
|
|
request_interval_min = 0.0
|
|
|
|
request_interval_min = 0.0
|
|
|
|
|
|
|
|
request_interval_429 = 60.0
|
|
|
|
request_timestamp = 0.0
|
|
|
|
request_timestamp = 0.0
|
|
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
def __init__(self, match):
|
|
|
@@ -203,7 +204,9 @@ class Extractor():
|
|
|
|
self.log.warning("Cloudflare CAPTCHA")
|
|
|
|
self.log.warning("Cloudflare CAPTCHA")
|
|
|
|
break
|
|
|
|
break
|
|
|
|
|
|
|
|
|
|
|
|
if code == 429 and self._interval_429:
|
|
|
|
if code == 429 and self._handle_429(response):
|
|
|
|
|
|
|
|
continue
|
|
|
|
|
|
|
|
elif code == 429 and self._interval_429:
|
|
|
|
pass
|
|
|
|
pass
|
|
|
|
elif code not in retry_codes and code < 500:
|
|
|
|
elif code not in retry_codes and code < 500:
|
|
|
|
break
|
|
|
|
break
|
|
|
@@ -231,6 +234,8 @@ class Extractor():
|
|
|
|
|
|
|
|
|
|
|
|
raise exception.HttpError(msg, response)
|
|
|
|
raise exception.HttpError(msg, response)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
_handle_429 = util.false
|
|
|
|
|
|
|
|
|
|
|
|
def wait(self, seconds=None, until=None, adjust=1.0,
|
|
|
|
def wait(self, seconds=None, until=None, adjust=1.0,
|
|
|
|
reason="rate limit"):
|
|
|
|
reason="rate limit"):
|
|
|
|
now = time.time()
|
|
|
|
now = time.time()
|
|
|
@@ -324,7 +329,7 @@ class Extractor():
|
|
|
|
self.request_interval_min,
|
|
|
|
self.request_interval_min,
|
|
|
|
)
|
|
|
|
)
|
|
|
|
self._interval_429 = util.build_duration_func(
|
|
|
|
self._interval_429 = util.build_duration_func(
|
|
|
|
self.config("sleep-429", 60),
|
|
|
|
self.config("sleep-429", self.request_interval_429),
|
|
|
|
)
|
|
|
|
)
|
|
|
|
|
|
|
|
|
|
|
|
if self._retries < 0:
|
|
|
|
if self._retries < 0:
|
|
|
|