|
|
|
@ -31,6 +31,8 @@ class Extractor():
|
|
|
|
|
cookiedomain = ""
|
|
|
|
|
root = ""
|
|
|
|
|
test = None
|
|
|
|
|
_request_last = 0
|
|
|
|
|
_request_interval = 0
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
|
self.session = requests.Session()
|
|
|
|
@ -46,6 +48,8 @@ class Extractor():
|
|
|
|
|
self._retries = self.config("retries", 4)
|
|
|
|
|
self._timeout = self.config("timeout", 30)
|
|
|
|
|
self._verify = self.config("verify", True)
|
|
|
|
|
self._request_interval = self.config(
|
|
|
|
|
"sleep-request", self._request_interval)
|
|
|
|
|
|
|
|
|
|
if self._retries < 0:
|
|
|
|
|
self._retries = float("inf")
|
|
|
|
@ -85,6 +89,13 @@ class Extractor():
|
|
|
|
|
kwargs.setdefault("verify", self._verify)
|
|
|
|
|
response = None
|
|
|
|
|
|
|
|
|
|
if self._request_interval:
|
|
|
|
|
seconds = (self._request_interval -
|
|
|
|
|
(time.time() - Extractor._request_last))
|
|
|
|
|
if seconds > 0:
|
|
|
|
|
self.log.debug("Sleeping for %.5s seconds", seconds)
|
|
|
|
|
time.sleep(seconds)
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
try:
|
|
|
|
|
response = session.request(method, url, **kwargs)
|
|
|
|
@ -123,6 +134,8 @@ class Extractor():
|
|
|
|
|
msg = "'{} {}' for '{}'".format(code, reason, url)
|
|
|
|
|
if code < 500 and code != 429 and code != 430:
|
|
|
|
|
break
|
|
|
|
|
finally:
|
|
|
|
|
Extractor._request_last = time.time()
|
|
|
|
|
|
|
|
|
|
self.log.debug("%s (%s/%s)", msg, tries, retries+1)
|
|
|
|
|
if tries > retries:
|
|
|
|
|