|
|
@ -34,8 +34,9 @@ class Extractor():
|
|
|
|
def __init__(self):
    """Create the HTTP session and logger, then apply configured settings."""
    self.session = requests.Session()
    self.log = logging.getLogger(self.category)

    # Run the setup helpers in a fixed order: headers, cookies, proxies.
    # The logger is assigned first because the helpers report problems
    # through self.log.
    for configure in (self._set_headers,
                      self._set_cookies,
                      self._set_proxies):
        configure()
|
|
|
|
|
|
|
|
|
|
|
|
def __iter__(self):
    """Return whatever items() returns, so the object can be iterated."""
    results = self.items()
    return results
|
|
|
@ -105,8 +106,9 @@ class Extractor():
|
|
|
|
"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) "
|
|
|
|
"user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) "
|
|
|
|
"Gecko/20100101 Firefox/54.0"))
|
|
|
|
"Gecko/20100101 Firefox/54.0"))
|
|
|
|
|
|
|
|
|
|
|
|
def _set_cookies(self, cookies):
|
|
|
|
def _set_cookies(self):
|
|
|
|
"""Populate the cookiejar with 'cookies'"""
|
|
|
|
"""Populate the session's cookiejar"""
|
|
|
|
|
|
|
|
cookies = self.config("cookies")
|
|
|
|
if cookies:
|
|
|
|
if cookies:
|
|
|
|
if isinstance(cookies, dict):
|
|
|
|
if isinstance(cookies, dict):
|
|
|
|
setcookie = self.session.cookies.set
|
|
|
|
setcookie = self.session.cookies.set
|
|
|
@ -120,6 +122,20 @@ class Extractor():
|
|
|
|
except OSError as exc:
|
|
|
|
except OSError as exc:
|
|
|
|
self.log.warning("cookies: %s", exc)
|
|
|
|
self.log.warning("cookies: %s", exc)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
def _set_proxies(self):
    """Replace the session's proxy map with the configured "proxy" value.

    The "proxy" option may be a single string, which is applied to both
    the "http" and "https" schemes, or a dict mapping schemes to proxy
    URLs. String values without a "://" separator get "http://" prepended
    after their leading slashes are stripped. Any other truthy value is
    reported with a warning and the session is left untouched. A falsy
    value (option unset, empty string, empty dict) does nothing.
    """
    proxies = self.config("proxy")
    if not proxies:
        return

    if isinstance(proxies, str):
        proxies = {"http": proxies, "https": proxies}
    elif not isinstance(proxies, dict):
        self.log.warning("invalid proxy specifier: %s", proxies)
        return

    # Build a fresh mapping instead of rewriting the configured dict in
    # place, so the object returned by config() is never modified and is
    # not shared with the session.
    normalized = {}
    for scheme, proxy in proxies.items():
        # Only strings can be normalized; other values (e.g. None) are
        # passed through unchanged instead of raising TypeError on the
        # substring test.
        if isinstance(proxy, str) and "://" not in proxy:
            proxy = "http://" + proxy.lstrip("/")
        normalized[scheme] = proxy
    self.session.proxies = normalized
|
|
|
|
|
|
|
|
|
|
|
|
def _check_cookies(self, cookienames, domain=None):
|
|
|
|
def _check_cookies(self, cookienames, domain=None):
|
|
|
|
"""Check if all 'cookienames' are in the session's cookiejar"""
|
|
|
|
"""Check if all 'cookienames' are in the session's cookiejar"""
|
|
|
|
if not domain and self.cookiedomain:
|
|
|
|
if not domain and self.cookiedomain:
|
|
|
|