cache cf_clearance cookies

pull/197/head
Mike Fährmann 6 years ago
parent 34ea0d6a10
commit f612284d24
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -13,6 +13,7 @@ import time
import operator
import urllib.parse
from . import text
from .cache import memcache
def is_challenge(response):
@ -47,7 +48,14 @@ def solve_challenge(session, response, kwargs):
location = cf_response.headers["Location"]
if location[0] == "/":
location = root + location
return location
for cookie in cf_response.cookies:
if cookie.name == "cf_clearance":
return location, cookie.domain, {
cookie.name: cookie.value,
"__cfduid" : response.cookies.get("__cfduid", ""),
}
return location, "", {}
def solve_js_challenge(page, netloc):
@ -126,3 +134,8 @@ VALUES = {
"!+": 1,
"+!!": 1,
}
# Cache slot for the Cloudflare clearance cookies of one extractor category.
#
# The undecorated function always returns None, i.e. "nothing cached yet".
# Callers store a value with `cookies.update(category, (domain, cookies))`
# after a challenge has been solved, and read it back with
# `cookies(category)`; a falsy result is treated as "no cached cookies".
# NOTE(review): presumably `keyarg=0` makes memcache key its entries on the
# first positional argument (`category`) -- confirm against `.cache`.
@memcache(keyarg=0)
def cookies(category):
return None

@ -90,7 +90,9 @@ class Extractor():
return response
if cloudflare.is_challenge(response):
self.log.info("Solving Cloudflare challenge")
url = cloudflare.solve_challenge(session, response, kwargs)
url, domain, cookies = cloudflare.solve_challenge(
session, response, kwargs)
cloudflare.cookies.update(self.category, (domain, cookies))
continue
msg = "{}: {} for url: {}".format(code, response.reason, url)
@ -159,6 +161,11 @@ class Extractor():
else:
self.session.cookies.update(cookiejar)
cookies = cloudflare.cookies(self.category)
if cookies:
domain, cookies = cookies
self._update_cookies_dict(cookies, domain)
def _update_cookies(self, cookies, *, domain=""):
"""Update the session's cookiejar with 'cookies'"""
if isinstance(cookies, dict):

Loading…
Cancel
Save