include redirects and headers in --write-pages dumps (#737)

pull/866/head
Mike Fährmann 4 years ago
parent 6bcdb264e0
commit a363da4b43
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -313,24 +313,6 @@ class Extractor():
result.append((Message.Queue, url, {"_extractor": extr}))
return iter(result)
@staticmethod
def _dump_response(response):
"""Write the response content to a .dump file in the current directory.
The file name is derived from the response url,
replacing special characters with "_"
"""
if hasattr(Extractor, "_dump_index"):
Extractor._dump_index += 1
else:
Extractor._dump_index = 1
Extractor._dump_sanitize = re.compile(r"[\\\\|/<>:\"?*&=#]+").sub
outfilename = "{:>03}_{}.dump".format(
Extractor._dump_index, Extractor._dump_sanitize('_', response.url))
with open(outfilename, 'wb') as outfile:
outfile.write(response.content)
@classmethod
def _get_tests(cls):
"""Yield an extractor's test cases as (URL, RESULTS) tuples"""
@ -346,6 +328,62 @@ class Extractor():
test = (test, None)
yield test
def _dump_response(self, response):
"""Write the response content to a .dump file in the current directory.
The file name is derived from the response url,
replacing special characters with "_"
"""
for resp in response.history:
self._dump_response(resp)
if hasattr(Extractor, "_dump_index"):
Extractor._dump_index += 1
else:
Extractor._dump_index = 1
Extractor._dump_sanitize = re.compile(r"[\\\\|/<>:\"?*&=#]+").sub
outfmt = """\
{request.method} {request.url}
Status: {response.status_code} {response.reason}
Request Headers
---------------
{request_headers}
Response Headers
----------------
{response_headers}
Content
-------
"""
fname = "{:>02}_{}".format(
Extractor._dump_index,
Extractor._dump_sanitize('_', response.url)
)[:250]
headers = outfmt.format(
request=response.request,
response=response,
request_headers=("\n".join(
name + ": " + value
for name, value in response.request.headers.items()
)),
response_headers=("\n".join(
name + ": " + value
for name, value in response.headers.items()
)),
)
try:
with open(fname + ".dump", 'wb') as fp:
fp.write(headers.encode())
fp.write(response.content)
except Exception as e:
self.log.warning("Failed to dump HTTP request (%s: %s)",
e.__class__.__name__, e)
class GalleryExtractor(Extractor):

Loading…
Cancel
Save