Implement --write-pages option (#736)

* Implement --write-pages option

* Fix long lines

* Fix file mode to binary

* Fix pattern for Windows compatibility
pull/750/head
Vrihub 4 years ago committed by GitHub
parent fe224416bf
commit 4cc761c730
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23

@ -96,6 +96,17 @@ class Extractor():
(400 <= code < 429 or 431 <= code < 500):
if encoding:
response.encoding = encoding
if config.get((), "write_pages", False):
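# Debug aid: write the raw response body to a ".dump" file
# in the current working directory.
# The file name is the response URL with every run of
# characters matched by the pattern below (path separators,
# characters Windows forbids in file names, and URL query
# punctuation) replaced by a single "_".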
r = re.compile(r"[\\\\|/<>:\"?*&=#]+")
outfilename = r.sub('_', response.url) + '.dump'
with open(outfilename, 'wb') as outfile:
outfile.write(response.content)
return response
if notfound and code == 404:
raise exception.NotFoundError(notfound)

@ -173,6 +173,12 @@ def build_parser():
help=("Write URLs, which get emitted by other extractors but cannot "
"be handled, to FILE"),
)
output.add_argument(
"--write-pages",
dest="write_pages", nargs=0, action=ConfigConstAction, const=True,
help=("Write downloaded intermediary pages to files "
"in the current directory to debug problems"),
)
downloader = parser.add_argument_group("Downloader Options")
downloader.add_argument(

Loading…
Cancel
Save