From 4cc761c730fc057d2944517880fddc15f5a2b62a Mon Sep 17 00:00:00 2001 From: Vrihub Date: Tue, 12 May 2020 14:25:21 +0200 Subject: [PATCH] Implement --write-pages option (#736) * Implement --write-pages option * Fix long lines * Fix file mode to binary * Fix pattern for Windows compatibility --- gallery_dl/extractor/common.py | 11 +++++++++++ gallery_dl/option.py | 6 ++++++ 2 files changed, 17 insertions(+) diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 3a282c28..1016f229 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -96,6 +96,17 @@ class Extractor(): (400 <= code < 429 or 431 <= code < 500): if encoding: response.encoding = encoding + + if config.get((), "write_pages", False): + # Write the response content to a .dump file + # in the current directory. + # The file name is derived from the response + # url, replacing special characters with "_" + r = re.compile(r"[\\\\|/<>:\"?*&=#]+") + outfilename = r.sub('_', response.url) + '.dump' + with open(outfilename, 'wb') as outfile: + outfile.write(response.content) + return response if notfound and code == 404: raise exception.NotFoundError(notfound) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 34222a21..081a9a34 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -173,6 +173,12 @@ def build_parser(): help=("Write URLs, which get emitted by other extractors but cannot " "be handled, to FILE"), ) + output.add_argument( + "--write-pages", + dest="write_pages", nargs=0, action=ConfigConstAction, const=True, + help=("Write downloaded intermediary pages to files " + "in the current directory to debug problems"), + ) downloader = parser.add_argument_group("Downloader Options") downloader.add_argument(