From df77271438238afac534fe48e23938a7f45289b6 Mon Sep 17 00:00:00 2001 From: ClosedPort22 <44864697+ClosedPort22@users.noreply.github.com> Date: Thu, 9 Mar 2023 20:55:28 +0800 Subject: [PATCH] [downloader:http] add 'consume-content' option * fix connection not being released when the response is neither successful nor retried * add the ability to consume the HTTP response body instead of closing the connection reference: https://docs.python-requests.org/en/latest/user/advanced/#body-content-workflow --- docs/configuration.rst | 19 +++++++++++++++++++ gallery_dl/downloader/http.py | 17 ++++++++++++++++- 2 files changed, 35 insertions(+), 1 deletion(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index fbb0416b..c88f8eb1 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3616,6 +3616,25 @@ Description contains JPEG/JFIF data. +downloader.http.consume-content +--------------------------------- +Type + ``bool`` +Default + ``false`` +Description + Controls the behavior when an HTTP response is considered + unsuccessful. + + If the value is ``true``, consume the response body. This + avoids closing the connection and therefore improves connection + reuse. + + If the value is ``false``, immediately close the connection + without reading the response. This can be useful if the server + is known to send large bodies for error responses. 
+ + downloader.http.chunk-size -------------------------- Type diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index f14af249..30b59714 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -44,6 +44,12 @@ class HttpDownloader(DownloaderBase): self.mtime = self.config("mtime", True) self.rate = self.config("rate") + if not self.config("consume-content", False): + # this resets the underlying TCP connection, and therefore + # if the program makes another request to the same domain, + # a new connection (either TLS or plain TCP) must be made + self.release_conn = lambda resp: resp.close() + if self.retries < 0: self.retries = float("inf") if self.minsize: @@ -113,7 +119,7 @@ class HttpDownloader(DownloaderBase): while True: if tries: if response: - response.close() + self.release_conn(response) response = None self.log.warning("%s (%s/%s)", msg, tries, self.retries+1) if tries > self.retries: @@ -170,6 +176,7 @@ class HttpDownloader(DownloaderBase): if code in retry_codes or 500 <= code < 600: continue self.log.warning(msg) + self.release_conn(response) return False # check for invalid responses @@ -182,6 +189,7 @@ class HttpDownloader(DownloaderBase): continue if not result: self.log.warning("Invalid response") + self.release_conn(response) return False # check file size @@ -191,11 +199,13 @@ class HttpDownloader(DownloaderBase): self.log.warning( "File size smaller than allowed minimum (%s < %s)", size, self.minsize) + self.release_conn(response) return False if self.maxsize and size > self.maxsize: self.log.warning( "File size larger than allowed maximum (%s > %s)", size, self.maxsize) + self.release_conn(response) return False build_path = False @@ -284,6 +294,11 @@ class HttpDownloader(DownloaderBase): return True + def release_conn(self, response): + """Release connection back to pool by consuming response body""" + for _ in response.iter_content(self.chunk_size): + pass + @staticmethod def receive(fp, 
content, bytes_total, bytes_start): write = fp.write