diff --git a/docs/configuration.rst b/docs/configuration.rst index e6cbbb7b..5854c4b2 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1397,6 +1397,21 @@ Description Enable/Disable this downloader module. =========== ===== +downloader.*.filesize-min & .filesize-max +----------------------------------------- +=========== ===== +Type ``string`` +Default ``null`` +Example ``"32000"``, ``"500k"``, ``"2.5M"`` +Description Minimum/Maximum allowed file size in bytes. + Any file smaller/larger than this limit will not be downloaded. + + Possible values are valid integer or floating-point numbers + optionally followed by one of ``k``, ``m``, ``g``, ``t`` or ``p``. + These suffixes are case-insensitive. +=========== ===== + + downloader.*.mtime ------------------ =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 56147e91..6d38a5d5 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -189,6 +189,8 @@ "downloader": { + "filesize-min": null, + "filesize-max": null, "part": true, "part-directory": null, diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 6644827c..7f14c030 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -31,6 +31,8 @@ class HttpDownloader(DownloaderBase): self.downloading = False self.adjust_extension = self.config("adjust-extensions", True) + self.minsize = self.config("filesize-min") + self.maxsize = self.config("filesize-max") self.retries = self.config("retries", extractor._retries) self.timeout = self.config("timeout", extractor._timeout) self.verify = self.config("verify", extractor._verify) @@ -39,6 +41,16 @@ class HttpDownloader(DownloaderBase): if self.retries < 0: self.retries = float("inf") + if self.minsize: + minsize = text.parse_bytes(self.minsize) + if not minsize: + self.log.warning("Invalid minimum filesize (%r)", self.minsize) + self.minsize = minsize + if self.maxsize: + maxsize = text.parse_bytes(self.maxsize) + if not 
maxsize: + self.log.warning("Invalid maximum filesize (%r)", self.maxsize) + self.maxsize = maxsize if self.rate: rate = text.parse_bytes(self.rate) if rate: @@ -116,7 +128,20 @@ class HttpDownloader(DownloaderBase): continue self.log.warning(msg) return False - size = text.parse_int(size) + + # check filesize + size = text.parse_int(size, None) + if size is not None: + if self.minsize and size < self.minsize: + self.log.warning( + "File size smaller than allowed minimum (%s < %s)", + size, self.minsize) + return False + if self.maxsize and size > self.maxsize: + self.log.warning( + "File size larger than allowed maximum (%s > %s)", + size, self.maxsize) + return False # set missing filename extension if not pathfmt.extension: diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index c3dd863c..8086b5d9 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -31,6 +31,10 @@ class YoutubeDLDownloader(DownloaderBase): "nopart": not self.part, "updatetime": self.config("mtime", True), "proxy": extractor.session.proxies.get("http"), + "min_filesize": text.parse_bytes( + self.config("filesize-min"), None), + "max_filesize": text.parse_bytes( + self.config("filesize-max"), None), } options.update(self.config("raw-options") or {}) diff --git a/gallery_dl/option.py b/gallery_dl/option.py index 5b99beee..50134478 100644 --- a/gallery_dl/option.py +++ b/gallery_dl/option.py @@ -209,6 +209,16 @@ def build_parser(): dest="sleep", metavar="SECONDS", type=float, action=ConfigAction, help="Number of seconds to sleep before each download", ) + downloader.add_argument( + "--filesize-min", + dest="filesize-min", metavar="SIZE", action=ConfigAction, + help="Do not download files smaller than SIZE (e.g. 500k or 2.5M)", + ) + downloader.add_argument( + "--filesize-max", + dest="filesize-max", metavar="SIZE", action=ConfigAction, + help="Do not download files larger than SIZE (e.g. 
500k or 2.5M)", + ) downloader.add_argument( "--no-part", dest="part", nargs=0, action=ConfigConstAction, const=False, diff --git a/test/test_downloader.py b/test/test_downloader.py index 5d73a4c2..99cfb629 100644 --- a/test/test_downloader.py +++ b/test/test_downloader.py @@ -179,6 +179,9 @@ class TestHTTPDownloader(TestDownloaderBase): server = http.server.HTTPServer(("", port), HttpRequestHandler) threading.Thread(target=server.serve_forever, daemon=True).start() + def tearDown(self): + self.downloader.minsize = self.downloader.maxsize = None + def test_http_download(self): self._run_test(self._jpg, None, DATA_JPG, "jpg", "jpg") self._run_test(self._png, None, DATA_PNG, "png", "png") @@ -199,6 +202,20 @@ class TestHTTPDownloader(TestDownloaderBase): self._run_test(self._png, None, DATA_PNG, "gif", "png") self._run_test(self._gif, None, DATA_GIF, "jpg", "gif") + def test_http_filesize_min(self): + pathfmt = self._prepare_destination(None, extension=None) + self.downloader.minsize = 100 + with self.assertLogs(self.downloader.log, "WARNING"): + success = self.downloader.download(self._gif, pathfmt) + self.assertFalse(success) + + def test_http_filesize_max(self): + pathfmt = self._prepare_destination(None, extension=None) + self.downloader.maxsize = 100 + with self.assertLogs(self.downloader.log, "WARNING"): + success = self.downloader.download(self._jpg, pathfmt) + self.assertFalse(success) + class TestTextDownloader(TestDownloaderBase):