From 9514cb8c1294f4a680ee3a2c31b1d9b0c7753a80 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 22 Apr 2021 22:41:14 +0200 Subject: [PATCH] [exhentai] update 'limits' check (#1487) Only use 'limits' to set a custom upper bound. Checking if the actual maximum gets exceeded is not necessary. --- docs/configuration.rst | 11 ++---- gallery_dl/extractor/exhentai.py | 63 ++++++++++++++------------------ 2 files changed, 31 insertions(+), 43 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 2f6f4972..b47ae42c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -923,15 +923,12 @@ Description extractor.exhentai.limits ------------------------- Type - ``bool`` or ``integer`` + ``integer`` Default - ``true`` + ``null`` Description - Check image download limits - and stop extraction when they are exceeded. - - If this value is an ``integer``, it gets used as the limit maximum - instead of the value listed on ``https://e-hentai.org/home.php`` + Sets a custom image download limit and + stops extraction when it gets exceeded. extractor.exhentai.domain diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 09058680..910da7d0 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -43,16 +43,14 @@ class ExhentaiExtractor(Extractor): self.cookiedomain = "." + domain Extractor.__init__(self, match) - self.limits = self.config("limits", True) self.original = self.config("original", True) - if type(self.limits) is int: - self._limit_max = self.limits - self.limits = True + limits = self.config("limits", False) + if limits and limits.__class__ is int: + self.limits = limits + self._remaining = 0 else: - self._limit_max = 0 - - self._remaining = 0 + self.limits = False self.session.headers["Referer"] = self.root + "/" if version != "ex": @@ -219,6 +217,8 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): if "/fullimg.php" in url: data["extension"] = "" data["_http_validate"] = _validate_response + else: + data["_http_validate"] = None yield Message.Url, url, data def get_metadata(self, page): @@ -358,6 +358,26 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "Continue with '%s/s/%s/%s-%s' as URL after resetting it.", self.root, data["image_token"], self.gallery_id, data["num"]) + def _check_limits(self, data): + if not self._remaining or data["num"] % 25 == 0: + self._update_limits() + self._remaining -= data["cost"] + if self._remaining <= 0: + self._report_limits(data) + + def _update_limits(self): + url = "https://e-hentai.org/home.php" + cookies = { + cookie.name: cookie.value + for cookie in self.session.cookies + if cookie.domain == self.cookiedomain and cookie.name != "igneous" + } + + page = self.request(url, cookies=cookies).text + current = text.extract(page, "", "")[0] + self.log.debug("Image Limits: %s/%s", current, self.limits) + self._remaining = self.limits - text.parse_int(current) + def _gallery_page(self): url = "{}/g/{}/{}/".format( self.root, self.gallery_id, self.gallery_token) @@ -381,35 +401,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): raise exception.NotFoundError("image page") return page - def _check_limits(self, data): - if not self._remaining or data["num"] % 25 == 0: - self._update_limits() - self._remaining -= data["cost"] - - if self._remaining <= 0: - ExhentaiExtractor.LIMIT = True - url = "{}/s/{}/{}-{}".format( - self.root, data["image_token"], self.gallery_id, data["num"]) - raise exception.StopExtraction( - "Image limit reached! Continue with '%s' " - "as URL after resetting it.", url) - - def _update_limits(self): - url = "https://e-hentai.org/home.php" - cookies = { - cookie.name: cookie.value - for cookie in self.session.cookies - if cookie.domain == self.cookiedomain and cookie.name != "igneous" - } - - page = self.request(url, cookies=cookies).text - current, pos = text.extract(page, "", "") - maximum, pos = text.extract(page, "", "", pos) - if self._limit_max: - maximum = self._limit_max - self.log.debug("Image Limits: %s/%s", current, maximum) - self._remaining = text.parse_int(maximum) - text.parse_int(current) - @staticmethod def _parse_image_info(url): for part in url.split("/")[4:]: