From 51fd14f87d109aee1ea7cd6c7d22d7b1b809f883 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 12 Jul 2024 21:39:12 +0200 Subject: [PATCH] [gelbooru_v02] use total number of posts as end marker (#5830) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit … and potentially retry on empty responses --- gallery_dl/extractor/gelbooru_v02.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/gallery_dl/extractor/gelbooru_v02.py b/gallery_dl/extractor/gelbooru_v02.py index 8d8b8ad1..fbbd26c8 100644 --- a/gallery_dl/extractor/gelbooru_v02.py +++ b/gallery_dl/extractor/gelbooru_v02.py @@ -36,7 +36,9 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = self.page_start params["limit"] = self.per_page - post = None + post = total = None + count = 0 + while True: try: root = self._api_request(params) @@ -50,12 +52,29 @@ class GelbooruV02Extractor(booru.BooruExtractor): params["pid"] = 0 continue + if total is None: + try: + total = int(root.attrib["count"]) + self.log.debug("%s posts in total", total) + except Exception as exc: + total = 0 + self.log.debug( + "Failed to get total number of posts (%s: %s)", + exc.__class__.__name__, exc) + post = None for post in root: yield post.attrib - if len(root) < self.per_page: - return + num = len(root) + count += num + if num < self.per_page: + if not total or count >= total: + return + if not num: + self.log.debug("Empty response - Retrying") + continue + params["pid"] += 1 def _pagination_html(self, params):