[gelbooru] implement 'pool' pagination (#2853)

pull/2877/head
Mike Fährmann 2 years ago
parent 67a2efb885
commit d508b2c049
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -10,7 +10,7 @@
from .common import Extractor, Message from .common import Extractor, Message
from . import gelbooru_v02 from . import gelbooru_v02
from .. import text, util, exception from .. import text, exception
import binascii import binascii
@@ -90,28 +90,29 @@ class GelbooruTagExtractor(GelbooruBase,
class GelbooruPoolExtractor(GelbooruBase, class GelbooruPoolExtractor(GelbooruBase,
gelbooru_v02.GelbooruV02PoolExtractor): gelbooru_v02.GelbooruV02PoolExtractor):
"""Extractor for image-pools from gelbooru.com""" """Extractor for gelbooru pools"""
per_page = 45
pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?" pattern = (r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?"
r"\?page=pool&s=show&id=(?P<pool>\d+)") r"\?page=pool&s=show&id=(?P<pool>\d+)")
test = ( test = (
("https://gelbooru.com/index.php?page=pool&s=show&id=761", { ("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"count": 6, "count": 6,
}), }),
("https://gelbooru.com/index.php?page=pool&s=show&id=761", {
"options": (("api", False),),
"count": 6,
}),
) )
def metadata(self): def metadata(self):
url = "{}/index.php?page=pool&s=show&id={}".format( url = self.root + "/index.php"
self.root, self.pool_id) self._params = {
page = self.request(url).text "page": "pool",
"s" : "show",
"id" : self.pool_id,
"pid" : self.page_start,
}
self._page = self.request(url, params=self._params).text
name, pos = text.extract(page, "<h3>Now Viewing: ", "</h3>") name, pos = text.extract(self._page, "<h3>Now Viewing: ", "</h3>")
if not name: if not name:
raise exception.NotFoundError("pool") raise exception.NotFoundError("pool")
self.post_ids = text.extract_iter(page, 'class="" id="p', '"', pos)
return { return {
"pool": text.parse_int(self.pool_id), "pool": text.parse_int(self.pool_id),
@@ -119,9 +120,23 @@ class GelbooruPoolExtractor(GelbooruBase,
} }
def posts(self): def posts(self):
params = {} url = self.root + "/index.php"
for params["id"] in util.advance(self.post_ids, self.page_start): params = self._params
yield from self._api_request(params)
page = self._page
del self._page
data = {}
while True:
num_ids = 0
for data["id"] in text.extract_iter(page, '" id="p', '"'):
num_ids += 1
yield from self._api_request(data)
if num_ids < self.per_page:
return
params["pid"] += self.per_page
page = self.request(url, params=params).text
class GelbooruPostExtractor(GelbooruBase, class GelbooruPostExtractor(GelbooruBase,

Loading…
Cancel
Save