[gelbooru] work around pagination limits

Gelbooru only allows retrieving the latest 20k posts for a tag search.
Add 'id:<N' to the search tags to work around that limitation, where N
is the ID of the last retrieved post.

http://gelbooru.me/index.php?page=forum&s=view&id=1467
pull/2065/head
Mike Fährmann 3 years ago
parent f2ae179713
commit 93cef78450
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -27,8 +27,21 @@ class GelbooruV02Extractor(booru.BooruExtractor):
params["pid"] = self.page_start
params["limit"] = self.per_page
post = None
while True:
root = self._api_request(params)
try:
root = self._api_request(params)
except ElementTree.ParseError:
if "tags" not in params or post is None:
raise
taglist = [tag for tag in params["tags"].split()
if not tag.startswith("id:<")]
taglist.append("id:<" + str(post.attrib["id"]))
params["tags"] = " ".join(taglist)
params["pid"] = 0
continue
post = None
for post in root:
yield post.attrib

Loading…
Cancel
Save