From d0886f411e4ff84f8c18fbf8e37569af54fb86ac Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 21 Dec 2017 21:42:40 +0100 Subject: [PATCH] [gelbooru] re-enable API use (closes #56) Gelbooru's API allows access to all images and is not restricted to the first 20000. This also adds an option to select between API use and manual information extraction in case their API gets disabled again. --- docs/configuration.rst | 12 ++++++ docs/gallery-dl.conf | 3 +- gallery_dl/extractor/gelbooru.py | 69 +++++++++++++++++++++++++------- 3 files changed, 69 insertions(+), 15 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index b8a76cf5..f9916b7c 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -441,6 +441,18 @@ Description Sets the maximum allowed size for downloaded images. =========== ===== +extractor.gelbooru.api +---------------------- +=========== ===== +Type ``bool`` +Default ``true`` +Description Enable use of Gelbooru's API. + + Set this value to `false` if the API has been disabled to switch + to manual information extraction. +=========== ===== + + extractor.gfycat.format ----------------------- =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 2ea7e303..c27dee04 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -70,7 +70,8 @@ }, "gelbooru": { - "filename": "{category}_{id:>07}_{md5}.{extension}" + "filename": "{category}_{id:>07}_{md5}.{extension}", + "api": true }, "reddit": { diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index b5918509..1ca0b3b0 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -9,7 +9,8 @@ """Extract images from https://gelbooru.com/""" from .common import SharedConfigExtractor, Message -from .. import text, util +from .. import text, util, exception +import xml.etree.ElementTree as ET class GelbooruExtractor(SharedConfigExtractor): @@ -17,19 +18,26 @@ class GelbooruExtractor(SharedConfigExtractor): basecategory = "booru" category = "gelbooru" filename_fmt = "{category}_{id}_{md5}.{extension}" + api_url = "https://gelbooru.com/index.php?page=dapi&s=post&q=index" def __init__(self): SharedConfigExtractor.__init__(self) self.start_post = 0 + self.use_api = self.config("api", True) + if self.use_api: + self.get_post_data = self.get_post_data_api def items(self): yield Message.Version, 1 yield Message.Directory, self.get_metadata() - for post_id in util.advance(self.get_posts(), self.start_post): - data = self.get_post_data(post_id) - url = data["file_url"] - yield Message.Url, url, text.nameext_from_url(url, data) + for post in util.advance(self.get_posts(), self.start_post): + if isinstance(post, str): + post = self.get_post_data(post) + for key in ("id", "width", "height", "score", "change"): + post[key] = util.safe_int(post[key]) + url = post["file_url"] + yield Message.Url, url, text.nameext_from_url(url, post) def skip(self, num): self.start_post += num @@ -40,7 +48,7 @@ class GelbooruExtractor(SharedConfigExtractor): return {} def get_posts(self): - """Return an iterable containing all relevant post ids""" + """Return an iterable containing all relevant post objects""" def get_post_data(self, post_id): """Extract metadata of a single post""" @@ -58,14 +66,20 @@ class GelbooruExtractor(SharedConfigExtractor): (None , '
  • Score: ', ''), ("score" , '>', '<'), ("file_url" , '
  • Now Viewing: ", "") self.posts = list(text.extract_iter(page, 'id="p', '"', pos)) + if not name: + raise exception.NotFoundError("pool") + return { "pool": util.safe_int(self.pool_id), "pool_name": text.unescape(name),