diff --git a/docs/configuration.rst b/docs/configuration.rst
index d9164102..8b3a393f 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -617,6 +617,25 @@ Description A (comma-separated) list of post types to extract images, etc. from.
 =========== =====
 
 
+extractor.yandere.tags
+----------------------
+=========== =====
+Type        ``bool``
+Default     ``false``
+Description Split tags into different categories
+            and provide the following additional metadata-entries:
+
+            - ``tags_artist``
+            - ``tags_character``
+            - ``tags_circle``
+            - ``tags_copyright``
+            - ``tags_faults``
+            - ``tags_general``
+
+            Note: This requires 1 additional HTTP request for each post.
+=========== =====
+
+
 Downloader Options
 ==================
 
diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf
index 9763e977..b49d0e1b 100644
--- a/docs/gallery-dl.conf
+++ b/docs/gallery-dl.conf
@@ -104,6 +104,10 @@
             "inline": false,
             "posts": "photo",
             "reblogs": true
+        },
+        "yandere":
+        {
+            "tags": false
         }
     },
 
diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py
index 0113f62c..922d8201 100644
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@@ -53,6 +53,7 @@ class BooruExtractor(SharedConfigExtractor):
                     if url.startswith("/"):
                         url = text.urljoin(self.api_url, url)
                     image.update(data)
+                    self.prepare(image)
                     yield Message.Url, url, text.nameext_from_url(url, image)
                 except KeyError:
                     continue
@@ -80,6 +81,9 @@ class BooruExtractor(SharedConfigExtractor):
         """Collect metadata for extractor-job"""
         return {}
 
+    def prepare(self, image):
+        """Prepare and modify an 'image' object"""
+
 
 class XmlParserMixin():
     """Mixin for XML based API responses"""
diff --git a/gallery_dl/extractor/yandere.py b/gallery_dl/extractor/yandere.py
index 29f87604..62146ce3 100644
--- a/gallery_dl/extractor/yandere.py
+++ b/gallery_dl/extractor/yandere.py
@@ -9,6 +9,7 @@
 """Extract images from https://yande.re/"""
 
 from . import booru
+from .. import text
 
 
 class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
@@ -16,6 +17,32 @@ class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
     category = "yandere"
     api_url = "https://yande.re/post.json"
 
+    def __init__(self, match):
+        super().__init__(match)
+        if self.config("tags", False):
+            self.prepare = self._categorize_tags
+
+    def _categorize_tags(self, image):
+        """Add 'tags_<category>' entries to 'image' from its post page"""
+        url = "https://yande.re/post/show/{}".format(image["id"])
+        page = self.request(url).text
+        taghtml = text.extract(page, '<ul id="tag-sidebar">', '</ul>')[0]
+
+        pos = 0
+        tags = {"artist": [], "copyright": [], "character": [],
+                "circle": [], "faults": [], "general": []}
+
+        while True:
+            tagtype, pos = text.extract(taghtml, "tag-type-", '"', pos)
+            if not tagtype:
+                break
+            tagname, pos = text.extract(taghtml, "?tags=", '"', pos)
+            # unknown tag types get their own list instead of a KeyError
+            tags.setdefault(tagtype, []).append(text.unquote(tagname))
+
+        for key, value in tags.items():
+            image["tags_" + key] = " ".join(value)
+
 
 class YandereTagExtractor(booru.TagMixin, YandereExtractor):
     """Extractor for images from yande.re based on search-tags"""
@@ -39,6 +66,15 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
     pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
     test = [("https://yande.re/post/show/51824", {
         "content": "59201811c728096b2d95ce6896fd0009235fe683",
+        "options": (("tags", True),),
+        "keyword": {
+            "tags_artist": "sasaki_tamaru",
+            "tags_circle": "softhouse_chara",
+            "tags_copyright": "ouzoku",
+            "tags_character": str,
+            "tags_faults": str,
+            "tags_general": str,
+        },
     })]
 
 