[yandere] add option to split tags by type (#92)

pull/133/head
Mike Fährmann 6 years ago
parent a699787d01
commit 87853538b4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -617,6 +617,25 @@ Description A (comma-separated) list of post types to extract images, etc. from.
=========== =====
extractor.yandere.tags
----------------------
=========== =====
Type ``bool``
Default ``false``
Description Split tags by type (category)
and provide the following additional metadata entries, each a space-separated string of tag names:
- ``tags_artist``
- ``tags_character``
- ``tags_circle``
- ``tags_copyright``
- ``tags_faults``
- ``tags_general``
Note: This requires 1 additional HTTP request for each post.
=========== =====
Downloader Options
==================

@ -104,6 +104,10 @@
"inline": false,
"posts": "photo",
"reblogs": true
},
"yandere":
{
"tags": false
}
},

@ -53,6 +53,7 @@ class BooruExtractor(SharedConfigExtractor):
if url.startswith("/"):
url = text.urljoin(self.api_url, url)
image.update(data)
self.prepare(image)
yield Message.Url, url, text.nameext_from_url(url, image)
except KeyError:
continue
@ -80,6 +81,9 @@ class BooruExtractor(SharedConfigExtractor):
"""Collect metadata for extractor-job"""
return {}
def prepare(self, image):
    """Hook called with each 'image' object before it is yielded.

    Does nothing by default; an extractor may replace it to adjust
    the 'image' object in place.
    """
    return None
class XmlParserMixin():
"""Mixin for XML based API responses"""

@ -9,6 +9,7 @@
"""Extract images from https://yande.re/"""
from . import booru
from .. import text
class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
@ -16,6 +17,30 @@ class YandereExtractor(booru.MoebooruPageMixin, booru.BooruExtractor):
category = "yandere"
api_url = "https://yande.re/post.json"
def __init__(self, match):
    """Initialize the extractor and enable tag splitting if configured"""
    super().__init__(match)
    split_tags = self.config("tags", False)
    if split_tags:
        # Shadow the prepare() hook on this instance only, so that every
        # image is passed through _categorize_tags() before it is yielded.
        self.prepare = self._categorize_tags
def _categorize_tags(self, image):
    """Split the tags of a post by type and store them in 'image'.

    Requests the HTML page of the post (one additional HTTP request)
    and scans its tag sidebar for 'tag-type-<type>' / '?tags=<name>'
    pairs.  For every tag type, a space-separated string of tag names
    is stored as image["tags_<type>"].  The six documented types
    (artist, character, circle, copyright, faults, general) are always
    set, as empty strings if the post has no tag of that type.
    """
    url = "https://yande.re/post/show/{}".format(image["id"])
    page = self.request(url).text
    # NOTE(review): presumably text.extract() returns a falsy first
    # element when a marker is missing (the loop below relies on the
    # same for 'tagtype') -- fall back to "" so a page without a tag
    # sidebar yields empty tag lists instead of an error.
    taghtml = text.extract(
        page, '<ul id="tag-sidebar">', '</ul>')[0] or ""

    tags = {"artist": [], "copyright": [], "character": [],
            "circle": [], "faults": [], "general": []}
    pos = 0
    while True:
        tagtype, pos = text.extract(taghtml, "tag-type-", '"', pos)
        if not tagtype:
            break
        tagname, pos = text.extract(taghtml, "?tags=", '"', pos)
        if not tagname:
            # a type marker without a tag link; nothing left to collect
            break
        # setdefault() instead of plain indexing: a tag type outside the
        # predefined ones would otherwise raise KeyError, and the code
        # that invokes prepare() handles KeyError by skipping the post
        # ('except KeyError: continue'), silently losing the image.
        tags.setdefault(tagtype, []).append(text.unquote(tagname))

    for key, value in tags.items():
        image["tags_" + key] = " ".join(value)
class YandereTagExtractor(booru.TagMixin, YandereExtractor):
"""Extractor for images from yande.re based on search-tags"""
@ -39,6 +64,15 @@ class YanderePostExtractor(booru.PostMixin, YandereExtractor):
pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post/show/(?P<post>\d+)"]
test = [("https://yande.re/post/show/51824", {
"content": "59201811c728096b2d95ce6896fd0009235fe683",
"options": (("tags", True),),
"keyword": {
"tags_artist": "sasaki_tamaru",
"tags_circle": "softhouse_chara",
"tags_copyright": "ouzoku",
"tags_character": str,
"tags_faults": str,
"tags_general": str,
},
})]

Loading…
Cancel
Save