[agnph] implement 'tags' option (#5284)

2 months ago · 279854cd9e
parent d2dda2bc00
commit 279854cd9e
2 changed files with 35 additions and 0 deletions
--- a/gallery_dl/extractor/agnph.py
+++ b/gallery_dl/extractor/agnph.py
@ -12,6 +12,8 @@ from . import booru
 from .. import text

 from xml.etree import ElementTree
+import collections
+import re

 BASE_PATTERN = r"(?:https?://)?agn\.ph"

@ -22,6 +24,17 @@ class AgnphExtractor(booru.BooruExtractor):
    page_start = 1
    per_page = 45

+    TAG_TYPES = {  # typetag class prefix -> 'tags_<name>' key suffix
+        "a": "artist",
+        "b": "copyright",
+        "c": "character",
+        "d": "species",
+        "m": "general",
+    }
+
+    def _init(self):
+        """Set the 'confirmed_age' cookie to 'true' for agn.ph"""
+        jar = self.cookies
+        jar.set("confirmed_age", "true", domain="agn.ph")
+
    def _prepare(self, post):
        post["date"] = text.parse_timestamp(post["created_at"])
        post["status"] = post["status"].strip()
@ -50,6 +63,23 @@ class AgnphExtractor(booru.BooruExtractor):

            params["page"] += 1

+    def _html(self, post):
+        """Request the 'show' page of 'post' and return its text"""
+        post_url = "{}/gallery/post/show/{}/".format(self.root, post["id"])
+        response = self.request(post_url)
+        return response.text
+
+    def _tags(self, post, page):
+        """Add per-type 'tags_<type>' strings to 'post'
+
+        Looks up the tag list in 'page' (text.extr() with the
+        '<ul class="taglist">' and '<h3>Statistics</h3>' markers),
+        groups every '<x>typetag' entry found there by its type
+        character, and stores each group as one space-separated string.
+        Returns without touching 'post' when the extracted section is empty.
+        """
+        tag_container = text.extr(
+            page, '<ul class="taglist">', '<h3>Statistics</h3>')
+        if not tag_container:
+            return
+
+        tags = collections.defaultdict(list)
+        pattern = re.compile(r'class="(.)typetag">([^<]+)')
+        for tag_type, tag_name in pattern.findall(tag_container):
+            tags[tag_type].append(text.unquote(tag_name).replace(" ", "_"))
+        for key, value in tags.items():
+            # the pattern accepts any type character; fall back to the raw
+            # character instead of raising KeyError for an unmapped one
+            name = self.TAG_TYPES.get(key, key)
+            post["tags_" + name] = " ".join(value)
+

 class AgnphTagExtractor(AgnphExtractor):
    subcategory = "tag"
--- a/test/results/agnph.py
+++ b/test/results/agnph.py
@ -20,6 +20,7 @@ __tests__ = (
    "#url"     : "https://agn.ph/gallery/post/show/501604/",
    "#category": ("booru", "agnph", "post"),
    "#class"   : agnph.AgnphPostExtractor,
+    "#options" : {"tags": True},
    "#urls"        : "http://agn.ph/gallery/data/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
    "#sha1_content": "93c8b2d3f53e891ad8fa68d5f60f8c7a70acd836",

@ -41,6 +42,10 @@ __tests__ = (
    "source"      : "https://inkbunny.net/s/2886519",
    "status"      : "approved",
    "tags"        : "anthro female hisuian_sneasel regional_form reyn_goldfur shelly_the_sneasel sneasel solo",
+    "tags_artist" : "reyn_goldfur",
+    "tags_character": "shelly_the_sneasel",
+    "tags_general": "anthro female solo",
+    "tags_species": "hisuian_sneasel regional_form sneasel",
    "thumbnail_url": "http://agn.ph/gallery/data/thumb/7d/a5/7da50021f3e86f6cf1c215652060d772.png",
    "width"       : "953",