[danbooru] rewrite to use multiple extractors

9 years ago · 80917f5392
parent 8c3783706d
commit 80917f5392
2 changed files with 25 additions and 18 deletions
--- a/gallery_dl/extractor/booru.py
+++ b/gallery_dl/extractor/booru.py
@ -12,20 +12,17 @@ from .common import Extractor, Message
 from .. import text
 import xml.etree.ElementTree as ET
 import json
-import os.path
 import urllib.parse

 class BooruExtractor(Extractor):

+    info = {}
    api_url = ""

-    def __init__(self, match, info):
+    def __init__(self):
        Extractor.__init__(self)
-        self.info = info
-        self.tags = text.unquote(match.group(1))
        self.page = "page"
-        self.params = {"tags": self.tags}
-        self.headers = {}
+        self.headers = self.params = {}

    def items(self):
        yield Message.Version, 1
@ -51,9 +48,9 @@ class BooruExtractor(Extractor):

    def get_job_metadata(self):
        """Collect metadata for extractor-job"""
+        # Override this method in derived classes
        return {
            "category": self.info["category"],
-            "tags": self.tags
        }

    def get_file_metadata(self, data):
--- a/gallery_dl/extractor/danbooru.py
+++ b/gallery_dl/extractor/danbooru.py
@ -9,19 +9,29 @@
 """Extract image-urls from https://danbooru.donmai.us/"""

 from .booru import JSONBooruExtractor
+from .. import text

-info = {
+class DanbooruTagExtractor(JSONBooruExtractor):
+    """Extract images based on search-tags"""
+
+    info = {
        "category": "danbooru",
-    "extractor": "DanbooruExtractor",
        "directory": ["{category}", "{tags}"],
        "filename": "{category}_{id}_{md5}.{extension}",
-    "pattern": [
-        r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*",
-    ],
-}
-
-class DanbooruExtractor(JSONBooruExtractor):
+    }
+    pattern = [
+        r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)",
+    ]

    def __init__(self, match):
-        JSONBooruExtractor.__init__(self, match, info)
+        JSONBooruExtractor.__init__(self)
        self.api_url = "https://danbooru.donmai.us/posts.json"
+        self.tags = text.unquote(match.group(1))
+        self.params = {"tags": self.tags}
+
+    def get_job_metadata(self):
+        """Collect metadata for extractor-job"""
+        return {
+            "category": self.info["category"],
+            "tags": self.tags,
+        }