[danbooru] rewrite to use multiple extractors

pull/13/head
Mike Fährmann 9 years ago
parent 8c3783706d
commit 80917f5392
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -12,20 +12,17 @@ from .common import Extractor, Message
from .. import text
import xml.etree.ElementTree as ET
import json
import os.path
import urllib.parse
class BooruExtractor(Extractor):
info = {}
api_url = ""
def __init__(self, match, info):
def __init__(self):
Extractor.__init__(self)
self.info = info
self.tags = text.unquote(match.group(1))
self.page = "page"
self.params = {"tags": self.tags}
self.headers = {}
self.headers = self.params = {}
def items(self):
yield Message.Version, 1
@ -51,9 +48,9 @@ class BooruExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
# Override this method in derived classes
return {
"category": self.info["category"],
"tags": self.tags
}
def get_file_metadata(self, data):

@ -9,19 +9,29 @@
"""Extract image-urls from https://danbooru.donmai.us/"""
from .booru import JSONBooruExtractor
from .. import text
info = {
"category": "danbooru",
"extractor": "DanbooruExtractor",
"directory": ["{category}", "{tags}"],
"filename": "{category}_{id}_{md5}.{extension}",
"pattern": [
r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*",
],
}
class DanbooruTagExtractor(JSONBooruExtractor):
"""Extract images based on search-tags"""
class DanbooruExtractor(JSONBooruExtractor):
info = {
"category": "danbooru",
"directory": ["{category}", "{tags}"],
"filename": "{category}_{id}_{md5}.{extension}",
}
pattern = [
r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)",
]
def __init__(self, match):
JSONBooruExtractor.__init__(self, match, info)
JSONBooruExtractor.__init__(self)
self.api_url = "https://danbooru.donmai.us/posts.json"
self.tags = text.unquote(match.group(1))
self.params = {"tags": self.tags}
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
return {
"category": self.info["category"],
"tags": self.tags,
}

Loading…
Cancel
Save