[booru] add base classes for tag/pool/post extractors

pull/13/head
Mike Fährmann 9 years ago
parent 900577b013
commit 74199422a8
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -22,7 +22,8 @@ class BooruExtractor(Extractor):
def __init__(self):
Extractor.__init__(self)
self.page = "page"
self.headers = self.params = {}
self.params = {"limit": 50}
self.headers = {}
def items(self):
yield Message.Version, 1
@ -50,12 +51,12 @@ class BooruExtractor(Extractor):
"""Collect metadata for extractor-job"""
# Override this method in derived classes
return {
"category": self.info["category"],
"category": self.category,
}
def get_file_metadata(self, data):
"""Collect metadata for a downloadable file"""
data["category"] = self.info["category"]
data["category"] = self.category
return text.nameext_from_url(self.get_file_url(data), data)
def get_file_url(self, data):
@ -75,10 +76,10 @@ class JSONBooruExtractor(BooruExtractor):
self.request(self.api_url, verify=True, params=self.params,
headers=self.headers).text
)
if len(images) == 0:
return
for data in images:
yield data
if len(images) < self.params["limit"]:
return
self.update_page()
@ -90,8 +91,56 @@ class XMLBooruExtractor(BooruExtractor):
root = ET.fromstring(
self.request(self.api_url, verify=True, params=self.params).text
)
if len(root) == 0:
return
for item in root:
yield item.attrib
if len(root) < self.params["limit"]:
return
self.update_page()
class BooruTagExtractor(BooruExtractor):
"""Extract images based on search-tags"""
directory_fmt = ["{category}", "{tags}"]
filename_fmt = "{category}_{id}_{md5}.{extension}"
def __init__(self, match):
BooruExtractor.__init__(self)
self.tags = text.unquote(match.group(1))
self.params["tags"] = self.tags
def get_job_metadata(self):
return {
"category": self.category,
"tags": self.tags,
}
class BooruPoolExtractor(BooruExtractor):
"""Extract image-pools"""
directory_fmt = ["{category}", "pool", "{pool}"]
filename_fmt = "{category}_{id}_{md5}.{extension}"
def __init__(self, match):
BooruExtractor.__init__(self)
self.pool = match.group(1)
self.params["tags"] = "pool:" + self.pool
def get_job_metadata(self):
return {
"category": self.category,
"pool": self.pool,
}
class BooruPostExtractor(BooruExtractor):
"""Extract single images"""
directory_fmt = ["{category}"]
filename_fmt = "{category}_{id}_{md5}.{extension}"
def __init__(self, match):
BooruExtractor.__init__(self)
self.post = match.group(1)
self.params["tags"] = "id:" + self.post

Loading…
Cancel
Save