[sankaku] add 'tags' option (#94)

pull/133/head
Mike Fährmann 6 years ago
parent 173add6935
commit 269dc2bbd5
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -617,19 +617,7 @@ Description A (comma-separated) list of post types to extract images, etc. from.
=========== ===== =========== =====
extractor.3dbooru.tags extractor.[booru].tags
----------------------
extractor.e621.tags
-------------------
extractor.gelbooru.tags
-----------------------
extractor.konachan.tags
-----------------------
extractor.rule34.tags
---------------------
extractor.safebooru.tags
------------------------
extractor.yandere.tags
---------------------- ----------------------
=========== ===== =========== =====
Type ``bool`` Type ``bool``

@ -48,5 +48,12 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"] pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://idol.sankakucomplex.com/post/show/694215", { test = [("https://idol.sankakucomplex.com/post/show/694215", {
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd", "content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"count": 1, "options": (("tags", True),),
"keyword": {
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol": "lyumos",
"tags_medium": "3:2_aspect_ratio cosplay",
"tags_general": str,
},
})] })]

@ -11,8 +11,10 @@
from .common import SharedConfigExtractor, Message from .common import SharedConfigExtractor, Message
from .. import text, util, exception from .. import text, util, exception
from ..cache import cache from ..cache import cache
import time import collections
import random import random
import time
import re
class SankakuExtractor(SharedConfigExtractor): class SankakuExtractor(SharedConfigExtractor):
@ -30,6 +32,7 @@ class SankakuExtractor(SharedConfigExtractor):
self.logged_in = True self.logged_in = True
self.start_page = 1 self.start_page = 1
self.start_post = 0 self.start_post = 0
self.extags = self.config("tags", False)
self.wait_min = self.config("wait-min", 2.5) self.wait_min = self.config("wait-min", 2.5)
self.wait_max = self.config("wait-max", 5.0) self.wait_max = self.config("wait-max", 5.0)
if self.wait_max < self.wait_min: if self.wait_max < self.wait_min:
@ -81,7 +84,7 @@ class SankakuExtractor(SharedConfigExtractor):
height, pos = extr(page, 'height=', '>', pos) height, pos = extr(page, 'height=', '>', pos)
file_url = extr(page, '<embed src="', '"', pos)[0] file_url = extr(page, '<embed src="', '"', pos)[0]
return { data = {
"id": text.parse_int(post_id), "id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0], "md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": tags, "tags": tags,
@ -94,6 +97,17 @@ class SankakuExtractor(SharedConfigExtractor):
"height": text.parse_int(height), "height": text.parse_int(height),
} }
if self.extags:
tags = collections.defaultdict(list)
tags_html = text.extract(page, '<ul id=tag-sidebar>', '</ul>')[0]
pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
for tag_type, tag_name in pattern.findall(tags_html):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
data["tags_" + key] = " ".join(value)
return data
def wait(self): def wait(self):
"""Wait for a randomly chosen amount of seconds""" """Wait for a randomly chosen amount of seconds"""
time.sleep(random.uniform(self.wait_min, self.wait_max)) time.sleep(random.uniform(self.wait_min, self.wait_max))
@ -261,7 +275,15 @@ class SankakuPostExtractor(SankakuExtractor):
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"] pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://chan.sankakucomplex.com/post/show/360451", { test = [("https://chan.sankakucomplex.com/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229", "content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1, "options": (("tags", True),),
"keyword": {
"tags_artist": "bonocho",
"tags_copyright": "batman_(series) the_dark_knight",
"tags_medium": "sketch copyright_name",
"tags_studio": "dc_comics",
"tags_character": str,
"tags_general": str,
},
})] })]
def __init__(self, match): def __init__(self, match):

@ -22,6 +22,8 @@ TRAVIS_SKIP = {
# temporary issues, etc. # temporary issues, etc.
BROKEN = { BROKEN = {
"8chan",
"subapics",
"whatisthisimnotgoodwithcomputers", "whatisthisimnotgoodwithcomputers",
} }

Loading…
Cancel
Save