[sankaku] add 'tags' option (#94)

pull/133/head
Mike Fährmann 6 years ago
parent 173add6935
commit 269dc2bbd5
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -617,19 +617,7 @@ Description A (comma-separated) list of post types to extract images, etc. from.
=========== =====
extractor.3dbooru.tags
----------------------
extractor.e621.tags
-------------------
extractor.gelbooru.tags
-----------------------
extractor.konachan.tags
-----------------------
extractor.rule34.tags
---------------------
extractor.safebooru.tags
------------------------
extractor.yandere.tags
extractor.[booru].tags
----------------------
=========== =====
Type ``bool``

@ -48,5 +48,12 @@ class IdolcomplexPostExtractor(IdolcomplexExtractor,
pattern = [r"(?:https?://)?idol\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://idol.sankakucomplex.com/post/show/694215", {
"content": "694ec2491240787d75bf5d0c75d0082b53a85afd",
"count": 1,
"options": (("tags", True),),
"keyword": {
"tags_character": "shani_(the_witcher)",
"tags_copyright": "the_witcher",
"tags_idol": "lyumos",
"tags_medium": "3:2_aspect_ratio cosplay",
"tags_general": str,
},
})]

@ -11,8 +11,10 @@
from .common import SharedConfigExtractor, Message
from .. import text, util, exception
from ..cache import cache
import time
import collections
import random
import time
import re
class SankakuExtractor(SharedConfigExtractor):
@ -30,6 +32,7 @@ class SankakuExtractor(SharedConfigExtractor):
self.logged_in = True
self.start_page = 1
self.start_post = 0
self.extags = self.config("tags", False)
self.wait_min = self.config("wait-min", 2.5)
self.wait_max = self.config("wait-max", 5.0)
if self.wait_max < self.wait_min:
@ -81,7 +84,7 @@ class SankakuExtractor(SharedConfigExtractor):
height, pos = extr(page, 'height=', '>', pos)
file_url = extr(page, '<embed src="', '"', pos)[0]
return {
data = {
"id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": tags,
@ -94,6 +97,17 @@ class SankakuExtractor(SharedConfigExtractor):
"height": text.parse_int(height),
}
if self.extags:
tags = collections.defaultdict(list)
tags_html = text.extract(page, '<ul id=tag-sidebar>', '</ul>')[0]
pattern = re.compile(r'tag-type-([^>]+)><a href="/\?tags=([^"]+)')
for tag_type, tag_name in pattern.findall(tags_html):
tags[tag_type].append(text.unquote(tag_name))
for key, value in tags.items():
data["tags_" + key] = " ".join(value)
return data
def wait(self):
"""Sleep for a random number of seconds between wait_min and wait_max.

The delay is drawn with random.uniform() from the instance's
wait_min / wait_max attributes and passed to time.sleep().
"""
time.sleep(random.uniform(self.wait_min, self.wait_max))
@ -261,7 +275,15 @@ class SankakuPostExtractor(SankakuExtractor):
pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/post/show/(\d+)"]
test = [("https://chan.sankakucomplex.com/post/show/360451", {
"content": "5e255713cbf0a8e0801dc423563c34d896bb9229",
"count": 1,
"options": (("tags", True),),
"keyword": {
"tags_artist": "bonocho",
"tags_copyright": "batman_(series) the_dark_knight",
"tags_medium": "sketch copyright_name",
"tags_studio": "dc_comics",
"tags_character": str,
"tags_general": str,
},
})]
def __init__(self, match):

@ -22,6 +22,8 @@ TRAVIS_SKIP = {
# temporary issues, etc.
BROKEN = {
"8chan",
"subapics",
"whatisthisimnotgoodwithcomputers",
}

Loading…
Cancel
Save