[sankaku] add pool extractor

pull/54/head
Mike Fährmann 7 years ago
parent e52f0cc1ed
commit 19a6ae57b2
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,6 +1,7 @@
# Changelog # Changelog
## Unreleased ## Unreleased
- Added pool- and post-extractors for `sankaku`
## 1.1.0 - 2017-12-08 ## 1.1.0 - 2017-12-08
- Added the ``-r/--limit-rate`` command-line option to set a maximum download rate - Added the ``-r/--limit-rate`` command-line option to set a maximum download rate

@ -62,7 +62,7 @@ RebeccaBlackTech https://rbt.asia/ Threads
Reddit https://reddit.com/ Submissions, Subreddits Optional (OAuth) Reddit https://reddit.com/ Submissions, Subreddits Optional (OAuth)
Rule 34 https://rule34.xxx/ Posts, Tag-Searches Rule 34 https://rule34.xxx/ Posts, Tag-Searches
Safebooru https://safebooru.org/ Posts, Tag-Searches Safebooru https://safebooru.org/ Posts, Tag-Searches
Sankaku Channel https://chan.sankakucomplex.com/ Tag-Searches Optional Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag-Searches Optional
Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga Sea Otter Scans https://reader.seaotterscans.com/ Chapters, Manga
Sen Manga http://raw.senmanga.com/ Chapters Sen Manga http://raw.senmanga.com/ Chapters
Sense-Scans http://sensescans.com/ Chapters, Manga Sense-Scans http://sensescans.com/ Chapters, Manga

@ -112,7 +112,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
test = [(("https://www.flickr.com/photos/" test = [(("https://www.flickr.com/photos/"
"shona_s/albums/72157633471741607"), { "shona_s/albums/72157633471741607"), {
"url": "baf4a3d1b15afcecf9638000a12c0eb3d5df9024", "url": "baf4a3d1b15afcecf9638000a12c0eb3d5df9024",
"keyword": "40605c5b22feafcd029d1121f4dc8786b0aa7dcc", "keyword": "3a99f962f30691dc1b2da46be56fe1b7768fe707",
})] })]
def __init__(self, match): def __init__(self, match):

@ -27,6 +27,7 @@ class SankakuExtractor(SharedConfigExtractor):
def __init__(self): def __init__(self):
SharedConfigExtractor.__init__(self) SharedConfigExtractor.__init__(self)
self.logged_in = True self.logged_in = True
self.start_page = 1
self.start_post = 0 self.start_post = 0
self.wait_min = self.config("wait-min", 2) self.wait_min = self.config("wait-min", 2)
self.wait_max = self.config("wait-max", 4) self.wait_max = self.config("wait-max", 4)
@ -146,7 +147,6 @@ class SankakuTagExtractor(SankakuExtractor):
def __init__(self, match): def __init__(self, match):
SankakuExtractor.__init__(self) SankakuExtractor.__init__(self)
self.tags = text.unquote(match.group(1).replace("+", " ")) self.tags = text.unquote(match.group(1).replace("+", " "))
self.start_page = 1
def skip(self, num): def skip(self, num):
pages, posts = divmod(num, self.per_page) pages, posts = divmod(num, self.per_page)
@ -186,6 +186,44 @@ class SankakuTagExtractor(SankakuExtractor):
"point onwards after setting up an account.)") "point onwards after setting up an account.)")
class SankakuPoolExtractor(SankakuExtractor):
    """Extractor for image-pools from chan.sankakucomplex.com"""
    subcategory = "pool"
    directory_fmt = ["{category}", "pool", "{pool}"]
    pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/pool/show/(\d+)"]
    test = [("https://chan.sankakucomplex.com/pool/show/90", {
        "count": 5,
    })]
    # Page size used for skip arithmetic and for detecting the last page.
    per_page = 24

    def __init__(self, match):
        """Store the pool ID captured by the first group of 'match'."""
        SankakuExtractor.__init__(self)
        self.pool_id = match.group(1)

    def skip(self, num):
        """Advance the start position by 'num' posts and return 'num'.

        Whole pages are added to 'start_page'; the remainder is added to
        'start_post'.
        """
        whole_pages, leftover = divmod(num, self.per_page)
        self.start_post += leftover
        self.start_page += whole_pages
        return num

    def get_metadata(self):
        """Return a dict holding the pool ID under the key 'pool'."""
        return {"pool": self.pool_id}

    def get_posts(self):
        """Yield the values extracted from each page of the pool.

        Pages are requested one after another, beginning at 'start_page',
        until a page contributes fewer than 'per_page' values.
        """
        pool_url = self.root + "/pool/show/" + self.pool_id
        query = {"page": self.start_page}

        while True:
            html = self.request(pool_url, params=query, retries=10).text
            # Collected into a list because the count is needed below.
            # Presumably these are post IDs taken from 'id=p...' attributes
            # (verify against text.extract_iter).
            found = list(text.extract_iter(html, '" id=p', '>'))
            yield from found

            # A short page is treated as the last one.
            if len(found) < self.per_page:
                break
            query["page"] += 1
class SankakuPostExtractor(SankakuExtractor): class SankakuPostExtractor(SankakuExtractor):
"""Extractor for single images from chan.sankakucomplex.com""" """Extractor for single images from chan.sankakucomplex.com"""
subcategory = "post" subcategory = "post"

Loading…
Cancel
Save