diff --git a/CHANGELOG.md b/CHANGELOG.md
index baadca74..a1a53bf1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,6 +1,7 @@
 # Changelog
 
 ## Unreleased
+- Added pool- and post-extractors for `sankaku`
 
 ## 1.1.0 - 2017-12-08
 - Added the ``-r/--limit-rate`` command-line option to set a maximum download rate
diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst
index 86deecdb..a619d834 100644
--- a/docs/supportedsites.rst
+++ b/docs/supportedsites.rst
@@ -62,7 +62,7 @@ RebeccaBlackTech       https://rbt.asia/                  Threads
 Reddit                 https://reddit.com/                Submissions, Subreddits    Optional (OAuth)
 Rule 34                https://rule34.xxx/                Posts, Tag-Searches
 Safebooru              https://safebooru.org/             Posts, Tag-Searches
-Sankaku Channel        https://chan.sankakucomplex.com/   Tag-Searches               Optional
+Sankaku Channel        https://chan.sankakucomplex.com/   Pools, Posts, Tag-Searches Optional
 Sea Otter Scans        https://reader.seaotterscans.com/  Chapters, Manga
 Sen Manga              http://raw.senmanga.com/           Chapters
 Sense-Scans            http://sensescans.com/             Chapters, Manga
diff --git a/gallery_dl/extractor/flickr.py b/gallery_dl/extractor/flickr.py
index 82946c7e..723b19a2 100644
--- a/gallery_dl/extractor/flickr.py
+++ b/gallery_dl/extractor/flickr.py
@@ -112,7 +112,7 @@ class FlickrAlbumExtractor(FlickrExtractor):
     test = [(("https://www.flickr.com/photos/"
               "shona_s/albums/72157633471741607"), {
         "url": "baf4a3d1b15afcecf9638000a12c0eb3d5df9024",
-        "keyword": "40605c5b22feafcd029d1121f4dc8786b0aa7dcc",
+        "keyword": "3a99f962f30691dc1b2da46be56fe1b7768fe707",
     })]
 
     def __init__(self, match):
diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py
index a96376d3..4aec5313 100644
--- a/gallery_dl/extractor/sankaku.py
+++ b/gallery_dl/extractor/sankaku.py
@@ -27,6 +27,7 @@ class SankakuExtractor(SharedConfigExtractor):
     def __init__(self):
         SharedConfigExtractor.__init__(self)
         self.logged_in = True
+        self.start_page = 1
         self.start_post = 0
         self.wait_min = self.config("wait-min", 2)
         self.wait_max = self.config("wait-max", 4)
@@ -146,7 +147,6 @@ class SankakuTagExtractor(SankakuExtractor):
     def __init__(self, match):
         SankakuExtractor.__init__(self)
         self.tags = text.unquote(match.group(1).replace("+", " "))
-        self.start_page = 1
 
     def skip(self, num):
         pages, posts = divmod(num, self.per_page)
@@ -186,6 +186,44 @@ class SankakuTagExtractor(SankakuExtractor):
             "point onwards after setting up an account.)")
 
 
+class SankakuPoolExtractor(SankakuExtractor):
+    """Extractor for image-pools from chan.sankakucomplex.com"""
+    subcategory = "pool"
+    directory_fmt = ["{category}", "pool", "{pool}"]
+    pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/pool/show/(\d+)"]
+    test = [("https://chan.sankakucomplex.com/pool/show/90", {
+        "count": 5,
+    })]
+    per_page = 24
+
+    def __init__(self, match):
+        SankakuExtractor.__init__(self)
+        self.pool_id = match.group(1)
+
+    def skip(self, num):
+        pages, posts = divmod(num, self.per_page)
+        self.start_page += pages
+        self.start_post += posts
+        return num
+
+    def get_metadata(self):
+        return {"pool": self.pool_id}
+
+    def get_posts(self):
+        url = self.root + "/pool/show/" + self.pool_id
+        params = {"page": self.start_page}
+
+        while True:
+            page = self.request(url, params=params, retries=10).text
+            ids = list(text.extract_iter(page, '" id=p', '>'))
+
+            yield from ids
+            if len(ids) < self.per_page:
+                return
+
+            params["page"] += 1
+
+
 class SankakuPostExtractor(SankakuExtractor):
     """Extractor for single images from chan.sankakucomplex.com"""
     subcategory = "post"
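
For reference (not part of the patch): a minimal, standalone sketch of the pagination scheme that the new SankakuPoolExtractor.get_posts() follows, rewritten with requests and re instead of gallery-dl's internal request()/text.extract_iter() helpers. The per_page value of 24, the pool URL shape, and the '" id=p' marker are taken from the diff above; the pool_post_ids name and everything else here are illustrative assumptions.

import re
import requests

def pool_post_ids(pool_id, per_page=24):
    """Yield post IDs of a Sankaku pool, page by page (illustrative sketch).

    The real extractor above goes through gallery-dl's rate-limited
    request() and text.extract_iter() rather than requests/re.
    """
    url = "https://chan.sankakucomplex.com/pool/show/" + pool_id
    page = 1
    while True:
        html = requests.get(url, params={"page": page}).text
        # Post thumbnails appear to carry markup like id=p123456; the patch
        # extracts whatever sits between '" id=p' and the closing '>'.
        ids = re.findall(r'" id=p(\d+)', html)
        yield from ids
        # A page with fewer than per_page entries is the last one.
        if len(ids) < per_page:
            return
        page += 1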