[sankaku] reimplement login support (#1176, #1182)

pull/1195/head
Mike Fährmann 4 years ago
parent 8d2e4e5f13
commit 465015f75a
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -225,6 +225,7 @@ and optional for
``instagram``,
``luscious``,
``pinterest``,
``sankaku``,
``subscribestar``,
``tsumino``,
and ``twitter``.

@ -286,6 +286,7 @@ Description
* ``instagram``
* ``luscious``
* ``pinterest``
* ``sankaku``
* ``subscribestar``
* ``tsumino``
* ``twitter``

@ -112,7 +112,7 @@ rule #34 https://rule34.paheal.net/ Posts, Tag Searches
Rule 34 https://rule34.xxx/ Pools, Posts, Tag Searches
Safebooru https://safebooru.org/ Pools, Posts, Tag Searches
Sakugabooru https://www.sakugabooru.com/ Pools, Popular Images, Posts, Tag Searches
Sankaku Channel https://chan.sankakucomplex.com/ Pools, Posts, Tag Searches
Sankaku Channel https://beta.sankakucomplex.com/ Pools, Posts, Tag Searches Supported
Sankaku Complex https://www.sankakucomplex.com/ Articles, Tag Searches
Sen Manga https://raw.senmanga.com/ Chapters
Sense-Scans https://sensescans.com/reader/ Chapters, Manga

@ -6,10 +6,11 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://chan.sankakucomplex.com/"""
"""Extractors for https://beta.sankakucomplex.com/"""
from .booru import BooruExtractor
from .. import text, exception
from ..cache import cache
import collections
BASE_PATTERN = r"(?:https?://)?(?:beta|chan)\.sankakucomplex\.com"
@ -20,8 +21,9 @@ class SankakuExtractor(BooruExtractor):
basecategory = "booru"
category = "sankaku"
filename_fmt = "{category}_{id}_{md5}.{extension}"
request_interval_min = 1.0
cookiedomain = None
per_page = 100
_warning = True
TAG_TYPES = {
0: "general",
@ -38,6 +40,10 @@ class SankakuExtractor(BooruExtractor):
def _prepare_post(self, post, extended_tags=False):
url = post["file_url"]
if not url and self._warning:
self.log.warning(
"Login required to download 'contentious_content' posts")
SankakuExtractor._warning = False
if extended_tags:
self._fetch_extended_tags(post)
post["date"] = text.parse_timestamp(post["created_at"]["s"])
@ -52,31 +58,6 @@ class SankakuExtractor(BooruExtractor):
for key, value in tags.items():
post["tags_" + key] = value
def _api_request(self, endpoint, params=None):
url = "https://capi-v2.sankakucomplex.com" + endpoint
while True:
response = self.request(url, params=params, fatal=False)
if response.status_code == 429:
self.wait(until=response.headers.get("X-RateLimit-Reset"))
continue
return response.json()
def _pagination(self, params):
params["lang"] = "en"
params["limit"] = str(self.per_page)
while True:
data = self._api_request("/posts/keyset", params)
if not data.get("success", True):
raise exception.StopExtraction(data.get("code"))
yield from data["data"]
params["next"] = data["meta"]["next"]
if not params["next"]:
return
if "page" in params:
del params["page"]
class SankakuTagExtractor(SankakuExtractor):
"""Extractor for images from chan.sankakucomplex.com by search-tags"""
@ -109,7 +90,7 @@ class SankakuTagExtractor(SankakuExtractor):
return {"search_tags": self.tags}
def posts(self):
return self._pagination({"tags": self.tags})
return SankakuAPI(self).posts_keyset({"tags": self.tags})
class SankakuPoolExtractor(SankakuExtractor):
@ -130,7 +111,7 @@ class SankakuPoolExtractor(SankakuExtractor):
self.pool_id = match.group(1)
def metadata(self):
pool = self._api_request("/pools/" + self.pool_id)
pool = SankakuAPI(self).pools(self.pool_id)
self._posts = pool.pop("posts")
return {"pool": pool}
@ -156,6 +137,11 @@ class SankakuPostExtractor(SankakuExtractor):
"tags_general" : list,
},
}),
# 'contentious_content'
("https://beta.sankakucomplex.com/post/show/21418978", {
"pattern": r"https://s\.sankakucomplex\.com"
r"/data/13/3c/133cda3bfde249c504284493903fb985\.jpg",
}),
("https://chan.sankakucomplex.com/post/show/360451"),
)
@ -164,4 +150,116 @@ class SankakuPostExtractor(SankakuExtractor):
self.post_id = match.group(1)
def posts(self):
return self._pagination({"tags": "id:" + self.post_id})
return SankakuAPI(self).posts_keyset({"tags": "id:" + self.post_id})
class SankakuAPI():
"""Interface for the beta.sankakucomplex.com API"""
def __init__(self, extractor):
self.extractor = extractor
self.headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
self.username, self.password = self.extractor._get_auth_info()
if not self.username:
self.authenticate = lambda: None
def pools(self, pool_id):
return self._call("/pools/" + pool_id)
def posts_keyset(self, params):
return self._pagination("/posts/keyset", params)
def authenticate(self):
self.headers["Authorization"] = \
_authenticate_impl(self.extractor, self.username, self.password)
def _call(self, endpoint, params=None):
url = "https://capi-v2.sankakucomplex.com" + endpoint
for _ in range(5):
self.authenticate()
response = self.extractor.request(
url, params=params, headers=self.headers, fatal=False)
if response.status_code == 429:
self.extractor.wait(
until=response.headers.get("X-RateLimit-Reset"))
continue
data = response.json()
if not data.get("success", True):
code = data.get("code")
if code == "invalid_token":
_authenticate_impl.invalidate(self.username)
continue
raise exception.StopExtraction(code)
return data
def _pagination(self, endpoint, params):
params["lang"] = "en"
params["limit"] = str(self.extractor.per_page)
while True:
data = self._call(endpoint, params)
yield from data["data"]
params["next"] = data["meta"]["next"]
if not params["next"]:
return
if "page" in params:
del params["page"]
@cache(maxage=365*24*3600, keyarg=2)
def _authenticate_impl(extr, username, password):
extr.log.info("Logging in as %s", username)
headers = {"Accept": "application/vnd.sankaku.api+json;v=2"}
# get initial access_token
url = "https://login.sankakucomplex.com/auth/token"
data = {"login": username, "password": password}
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=False)
data = response.json()
if response.status_code >= 400 or not data.get("success"):
raise exception.AuthenticationError(data.get("error"))
access_token = data["access_token"]
# start openid auth
url = "https://login.sankakucomplex.com/oidc/auth"
params = {
"response_type": "code",
"scope" : "openid",
"client_id" : "sankaku-web-app",
"redirect_uri" : "https://beta.sankakucomplex.com/sso/callback",
"state" : "return_uri=https://beta.sankakucomplex.com/",
"theme" : "black",
"lang" : "undefined",
}
page = extr.request(url, params=params).text
submit_url = text.extract(page, 'submitUrl = "', '"')[0]
# get code from initial access_token
url = "https://login.sankakucomplex.com" + submit_url
data = {
"accessToken": access_token,
"nonce" : "undefined",
}
response = extr.request(url, method="POST", data=data)
query = text.parse_query(response.request.url.partition("?")[2])
# get final access_token from code
url = "https://capi-v2.sankakucomplex.com/sso/finalize?lang=en"
data = {
"code" : query["code"],
"client_id" : "sankaku-web-app",
"redirect_uri": "https://beta.sankakucomplex.com/sso/callback",
}
response = extr.request(
url, method="POST", headers=headers, json=data, fatal=False)
data = response.json()
if response.status_code >= 400 or not data.get("success"):
raise exception.AuthenticationError(data.get("error"))
return "Bearer " + data["access_token"]

@ -181,6 +181,7 @@ AUTH_MAP = {
"pinterest" : "Supported",
"pixiv" : "Required",
"reddit" : _OAUTH,
"sankaku" : "Supported",
"seiga" : "Required",
"smugmug" : _OAUTH,
"subscribestar" : "Supported",

Loading…
Cancel
Save