# -*- coding: utf-8 -*- # Copyright 2018-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://idol.sankakucomplex.com/""" from .sankaku import SankakuExtractor from .common import Message from ..cache import cache from .. import text, util, exception import collections import re BASE_PATTERN = r"(?:https?://)?idol\.sankakucomplex\.com(?:/[a-z]{2})?" class IdolcomplexExtractor(SankakuExtractor): """Base class for idolcomplex extractors""" category = "idolcomplex" root = "https://idol.sankakucomplex.com" cookies_domain = "idol.sankakucomplex.com" cookies_names = ("_idolcomplex_session",) referer = False request_interval = (3.0, 6.0) def __init__(self, match): SankakuExtractor.__init__(self, match) self.logged_in = True self.start_page = 1 self.start_post = 0 def _init(self): self.find_pids = re.compile( r" href=[\"#]/\w\w/posts/(\w+)" ).findall self.find_tags = re.compile( r'tag-type-([^"]+)">\s*]*?href="/[^?]*\?tags=([^"]+)' ).findall def items(self): self.login() data = self.metadata() for post_id in util.advance(self.post_ids(), self.start_post): post = self._extract_post(post_id) url = post["file_url"] post.update(data) text.nameext_from_url(url, post) yield Message.Directory, post yield Message.Url, url, post def skip(self, num): self.start_post += num return num def post_ids(self): """Return an iterable containing all relevant post ids""" def login(self): if self.cookies_check(self.cookies_names): return username, password = self._get_auth_info() if username: return self.cookies_update(self._login_impl(username, password)) self.logged_in = False @cache(maxage=90*86400, keyarg=1) def _login_impl(self, username, password): self.log.info("Logging in as %s", username) url = self.root + "/users/login" page = self.request(url).text headers = { "Referer": url, } url = self.root + (text.extr(page, '

") vcnt = extr('>Votes:', "<") pid = extr(">Post ID:", "<") created = extr(' title="', '"') file_url = extr('>Original:', 'id=') if file_url: file_url = extr(' href="', '"') width = extr(">", "x") height = extr("", " ") else: width = extr('