diff --git a/gallery_dl/extractor/idolcomplex.py b/gallery_dl/extractor/idolcomplex.py index dcb4a548..0290c18f 100644 --- a/gallery_dl/extractor/idolcomplex.py +++ b/gallery_dl/extractor/idolcomplex.py @@ -1,26 +1,116 @@ # -*- coding: utf-8 -*- -# Copyright 2018-2019 Mike Fährmann +# Copyright 2018-2020 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extract images from https://idol.sankakucomplex.com/""" +"""Extractors for https://idol.sankakucomplex.com/""" -from . import sankaku +from .sankaku import SankakuExtractor +from .common import Message +from .. import text, util, exception +import collections +import random +import time +import re -class IdolcomplexExtractor(sankaku.SankakuExtractor): +class IdolcomplexExtractor(SankakuExtractor): """Base class for idolcomplex extractors""" category = "idolcomplex" cookiedomain = "idol.sankakucomplex.com" + root = "https://" + cookiedomain subdomain = "idol" + def __init__(self, match): + SankakuExtractor.__init__(self, match) + self.logged_in = True + self.start_page = 1 + self.start_post = 0 + self.extags = self.config("tags", False) + self.wait_min = self.config("wait-min", 3.0) + self.wait_max = self.config("wait-max", 6.0) + if self.wait_max < self.wait_min: + self.wait_max = self.wait_min -class IdolcomplexTagExtractor(IdolcomplexExtractor, - sankaku.SankakuTagExtractor): + def items(self): + self.login() + data = self.metadata() + + for post_id in util.advance(self.post_ids(), self.start_post): + self.wait() + post = self._parse_post(post_id) + url = post["file_url"] + post.update(data) + text.nameext_from_url(url, post) + yield Message.Directory, post + yield Message.Url, url, post + + def skip(self, num): + self.start_post += num + return num + + def post_ids(self): + """Return an iterable containing all relevant post ids""" + + def _parse_post(self, post_id): + """Extract metadata of a single post""" + url = self.root + "/post/show/" + post_id + page = self.request(url, retries=10).text + extr = text.extract + + tags , pos = extr(page, "