diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 2f40592d..2f88f2b5 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -48,6 +48,7 @@ imgur https://imgur.com/ Albums, individual Imag Instagram https://www.instagram.com/ Images from Users, individual Images, Tag-Searches Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga Joyreactor http://joyreactor.cc/ |joyreactor-C| +Keenspot http://www.keenspot.com/ Comics Khinsider https://downloads.khinsider.com/ Soundtracks Kirei Cake https://reader.kireicake.com/ Chapters, Manga KissManga https://kissmanga.com/ Chapters, Manga diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3ee6d3db..ea3c8bfe 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -44,6 +44,7 @@ modules = [ "imgth", "imgur", "instagram", + "keenspot", "khinsider", "kissmanga", "komikcast", diff --git a/gallery_dl/extractor/keenspot.py b/gallery_dl/extractor/keenspot.py new file mode 100644 index 00000000..1bcb72b5 --- /dev/null +++ b/gallery_dl/extractor/keenspot.py @@ -0,0 +1,103 @@ +# -*- coding: utf-8 -*- + +# Copyright 2019 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for http://www.keenspot.com/""" + +from .common import Extractor, Message +from .. import text + + +class KeenspotComicExtractor(Extractor): + """Extractor for webcomics from keenspot.com""" + category = "keenspot" + subcategory = "comic" + directory_fmt = ("{category}", "{comic}") + filename_fmt = "{filename}.{extension}" + archive_fmt = "{comic}_{filename}" + pattern = r"(?:https?://)?(?!www\.|forums\.)([^.]+)\.keenspot\.com" + test = ( + ("http://marksmen.keenspot.com/", { # link + "range": "1-3", + "url": "83bcf029103bf8bc865a1988afa4aaeb23709ba6", + }), + ("http://barkercomic.keenspot.com/", { # id + "range": "1-3", + "url": "c4080926db18d00bac641fdd708393b7d61379e6", + }), + ("http://crowscare.keenspot.com/", { # id v2 + "range": "1-3", + "url": "a00e66a133dd39005777317da90cef921466fcaa" + }), + ("http://supernovas.keenspot.com/", { # ks + "range": "1-3", + "url": "de21b12887ef31ff82edccbc09d112e3885c3aab" + }), + ) + + def __init__(self, match): + Extractor.__init__(self, match) + self._next = None + self.comic = match.group(1) + self.root = "http://" + self.comic + ".keenspot.com" + + def items(self): + data = {"comic": self.comic} + yield Message.Version, 1 + yield Message.Directory, data + + url = self._first(self.request(self.root + "/").text) + while url: + if url[0] == "/": + url = self.root + url + page = self.request(url).text + + for img in text.extract_iter(page, 'class="ksc"', '>'): + img = text.extract(img, 'src="', '"')[0] + if img[0] == "/": + img = self.root + img + yield Message.Url, img, text.nameext_from_url(img, data) + + url = self._next(page) + + def _first(self, page): + url = text.extract(page, '= 0: + self._next = self._next_id + return text.rextract(page, 'FIRST PAGE<') + if pos >= 0: + self._next = self._next_id + return text.rextract(page, '= 0: + self._next = self._next_ks + return text.extract(page, 'href="', '"', pos)[0] + + self.log.error("Unrecognized page layout") + return None + + @staticmethod + def _next_link(page): + return text.extract(page, '= 0 else None + + @staticmethod + def _next_ks(page): + pos = page.index('