# -*- coding: utf-8 -*-

# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for FoOlSlide based sites"""

from .common import Extractor, MangaExtractor, Message, SharedConfigMixin
from .. import text, util, config
import base64
import json
import re


class FoolslideBase(SharedConfigMixin):
    """Functionality shared by all FoOlSlide extractors"""
    basecategory = "foolslide"

    def request(self, url):
        """Fetch 'url' with a POST request carrying the form field adult=true"""
        # NOTE(review): presumably this answers the reader's adult-content
        # confirmation form - verify against the target sites.
        form = {"adult": "true"}
        return Extractor.request(
            self, url, encoding="utf-8", method="POST", data=form)

    @staticmethod
    def parse_chapter_url(url, data):
        """Add metadata taken from a '/read/' chapter URL to 'data'

        The path segments after '/read/' are used by position:
        [1] language code (only the part before a '-' is kept),
        [2] volume, [3] chapter, [4] optional minor chapter part.
        'data' is modified in place and returned.
        """
        path = url.partition("/read/")[2].rstrip("/")
        parts = path.split("/")
        lang = parts[1].partition("-")[0]

        # a fifth path segment, when present, becomes the ".N" suffix
        if len(parts) >= 5:
            minor = "." + parts[4]
        else:
            minor = ""

        data["lang"] = lang
        data["language"] = util.code_to_language(lang)
        data["volume"] = text.parse_int(parts[2])
        data["chapter"] = text.parse_int(parts[3])
        data["chapter_minor"] = minor
        return data


class FoolslideChapterExtractor(FoolslideBase, Extractor):
    """Base class for chapter extractors for FoOlSlide based sites"""
    subcategory = "chapter"
    directory_fmt = [
        "{category}", "{manga}", "{chapter_string}"]
    filename_fmt = (
        "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
    archive_fmt = "{id}"
    decode = "default"

    def __init__(self, match):
        Extractor.__init__(self)
        self.url = self.root + match.group(1)

    def items(self):
        page = self.request(self.url).text
        data = self.get_metadata(page)
        imgs = self.get_images(page)

        data["count"] = len(imgs)
        data["chapter_id"] = text.parse_int(imgs[0]["chapter_id"])

        yield Message.Version, 1
        yield Message.Directory, data
        for data["page"], image in enumerate(imgs, 1):
            try:
                url = image["url"]
                del image["url"]
                del image["chapter_id"]
                del image["thumb_url"]
            except KeyError:
                pass
            for key in ("height", "id", "size", "width"):
                image[key] = text.parse_int(image[key])
            data.update(image)
text.nameext_from_url(data["filename"], data) yield Message.Url, url, data def get_metadata(self, page): """Collect metadata for extractor-job""" _ , pos = text.extract(page, '

', '') manga , pos = text.extract(page, 'title="', '"', pos) chapter, pos = text.extract(page, 'title="', '"', pos) chapter = text.unescape(chapter) return self.parse_chapter_url(self.url, { "manga": text.unescape(manga).strip(), "title": chapter.partition(":")[2].strip(), "chapter_string": chapter, }) def get_images(self, page): """Return a list of all images in this chapter""" if self.decode == "base64": base64_data = text.extract(page, 'atob("', '"')[0].encode() data = base64.b64decode(base64_data).decode() elif self.decode == "double": pos = page.find("[{") data = text.extract(page, " = ", ";", pos)[0] else: data = text.extract(page, "var pages = ", ";")[0] return json.loads(data) class FoolslideMangaExtractor(FoolslideBase, MangaExtractor): """Base class for manga extractors for FoOlSlide based sites""" def init(self, match): MangaExtractor.init(self, match, self.root + match.group(1)) def chapters(self, page): """Return a list of all chapter urls""" manga , pos = text.extract(page, '

', '

') author, pos = text.extract(page, 'Author: ', 'Artist: ', '= 1", "keyword": { "chapter": int, "chapter_minor": str, "chapter_string": str, "group": "PowerManga", "lang": "en", "language": "English", "manga": "One Piece Digital Colour Comics", "title": str, "volume": int, }, }), ], }, "seaotterscans": { "root": "https://reader.seaotterscans.com", "test-chapter": [ ("https://reader.seaotterscans.com/read/100_days/en/0/5/", { "url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8", "keyword": "5349c2fbaa88070e6af600de17a6c4e212243e8e", }), ], "test-manga": [ ("https://reader.seaotterscans.com/series/marry_me/", { "url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe", "keyword": "61d3388d73df12f64361892b47a9398df4a5947c", }), ], }, "sensescans": { "root": "http://sensescans.com/reader", "pattern": r"(?:(?:www\.)?sensescans\.com/reader" r"|reader\.sensescans\.com)", "test-chapter": [ (("http://reader.sensescans.com/read/" "magi__labyrinth_of_magic/en/37/369/"), { "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812", "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60", }), (("http://sensescans.com/reader/read/" "magi__labyrinth_of_magic/en/37/369/"), { "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812", "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60", }), ], "test-manga": [ ("http://sensescans.com/reader/series/hakkenden/", { "url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2", "keyword": "122cf92c32e6428c50f56ffaf29d06b96750ed71", }), ], }, "worldthree": { "root": "http://www.slide.world-three.org", "pattern": r"(?:www\.)?slide\.world-three\.org", "test-chapter": [ (("http://www.slide.world-three.org" "/read/black_bullet/en/2/7/page/1"), { "url": "be2f04f6e2d311b35188094cfd3e768583271584", "keyword": "28edfeccc92f7ea29546d5616e689dcfcbac59d9", }), (("http://www.slide.world-three.org" "/read/idolmster_cg_shuffle/en/0/4/2/"), { "url": "6028ea5ca282744f925dfad92eeb98509f9cc78c", "keyword": "d478e9f20847deb1844dba318acaa8b91c19468a", }), ], "test-manga": [ 
("http://www.slide.world-three.org/series/black_bullet/", { "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738", "keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120", }), ], }, } generate_extractors()