# -*- coding: utf-8 -*- # Copyright 2018 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract manga-chapters and entire manga from https://mangadex.org/""" from .common import Extractor, Message from .. import text, util class MangadexExtractor(Extractor): """Base class for mangadex extractors""" category = "mangadex" root = "https://mangadex.org" # mangadex-to-iso639-1 codes iso639_map = { "br": "pt", "ct": "ca", "gb": "en", "vn": "vi", } def chapter_data(self, chapter_id): """Request API results for 'chapter_id'""" url = "{}/api/chapter/{}".format(self.root, chapter_id) return self.request(url).json() def manga_data(self, manga_id, *, cache={}): """Request API results for 'manga_id'""" if manga_id not in cache: url = "{}/api/manga/{}".format(self.root, manga_id) cache[manga_id] = self.request(url).json() return cache[manga_id] class MangadexChapterExtractor(MangadexExtractor): """Extractor for manga-chapters from mangadex.org""" subcategory = "chapter" directory_fmt = [ "{category}", "{manga}", "{volume:?v/ />02}c{chapter:>03}{chapter_minor}{title:?: //}"] filename_fmt = ( "{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}") archive_fmt = "{chapter_id}_{page}" pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"] test = [ ("https://mangadex.org/chapter/122094", { "keyword": "7bd7f82ab9d3f06976c4b68afe78d0040851ac3c", "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f", }), # oneshot ("https://mangadex.org/chapter/138086", { "count": 64, "keyword": "435e157dc5529d152458ba751ffe5bfbaf4850fb", }), ] def __init__(self, match): MangadexExtractor.__init__(self) self.chapter_id = match.group(1) self.data = None def items(self): data = self.get_metadata() imgs = self.get_images() data["count"] = len(imgs) yield Message.Version, 1 yield Message.Directory, data for data["page"], url in enumerate(imgs, 1): yield Message.Url, url, text.nameext_from_url(url, data) def get_metadata(self): """Return a dict with general metadata""" cdata = self.chapter_data(self.chapter_id) mdata = self.manga_data(cdata["manga_id"]) self.data = cdata chapter, sep, minor = cdata["chapter"].partition(".") return { "manga": mdata["manga"]["title"], "manga_id": cdata["manga_id"], "artist": mdata["manga"]["artist"], "author": mdata["manga"]["author"], "title": text.unescape(cdata["title"]), "volume": text.parse_int(cdata["volume"]), "chapter": text.parse_int(chapter), "chapter_minor": sep + minor, "chapter_id": cdata["id"], "group": mdata["chapter"][self.chapter_id]["group_name"], "date": cdata["timestamp"], "lang": util.language_to_code(cdata["lang_name"]), "language": cdata["lang_name"], } def get_images(self): """Return a list of all image URLs""" base = self.data["server"] + self.data["hash"] + "/" if base.startswith("/"): base = text.urljoin(self.root, base) return [base + page for page in self.data["page_array"]] class MangadexMangaExtractor(MangadexExtractor): """Extractor for manga from mangadex.org""" subcategory = "manga" categorytransfer = True pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)" r"/(?:title|manga)/(\d+)"] test = [ ("https://mangadex.org/manga/2946/souten-no-koumori", { "count": ">= 1", "keywords": { "manga": "Souten no Koumori", "manga_id": 2946, "title": "Oneshot", "volume": 0, "chapter": 0, "chapter_minor": "", "chapter_id": int, "group": str, "date": int, "lang": str, "language": str, }, }), ("https://mangadex.org/manga/13318/dagashi-kashi/chapters/2/", { "count": ">= 100", }), ("https://mangadex.org/title/2946/souten-no-koumori", None), ] def __init__(self, match): MangadexExtractor.__init__(self) self.manga_id = text.parse_int(match.group(1)) def items(self): yield Message.Version, 1 for data in self.chapters(): url = "{}/chapter/{}".format(self.root, data["chapter_id"]) yield Message.Queue, url, data def chapters(self): """Return a sorted list of chapter-metadata dicts""" data = self.manga_data(self.manga_id) manga = data["manga"] results = [] for chid, info in data["chapter"].items(): chapter, sep, minor = info["chapter"].partition(".") lang = self.iso639_map.get(info["lang_code"], info["lang_code"]) results.append({ "manga": manga["title"], "manga_id": self.manga_id, "artist": manga["artist"], "author": manga["author"], "title": text.unescape(info["title"]), "volume": text.parse_int(info["volume"]), "chapter": text.parse_int(chapter), "chapter_minor": sep + minor, "chapter_id": text.parse_int(chid), "group": text.unescape(info["group_name"]), "date": info["timestamp"], "lang": lang, "language": util.code_to_language(lang), }) results.sort(key=lambda x: (x["chapter"], x["chapter_minor"])) return results