# -*- coding: utf-8 -*- # Copyright 2014-2016 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract manga chapters from https://bato.to/""" from .common import AsynchronousExtractor, Message from .. import text, iso639_1, config, exception from ..cache import cache import re class BatotoChapterExtractor(AsynchronousExtractor): """Extractor for manga-chapters from bato.to""" category = "batoto" subcategory = "chapter" directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] test = [("http://bato.to/reader#459878c8fda07502", { "url": "432d7958506ad913b0a9e42664a89e46a63e9296", "keyword": "75a3a86d32aecfc21c44865b4043490757f73d77", })] url = "https://bato.to/" reader_url = "https://bato.to/areader" def __init__(self, match): AsynchronousExtractor.__init__(self) self.token = match.group(1) def items(self): self.login() self.session.headers.update({ "X-Requested-With": "XMLHttpRequest", "Referer": self.url + "reader", }) params = { "id": self.token, "p": 1, "supress_webtoon": "t", } response = self.session.get(self.reader_url, params=params) if response.status_code == 405: error = text.extract(response.text, "ERROR [", "]")[0] if error == "10030": raise exception.AuthorizationError() elif error == "10020": raise exception.NotFoundError("chapter") else: raise Exception("[batoto] unexpected error code: " + error) page = response.text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data.copy() for i in range(int(data["count"])): next_url, image_url = self.get_page_urls(page) text.nameext_from_url(image_url, data) data["page"] = i+1 yield Message.Url, image_url, data.copy() if next_url: params["p"] += 1 page = self.request(self.reader_url, params=params).text def get_job_metadata(self, page): """Collect metadata for extractor-job""" extr = text.extract _ , pos = extr(page, '', ' - ', pos) lang , pos = extr(page, '', '', pos) _ , pos = extr(page, '