# -*- coding: utf-8 -*- # Copyright 2015 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extract manga pages from http://www.thespectrum.net/manga_scans/""" from .common import Extractor, AsynchronousExtractor, Message from .. import text class SpectrumNexusMangaExtractor(Extractor): """Extract all manga-chapters and -volumes from spectrumnexus""" category = "spectrumnexus" subcategory = "manga" pattern = [r"(?:https?://)?view\.thespectrum\.net/series/([^\.]+)\.html$"] test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", { "url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb", })] url_base = "http://view.thespectrum.net/series/" def __init__(self, match): Extractor.__init__(self) self.url = self.url_base + match.group(1) + ".html" def items(self): yield Message.Version, 1 for chapter in self.get_chapters(): yield Message.Queue, self.url + "?ch=" + chapter.replace(" ", "+") def get_chapters(self): """Return a list of all chapter identifiers""" page = self.request(self.url).text page = text.extract(page, '