From f226417420b44cb105d8e23fedf94c30e25193fd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 20 May 2017 11:27:43 +0200 Subject: [PATCH] simplify code by using a MangaExtractor base class --- gallery_dl/extractor/batoto.py | 32 +++++++--------------- gallery_dl/extractor/common.py | 34 ++++++++++++++++++++++++ gallery_dl/extractor/foolslide.py | 25 +++++++----------- gallery_dl/extractor/hbrowse.py | 25 +++++------------- gallery_dl/extractor/hentai2read.py | 22 +++++++--------- gallery_dl/extractor/hentaicdn.py | 15 +---------- gallery_dl/extractor/hentaihere.py | 23 +++++++--------- gallery_dl/extractor/kisscomic.py | 10 +++---- gallery_dl/extractor/kissmanga.py | 20 ++++---------- gallery_dl/extractor/mangahere.py | 28 ++++++-------------- gallery_dl/extractor/mangapanda.py | 9 +++---- gallery_dl/extractor/mangapark.py | 30 ++++++--------------- gallery_dl/extractor/mangareader.py | 34 +++++++++--------------- gallery_dl/extractor/readcomiconline.py | 8 ++---- gallery_dl/extractor/spectrumnexus.py | 35 +++++++++---------------- 15 files changed, 134 insertions(+), 216 deletions(-) diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 4f19ebe1..ced9b4a7 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -8,15 +8,16 @@ """Extract manga chapters from https://bato.to/""" -from .common import Extractor, AsynchronousExtractor, Message +from .common import MangaExtractor, AsynchronousExtractor, Message from .. import text, util, exception from ..cache import cache import re -class BatotoExtractor(Extractor): +class BatotoExtractor(): """Base class for batoto extractors""" category = "batoto" + scheme = "https" root = "https://bato.to" def login(self): @@ -56,34 +57,19 @@ class BatotoExtractor(Extractor): return {c: response.cookies[c] for c in ("member_id", "pass_hash")} -class BatotoMangaExtractor(BatotoExtractor): - """Extractor for mangas from bato.to""" - subcategory = "manga" - pattern = [r"(?:https?://)?(?:www\.)?bato\.to/comic/_/comics/.*-r\d+"] +class BatotoMangaExtractor(BatotoExtractor, MangaExtractor): + """Extractor for manga from bato.to""" + pattern = [r"(?:https?://)?(?:www\.)?(bato\.to/comic/_/comics/.*-r\d+)"] test = [("http://bato.to/comic/_/comics/aria-r2007", { "url": "a38585b0339587666d772ee06f2a60abdbf42a97", })] - def __init__(self, match): - BatotoExtractor.__init__(self) - self.url = match.group(0) - - def items(self): - self.login() - yield Message.Version, 1 - for chapter in self.get_chapters(): - yield Message.Queue, chapter - - def get_chapters(self): - """Return a list of all chapter urls""" + def chapters(self, page): # TODO: filter by language / translator needle = ('\n ' '', '\n' + page, '\n' )[0] - return text.extract_iter(page, '
  • \n\n\n\n') - return reversed(list( - text.extract_iter(page, '
  • \n\n') - ))) + )) class MangahereChapterExtractor(AsynchronousExtractor): diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py index f3307b17..1b21637c 100644 --- a/gallery_dl/extractor/mangapanda.py +++ b/gallery_dl/extractor/mangapanda.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -14,13 +14,12 @@ from .mangareader import MangareaderMangaExtractor, MangareaderChapterExtractor class MangapandaBase(): """Base class for mangapanda extractors""" category = "mangapanda" - url_base = "http://www.mangapanda.com" + root = "http://www.mangapanda.com" class MangapandaMangaExtractor(MangapandaBase, MangareaderMangaExtractor): - """Extractor for mangas from mangapanda.com""" - subcategory = "manga" - pattern = [r"(?:https?://)?(?:www\.)?mangapanda\.com(/[^/]+)$"] + """Extractor for manga from mangapanda.com""" + pattern = [r"(?:https?://)?((?:www\.)?mangapanda\.com/[^/]+)$"] test = [("http://www.mangapanda.com/mushishi", { "url": "50a1ba730b85426b904da256c80f68ba6a8a2566", })] diff --git a/gallery_dl/extractor/mangapark.py b/gallery_dl/extractor/mangapark.py index f7681833..9dc91e8a 100644 --- a/gallery_dl/extractor/mangapark.py +++ b/gallery_dl/extractor/mangapark.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015, 2016 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,37 +8,23 @@ """Extract manga-chapters and entire manga from http://mangapark.me/""" -from .common import Extractor, Message +from .common import Extractor, MangaExtractor, Message from .. import text -class MangaparkMangaExtractor(Extractor): - """Extractor for mangas from mangapark.me""" +class MangaparkMangaExtractor(MangaExtractor): + """Extractor for manga from mangapark.me""" category = "mangapark" - subcategory = "manga" - pattern = [r"(?:https?://)?(?:www\.)?mangapark\.me/manga/([^/]+)$"] + pattern = [r"(?:https?://)?(?:www\.)?(mangapark\.me/manga/[^/]+)$"] + root = "http://mangapark.me" test = [("http://mangapark.me/manga/mushishi", { "url": "9902e342af71af19a5ac20fcd01950b165acf119", })] - url_base = "http://mangapark.me" - def __init__(self, match): - Extractor.__init__(self) - self.url_title = match.group(1) - - def items(self): - yield Message.Version, 1 - for chapter in self.get_chapters(): - yield Message.Queue, self.url_base + chapter - - def get_chapters(self): - """Return a list of all chapter urls""" - page = self.request(self.url_base + "/manga/" + self.url_title).text + def chapter_paths(self, page): needle = '') - return reversed(list( - text.extract_iter(page, needle, '"', pos) - )) + return text.extract_iter(page, needle, '"', pos) class MangaparkChapterExtractor(Extractor): diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index b10d6b76..bb658afe 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,7 +8,7 @@ """Extract manga-chapters and entire manga from http://www.mangareader.net/""" -from .common import AsynchronousExtractor, Extractor, Message +from .common import AsynchronousExtractor, MangaExtractor, Message from .. import text @@ -17,29 +17,21 @@ class MangareaderBase(): category = "mangareader" directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" - url_base = "http://www.mangareader.net" + root = "http://www.mangareader.net" -class MangareaderMangaExtractor(MangareaderBase, Extractor): - """Extractor for mangas from mangareader.net""" - subcategory = "manga" - pattern = [r"(?:https?://)?(?:www\.)?mangareader\.net(/[^/]+)$"] +class MangareaderMangaExtractor(MangareaderBase, MangaExtractor): + """Extractor for manga from mangareader.net""" + pattern = [r"(?:https?://)?((?:www\.)?mangareader\.net/[^/]+)$"] + reverse = False test = [("http://www.mangareader.net/mushishi", { "url": "249042420b67a07b32e7f6be4c7410b6d810b808", })] - def __init__(self, match): - Extractor.__init__(self) - self.url_title = match.group(1) - - def items(self): - yield Message.Version, 1 - url = self.url_base + self.url_title - page = self.request(url).text - needle = '\n') - for chapter in text.extract_iter(page, needle, '"', pos): - yield Message.Queue, url + chapter + return text.extract_iter(page, needle, '"', pos) class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): @@ -61,7 +53,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): self.part, self.url_title, self.chapter = match.groups() def items(self): - page = self.request(self.url_base + self.part).text + page = self.request(self.root + self.part).text data = self.get_job_metadata(page) yield Message.Version, 1 yield Message.Directory, data @@ -75,7 +67,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): def get_job_metadata(self, chapter_page): """Collect metadata for extractor-job""" - page = self.request(self.url_base + self.url_title).text + page = self.request(self.root + self.url_title).text data = { "chapter": self.chapter, "lang": "en", @@ -119,7 +111,7 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor): width , pos = extr(page, '' - )[0] - return text.extract_iter(page, '