From 94e10f249a152106132b5ef690399e3307ff5325 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 1 Feb 2017 00:53:19 +0100 Subject: [PATCH] code adjustments according to pep8 nr2 --- gallery_dl/extractor/3dbooru.py | 18 ++++++--- gallery_dl/extractor/4chan.py | 1 + gallery_dl/extractor/8chan.py | 1 + gallery_dl/extractor/__init__.py | 6 ++- gallery_dl/extractor/batoto.py | 3 +- gallery_dl/extractor/booru.py | 9 +++-- gallery_dl/extractor/chan.py | 1 + gallery_dl/extractor/danbooru.py | 4 ++ gallery_dl/extractor/deviantart.py | 6 ++- gallery_dl/extractor/dokireader.py | 4 +- gallery_dl/extractor/dynastyscans.py | 9 +++-- gallery_dl/extractor/e621.py | 4 ++ gallery_dl/extractor/exhentai.py | 49 +++++++++++++++++-------- gallery_dl/extractor/foolslide.py | 1 + gallery_dl/extractor/gelbooru.py | 17 ++++++--- gallery_dl/extractor/gomanga.py | 1 + gallery_dl/extractor/hbrowse.py | 4 +- gallery_dl/extractor/hentai2read.py | 5 ++- gallery_dl/extractor/hentaibox.py | 13 +++++-- gallery_dl/extractor/hentaicdn.py | 4 +- gallery_dl/extractor/hentaifoundry.py | 29 ++++++++++----- gallery_dl/extractor/hentaihere.py | 7 +++- gallery_dl/extractor/hitomi.py | 8 ++-- gallery_dl/extractor/imagebam.py | 18 +++++---- gallery_dl/extractor/imagefap.py | 27 ++++++++------ gallery_dl/extractor/imagehosts.py | 16 +++++++- gallery_dl/extractor/imgbox.py | 1 + gallery_dl/extractor/imgchili.py | 10 +++-- gallery_dl/extractor/imgth.py | 1 + gallery_dl/extractor/imgur.py | 10 +++-- gallery_dl/extractor/jaiminisbox.py | 1 + gallery_dl/extractor/khinsider.py | 17 ++++++--- gallery_dl/extractor/kisscomic.py | 4 +- gallery_dl/extractor/kissmanga.py | 11 ++++-- gallery_dl/extractor/konachan.py | 4 ++ gallery_dl/extractor/luscious.py | 6 ++- gallery_dl/extractor/mangafox.py | 6 ++- gallery_dl/extractor/mangahere.py | 22 +++++++---- gallery_dl/extractor/mangamint.py | 18 +++++---- gallery_dl/extractor/mangapanda.py | 7 +++- gallery_dl/extractor/mangapark.py | 20 ++++++---- gallery_dl/extractor/mangareader.py | 9 +++-- gallery_dl/extractor/mangashare.py | 8 +++- gallery_dl/extractor/mangastream.py | 5 ++- gallery_dl/extractor/message.py | 1 + gallery_dl/extractor/nhentai.py | 10 +++-- gallery_dl/extractor/nijie.py | 5 ++- gallery_dl/extractor/pinterest.py | 7 +++- gallery_dl/extractor/pixiv.py | 28 ++++++++++---- gallery_dl/extractor/powermanga.py | 5 ++- gallery_dl/extractor/readcomiconline.py | 4 +- gallery_dl/extractor/readcomics.py | 1 + gallery_dl/extractor/recursive.py | 1 + gallery_dl/extractor/rule34.py | 5 ++- gallery_dl/extractor/safebooru.py | 5 ++- gallery_dl/extractor/sankaku.py | 8 ++-- gallery_dl/extractor/seiga.py | 3 +- gallery_dl/extractor/senmanga.py | 1 + gallery_dl/extractor/sensescans.py | 4 +- gallery_dl/extractor/spectrumnexus.py | 15 +++++--- gallery_dl/extractor/test.py | 1 + gallery_dl/extractor/tumblr.py | 1 + gallery_dl/extractor/twitter.py | 4 +- gallery_dl/extractor/whentai.py | 13 +++++-- gallery_dl/extractor/worldthree.py | 4 +- gallery_dl/extractor/yandere.py | 4 ++ gallery_dl/extractor/yomanga.py | 1 + gallery_dl/extractor/yonkouprod.py | 1 + 68 files changed, 390 insertions(+), 167 deletions(-) diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index 7f728891..1b25d266 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 Mike Fährmann +# Copyright 2015-2017 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -10,6 +10,7 @@ from . import booru + class ThreedeebooruExtractor(booru.JSONBooruExtractor): """Base class for 3dbooru extractors""" category = "3dbooru" @@ -19,16 +20,21 @@ class ThreedeebooruExtractor(booru.JSONBooruExtractor): "User-Agent": "Mozilla/5.0", } -class ThreedeebooruTagExtractor(ThreedeebooruExtractor, booru.BooruTagExtractor): + +class ThreedeebooruTagExtractor(ThreedeebooruExtractor, + booru.BooruTagExtractor): """Extractor for images from behoimi.org based on search-tags""" subcategory = "tag" - pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"] + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post" + r"(?:/(?:index)?)?\?tags=([^&]+)"] test = [("http://behoimi.org/post?tags=himekawa_azuru dress", { "url": "ecb30c6aaaf8a6ff8f55255737a9840832a483c1", "content": "11cbda40c287e026c1ce4ca430810f761f2d0b2a", })] -class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, booru.BooruPoolExtractor): + +class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, + booru.BooruPoolExtractor): """Extractor for image-pools from behoimi.org""" subcategory = "pool" pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"] @@ -37,7 +43,9 @@ class ThreedeebooruPoolExtractor(ThreedeebooruExtractor, booru.BooruPoolExtracto "content": "fd5b37c5c6c2de4b4d6f1facffdefa1e28176554", })] -class ThreedeebooruPostExtractor(ThreedeebooruExtractor, booru.BooruPostExtractor): + +class ThreedeebooruPostExtractor(ThreedeebooruExtractor, + booru.BooruPostExtractor): """Extractor for single images from behoimi.org""" subcategory = "post" pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"] diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index a74861c9..a679980e 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -10,6 +10,7 @@ from . import chan + class FourchanThreadExtractor(chan.ChanThreadExtractor): """Extractor for images from threads from 4chan.org""" category = "4chan" diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index e5797bc5..c1c4486a 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -10,6 +10,7 @@ from . import chan + class InfinitychanThreadExtractor(chan.ChanThreadExtractor): """Extractor for images from threads from 8ch.net""" category = "8chan" diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 13a86630..66768e0e 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -8,7 +8,6 @@ import re import importlib -from .. import config modules = [ "pixiv", @@ -75,6 +74,7 @@ modules = [ "test", ] + def find(url): """Find suitable extractor for the given url""" for pattern, klass in _list_patterns(): @@ -83,6 +83,7 @@ def find(url): return klass(match) return None + def extractors(): """Yield all available extractor classes""" return sorted( @@ -90,12 +91,14 @@ def extractors(): key=lambda x: x.__name__ ) + # -------------------------------------------------------------------- # internals _cache = [] _module_iter = iter(modules) + def _list_patterns(): """Yield all available (pattern, class) tuples""" yield from _cache @@ -110,6 +113,7 @@ def _list_patterns(): _cache.extend(tuples) yield from tuples + def _get_classes(module): """Return a list of all extractor classes in a module""" return [ diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 95c4f918..0202dceb 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -26,7 +26,8 @@ class BatotoExtractor(Extractor): if username and password: cookies = self._login_impl(username, password) for key, value in cookies.items(): - self.session.cookies.set(key, value, domain=".bato.to", path="/") + self.session.cookies.set( + key, value, domain=".bato.to", path="/") @cache(maxage=360*24*60*60, keyarg=1) def _login_impl(self, username, password): diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index a8da3faa..b5e29873 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -14,6 +14,7 @@ import xml.etree.ElementTree as ET import json import urllib.parse + class BooruExtractor(Extractor): """Base class for all booru extractors""" info = {} @@ -33,7 +34,9 @@ class BooruExtractor(Extractor): yield Message.Headers, self.headers for data in self.items_impl(): try: - yield Message.Url, self.get_file_url(data), self.get_file_metadata(data) + url = self.get_file_url(data) + data = self.get_file_metadata(data) + yield Message.Url, url, data except KeyError: continue @@ -75,7 +78,7 @@ class JSONBooruExtractor(BooruExtractor): self.update_page(reset=True) while True: images = json.loads( - self.request(self.api_url, verify=True, params=self.params, + self.request(self.api_url, params=self.params, headers=self.headers).text ) for data in images: @@ -91,7 +94,7 @@ class XMLBooruExtractor(BooruExtractor): self.update_page(reset=True) while True: root = ET.fromstring( - self.request(self.api_url, verify=True, params=self.params).text + self.request(self.api_url, params=self.params).text ) for item in root: yield item.attrib diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index d06d4b95..9553ebe5 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -11,6 +11,7 @@ from .common import Extractor, Message from .. import text + class ChanThreadExtractor(Extractor): """Base class for extractors for Futaba Channel boards""" category = "chan" diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 1c91dc5a..ff15e5d3 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -10,11 +10,13 @@ from . import booru + class DanbooruExtractor(booru.JSONBooruExtractor): """Base class for danbooru extractors""" category = "danbooru" api_url = "https://danbooru.donmai.us/posts.json" + class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): """Extractor for images from danbooru based on search-tags""" subcategory = "tag" @@ -25,6 +27,7 @@ class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): "content": "b196fb9f1668109d7774a0a82efea3ffdda07746", })] + class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): """Extractor for image-pools from danbooru""" subcategory = "pool" @@ -34,6 +37,7 @@ class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): "content": "b16bab12bea5f7ea9e0a836bf8045f280e113d99", })] + class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor): """Extractor for single images from danbooru""" subcategory = "post" diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 625a7061..f6741b9e 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -49,7 +49,8 @@ class DeviantartImageExtractor(Extractor): directory_fmt = ["{category}", "{artist}"] filename_fmt = "{category}_{index}_{title}.{extension}" pattern = [r"(?:https?://)?([^\.]+\.deviantart\.com/art/.+-(\d+))"] - test = [("http://shimoda7.deviantart.com/art/For-the-sake-of-a-memory-10073852", { + test = [(("http://shimoda7.deviantart.com/art/" + "For-the-sake-of-a-memory-10073852"), { "url": "71345ce3bef5b19bd2a56d7b96e6b5ddba747c2e", "keyword": "ccac27b8f740fc943afca9460608e02c6cbcdf96", "content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e", @@ -66,11 +67,12 @@ class DeviantartImageExtractor(Extractor): data = self.get_data(page) data.update(self.get_image(page)) + tlen = len(data["title"]) text.nameext_from_url(data["image"], data) data["title"] = text.unescape(data["title"]) data["description"] = text.unescape(text.unescape(data["description"])) data["artist"] = text.extract(data["url"], "//", ".")[0] - data["date"] = text.extract(data["date"], ", ", " in ", len(data["title"]))[0] + data["date"] = text.extract(data["date"], ", ", " in ", tlen)[0] yield Message.Version, 1 yield Message.Directory, data diff --git a/gallery_dl/extractor/dokireader.py b/gallery_dl/extractor/dokireader.py index cee0de79..5cbeae23 100644 --- a/gallery_dl/extractor/dokireader.py +++ b/gallery_dl/extractor/dokireader.py @@ -10,12 +10,14 @@ from .foolslide import FoolslideChapterExtractor + class DokireaderChapterExtractor(FoolslideChapterExtractor): """Extractor for manga-chapters from kobato.hologfx.com""" category = "dokireader" pattern = [(r"(?:https?://)?(kobato\.hologfx\.com/reader/read/" r"[^/]+/([a-z]{2})/\d+/\d+)")] - test = [("https://kobato.hologfx.com/reader/read/hitoribocchi_no_oo_seikatsu/en/3/34", { + test = [(("https://kobato.hologfx.com/reader/read/" + "hitoribocchi_no_oo_seikatsu/en/3/34"), { "keyword": "4ee981ae14c6643f6a03a14c9f2c0d4898202671", })] diff --git a/gallery_dl/extractor/dynastyscans.py b/gallery_dl/extractor/dynastyscans.py index 54bff92c..365bf2a7 100644 --- a/gallery_dl/extractor/dynastyscans.py +++ b/gallery_dl/extractor/dynastyscans.py @@ -13,6 +13,7 @@ from .. import text import re import json + class DynastyscansChapterExtractor(Extractor): """Extractor for manga-chapters from dynasty-scans.com""" category = "dynastyscans" @@ -21,11 +22,13 @@ class DynastyscansChapterExtractor(Extractor): filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" pattern = [r"(?:https?://)?(?:www\.)?dynasty-scans\.com/chapters/([^/]+)"] test = [ - ("http://dynasty-scans.com/chapters/hitoribocchi_no_oo_seikatsu_ch33", { + (("http://dynasty-scans.com/chapters/" + "hitoribocchi_no_oo_seikatsu_ch33"), { "url": "63950fa1dfdef58ab842c1b9b854c5c1d650cfa0", "keyword": "81bfda5b98b34ac2a7324bd9e2abad3df9cc7673", }), - ("http://dynasty-scans.com/chapters/new_game_the_spinoff_special_13", { + (("http://dynasty-scans.com/chapters/" + "new_game_the_spinoff_special_13"), { "url": "6b28c733481ac498da341e85a9eb155864491731", "keyword": "93b75d0c0aaeb849c99f2225a4b97f466bc3ace9", }), @@ -55,7 +58,7 @@ class DynastyscansChapterExtractor(Extractor): """Collect metadata for extractor-job""" info , pos = text.extract(page, "

", "") author, pos = text.extract(page, " by ", "", pos) - date , pos = text.extract(page, ' ', '<', pos) + date , pos = text.extract(page, '"icon-calendar"> ', '<', pos) match = re.match( r"(?:]+>)?([^<]+)(?:)?(?: ch(\d+))?(?:: (.+))?", info diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 1305cb77..71914cf4 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -10,11 +10,13 @@ from . import booru + class E621Extractor(booru.JSONBooruExtractor): """Base class for e621 extractors""" category = "e621" api_url = "https://e621.net/post/index.json" + class E621TagExtractor(E621Extractor, booru.BooruTagExtractor): """Extractor for images from e621.net based on search-tags""" subcategory = "tag" @@ -27,6 +29,7 @@ class E621TagExtractor(E621Extractor, booru.BooruTagExtractor): "content": "501d1e5d922da20ee8ff9806f5ed3ce3a684fd58", })] + class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor): """Extractor for image-pools from e621.net""" subcategory = "pool" @@ -36,6 +39,7 @@ class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor): "content": "c2c87b7a9150509496cddc75ccab08109922876a", })] + class E621PostExtractor(E621Extractor, booru.BooruPostExtractor): """Extractor for single images from e621.net""" subcategory = "post" diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 0561d6e2..e0578303 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -15,6 +15,7 @@ import time import random import requests + class ExhentaiGalleryExtractor(Extractor): """Extractor for image-galleries from exhentai.org""" category = "exhentai" @@ -41,9 +42,12 @@ class ExhentaiGalleryExtractor(Extractor): self.key = {} self.count = 0 self.gid, self.token = match.groups() - self.original = config.interpolate(("extractor", "exhentai", "download-original"), True) - self.wait_min = config.interpolate(("extractor", "exhentai", "wait-min"), 3) - self.wait_max = config.interpolate(("extractor", "exhentai", "wait-max"), 6) + self.original = config.interpolate( + ("extractor", "exhentai", "download-original"), True) + self.wait_min = config.interpolate( + ("extractor", "exhentai", "wait-min"), 3) + self.wait_max = config.interpolate( + ("extractor", "exhentai", "wait-max"), 6) if self.wait_max < self.wait_min: self.wait_max = self.wait_min @@ -75,7 +79,8 @@ class ExhentaiGalleryExtractor(Extractor): """Initialize headers""" self.session.headers.update({ "User-Agent": "Mozilla/5.0", - "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8", + "Accept": "text/html,application/xhtml+xml," + "application/xml;q=0.9,*/*;q=0.8", "Accept-Language": "en-US,en;q=0.5", "Referer": "https://exhentai.org/", }) @@ -105,8 +110,8 @@ class ExhentaiGalleryExtractor(Extractor): def get_images(self, page): """Collect url and metadata for all images in this gallery""" - url = "https://exhentai.org/s/" + text.extract(page, 'hentai.org/s/', '"')[0] - yield self.image_from_page(url) + part = text.extract(page, 'hentai.org/s/', '"')[0] + yield self.image_from_page("https://exhentai.org/s/" + part) yield from self.images_from_api() def image_from_page(self, url): @@ -122,10 +127,15 @@ class ExhentaiGalleryExtractor(Extractor): ("showkey" , 'var showkey="', '";'), ))[0] self.key["start"] = data["startkey"] - self.key["show" ] = data["showkey"] - self.key["next" ] = data["nextkey"] - url = ("https://exhentai.org/fullimg.php" + text.unescape(data["origurl"]) - if self.original and data["origurl"] else data["url"]) + self.key["show"] = data["showkey"] + self.key["next"] = data["nextkey"] + + if self.original and data["origurl"]: + part = text.unescape(data["origurl"]) + url = "https://exhentai.org/fullimg.php" + part + else: + url = data["url"] + return url, text.nameext_from_url(data["url"], { "num": 1, "image-token": data["startkey"], @@ -133,14 +143,14 @@ class ExhentaiGalleryExtractor(Extractor): def images_from_api(self): """Get image url and data from api calls""" - nextkey = self.key["next" ] + nextkey = self.key["next"] request = { "method" : "showpage", "gid" : int(self.gid), "imgkey" : nextkey, "showkey": self.key["show"], } - for request["page"] in range(2, self.count+1): + for request["page"] in range(2, self.count + 1): while True: try: self.wait() @@ -150,9 +160,14 @@ class ExhentaiGalleryExtractor(Extractor): pass imgkey = nextkey nextkey, pos = text.extract(page["i3"], "'", "'") - imgurl , pos = text.extract(page["i3"], '05}"] - filename_fmt = "{category}_{gallery-id}_{chapter:>05}_{num:>03}.{extension}" + filename_fmt = ("{category}_{gallery-id}_{chapter:>05}_" + "{num:>03}.{extension}") pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"] test = [("http://www.hbrowse.com/10363/c00000", { "url": "634f4800858913f097bc3b62a8fedaf74b5254bd", diff --git a/gallery_dl/extractor/hentai2read.py b/gallery_dl/extractor/hentai2read.py index 8c01cfa2..06800558 100644 --- a/gallery_dl/extractor/hentai2read.py +++ b/gallery_dl/extractor/hentai2read.py @@ -12,6 +12,7 @@ from .. import text from . import hentaicdn import re + class Hentai2readMangaExtractor(hentaicdn.HentaicdnMangaExtractor): """Extractor for mangas from hentai2read.com""" category = "hentai2read" @@ -49,7 +50,9 @@ class Hentai2readChapterExtractor(hentaicdn.HentaicdnChapterExtractor): def __init__(self, match): hentaicdn.HentaicdnChapterExtractor.__init__(self) self.url_title, self.chapter = match.groups() - self.url = "http://hentai2read.com/{}/{}/".format(self.url_title, self.chapter) + self.url = "http://hentai2read.com/{}/{}/".format( + self.url_title, self.chapter + ) def get_job_metadata(self, page, images): title = text.extract(page, "", "")[0] diff --git a/gallery_dl/extractor/hentaibox.py b/gallery_dl/extractor/hentaibox.py index 886aa955..b721f807 100644 --- a/gallery_dl/extractor/hentaibox.py +++ b/gallery_dl/extractor/hentaibox.py @@ -11,21 +11,24 @@ from .common import Extractor, Message from .. import text, iso639_1 + class HentaiboxChapterExtractor(Extractor): """Extractor for a single manga chapter from hentaibox.net""" category = "hentaibox" subcategory = "chapter" directory_fmt = ["{category}", "{series}", "{title}"] filename_fmt = "{num:>03}.{extension}" - pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/[^/]+/(\d+)_\d+_([^/&]+)"] - test = [("http://www.hentaibox.net/hentai-manga/16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored", { + pattern = [r"(?:https?://)?(?:www\.)?hentaibox\.net/" + r"[^/]+/(\d+)_\d+_([^/&]+)"] + test = [(("http://www.hentaibox.net/hentai-manga/" + "16_18_Original_Amazon-No-Hiyaku-Amazon-Elixir-Decensored"), { "url": "d1a50a9b289d284f178971e01cf312791888e057", "keyword": "b4b100f800b716e573e072f01b5d604d9b436b70", })] def __init__(self, match): Extractor.__init__(self) - self.url = match.group(0) + self.url = match.group(0) self.count = match.group(1) def items(self): @@ -51,4 +54,6 @@ class HentaiboxChapterExtractor(Extractor): @staticmethod def get_image_urls(page): """Extract and return a list of all image-urls""" - yield from text.extract_iter(page, '', '') + yield from text.extract_iter( + page, '', '' + ) diff --git a/gallery_dl/extractor/hentaicdn.py b/gallery_dl/extractor/hentaicdn.py index d4e3f127..d2a14d8d 100644 --- a/gallery_dl/extractor/hentaicdn.py +++ b/gallery_dl/extractor/hentaicdn.py @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. import text import json + class HentaicdnMangaExtractor(Extractor): """Base class for extractors for mangas""" subcategory = "manga" @@ -29,7 +30,8 @@ class HentaicdnChapterExtractor(Extractor): """Base class for extractors for a single manga chapter""" subcategory = "chapter" directory_fmt = ["{category}", "{gallery-id} {title}"] - filename_fmt = "{category}_{gallery-id}_{chapter:>02}_{num:>03}.{extension}" + filename_fmt = ("{category}_{gallery-id}_{chapter:>02}_" + "{num:>03}.{extension}") url = "" def items(self): diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 979de6f8..aa454b21 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -11,6 +11,7 @@ from .common import Extractor, Message from .. import text, exception + class HentaifoundryUserExtractor(Extractor): """Extractor for all images of a hentai-foundry-user""" category = "hentaifoundry" @@ -18,8 +19,10 @@ class HentaifoundryUserExtractor(Extractor): directory_fmt = ["{category}", "{artist}"] filename_fmt = "{category}_{index}_{title}.{extension}" pattern = [ - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)/?$", - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", + (r"(?:https?://)?(?:www\.)?hentai-foundry\.com/" + r"pictures/user/([^/]+)/?$"), + (r"(?:https?://)?(?:www\.)?hentai-foundry\.com/" + r"user/([^/]+)/profile"), ] test = [ ("http://www.hentai-foundry.com/pictures/user/Tenpura", { @@ -62,7 +65,8 @@ class HentaifoundryUserExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" - response = self.session.get(self.url_base + self.artist + "?enterAgree=1") + url = self.url_base + self.artist + "?enterAgree=1" + response = self.session.get(url) if response.status_code == 404: raise exception.NotFoundError("user") page = response.text @@ -73,9 +77,12 @@ class HentaifoundryUserExtractor(Extractor): def get_image_metadata(self, url): """Collect metadata for an image""" page = self.request(url).text - index = text.extract(url, '/', '/', len(self.url_base) + len(self.artist))[0] - title, pos = text.extract(page, 'Pictures » ', '<') - url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos) + offset = len(self.url_base) + len(self.artist) + index = text.extract(url, '/', '/', offset)[0] + title, pos = text.extract( + page, 'Pictures » ', '<') + url, pos = text.extract( + page, '//pictures.hentai-foundry.com', '"', pos) data = {"index": index, "title": text.unescape(title)} text.nameext_from_url(url, data) return "http://pictures.hentai-foundry.com" + url, data @@ -118,7 +125,8 @@ class HentaifoundryImageExtractor(Extractor): r"(?:pictures/user/([^/]+)/(\d+)" r"|[^/]/([^/]+)/(\d+))")] test = [ - ("http://www.hentai-foundry.com/pictures/user/Tenpura/340854/notitle", { + (("http://www.hentai-foundry.com/" + "pictures/user/Tenpura/340854/notitle"), { "url": "f3c0739bf86543697deabbed4bf99eb95a04582b", "keyword": "96217c5becc1369c36dafa201c3c208518de8f1f", "content": "5c14cd10deaad79a5152f9de45c9203cf76165a0", @@ -146,10 +154,11 @@ class HentaifoundryImageExtractor(Extractor): response = self.session.get(url + "?enterAgree=1") if response.status_code == 404: raise exception.NotFoundError("image") + extr = text.extract page = response.text - artist, pos = text.extract(page, ' » ', '<', pos) - url , pos = text.extract(page, '//pictures.hentai-foundry.com', '"', pos) + artist, pos = extr(page, ' » ', '<', pos) + url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos) data = { "artist": artist, "index": self.index, diff --git a/gallery_dl/extractor/hentaihere.py b/gallery_dl/extractor/hentaihere.py index ea35afd5..dbea69a3 100644 --- a/gallery_dl/extractor/hentaihere.py +++ b/gallery_dl/extractor/hentaihere.py @@ -12,6 +12,7 @@ from .. import text from . import hentaicdn import re + class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor): """Extractor for mangas from hentaihere.com""" category = "hentaihere" @@ -32,7 +33,7 @@ class HentaihereMangaExtractor(hentaicdn.HentaicdnMangaExtractor): def get_chapters(self): return text.extract_iter( self.request("http://hentaihere.com/m/S" + self.gid).text, - '
  • \n\n", "")[0] diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index cc7dcc94..c5385cb9 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. import text, iso639_1 import string + class HitomiGalleryExtractor(Extractor): """Extractor for image galleries from hitomi.la""" category = "hitomi" @@ -29,7 +30,8 @@ class HitomiGalleryExtractor(Extractor): self.gid = match.group(1) def items(self): - page = self.request("https://hitomi.la/galleries/" + self.gid + ".html").text + url = "https://hitomi.la/galleries/" + self.gid + ".html" + page = self.request(url).text data = self.get_job_metadata(page) images = self.get_image_urls(page) data["count"] = len(images) @@ -41,8 +43,8 @@ class HitomiGalleryExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" - group = "" - gtype = "" + group = "" + gtype = "" series = "" _ , pos = text.extract(page, '

    ', "", pos) diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index d0fd07c8..3e27f591 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -11,6 +11,7 @@ from .common import Extractor, AsynchronousExtractor, Message from .. import text + class ImagebamGalleryExtractor(AsynchronousExtractor): """Extractor for image galleries from imagebam.com""" category = "imagebam" @@ -18,7 +19,8 @@ class ImagebamGalleryExtractor(AsynchronousExtractor): directory_fmt = ["{category}", "{title} - {gallery-key}"] filename_fmt = "{num:>03}-{filename}" pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"] - test = [("http://www.imagebam.com/gallery/adz2y0f9574bjpmonaismyrhtjgvey4o", { + test = [(("http://www.imagebam.com/" + "gallery/adz2y0f9574bjpmonaismyrhtjgvey4o"), { "url": "d7a4483b6d5ebba81950a349aad58ae034c60eda", "keyword": "e4a9395dbd06d4af3172a6a61c90601bc47ee18c", "content": "596e6bfa157f2c7169805d50075c2986549973a8", @@ -56,13 +58,15 @@ class ImagebamGalleryExtractor(AsynchronousExtractor): done = False while not done: page = self.request(self.url_base + url).text - _ , pos = text.extract(page, 'class="btn btn-default" title="Next">', '') + pos = text.extract( + page, 'class="btn btn-default" title="Next">', '' + )[1] if pos == 0: done = True else: url, pos = text.extract(page, ' href="', '"', pos-70) - image_id , pos = text.extract(page, '', '', pos) + section , pos = text.extract( + page, '', '', pos + ) json_dict = json.loads(json_data) json_dict["section"] = section return json_dict - class ImagefapUserExtractor(Extractor): """Extractor for all galleries from a user at imagefap.com""" category = "imagefap" subcategory = "user" directory_fmt = ["{category}", "{gallery-id} {title}"] filename_fmt = "{category}_{gallery-id}_{name}.{extension}" - pattern = [r"(?:https?://)?(?:www\.)?imagefap\.com/profile(?:\.php\?user=|/)([^/]+)", - r"(?:https?://)?(?:www\.)?imagefap\.com/usergallery\.php\?userid=(\d+)"] + pattern = [(r"(?:https?://)?(?:www\.)?imagefap\.com/" + r"profile(?:\.php\?user=|/)([^/]+)"), + (r"(?:https?://)?(?:www\.)?imagefap\.com/" + r"usergallery\.php\?userid=(\d+)")] test = [("http://www.imagefap.com/profile/Mr Bad Example/galleries", { "url": "145e98a8648c7695c150800ff8fd578ab26c28c1", })] @@ -165,8 +169,9 @@ class ImagefapUserExtractor(Extractor): if self.user: url = "http://www.imagefap.com/profile/" + self.user + "/galleries" else: - url = "http://www.imagefap.com/usergallery.php?userid=" + str(self.user_id) + url = ("http://www.imagefap.com/usergallery.php?userid=" + + str(self.user_id)) page = self.request(url).text self.user_id, pos = text.extract(page, '?userid=', '"') - folders , pos = text.extract(page, ' id="tgl_all" value="', '"', pos) + folders, pos = text.extract(page, ' id="tgl_all" value="', '"', pos) return folders.split("|")[:-1] diff --git a/gallery_dl/extractor/imagehosts.py b/gallery_dl/extractor/imagehosts.py index 37f0fea1..30ea3e46 100644 --- a/gallery_dl/extractor/imagehosts.py +++ b/gallery_dl/extractor/imagehosts.py @@ -13,6 +13,7 @@ from .. import text from os.path import splitext from urllib.parse import urljoin + class ImagehostImageExtractor(Extractor): """Base class for single-image extractors for various imagehosts""" subcategory = "image" @@ -76,13 +77,16 @@ class ImgytImageExtractor(ImagehostImageExtractor): filename += splitext(url)[1] if filename else url return url, filename + class RapidimgImageExtractor(ImgytImageExtractor): """Extractor for single images from rapidimg.net""" category = "rapidimg" - pattern = [r"(?:https?://)?((?:www\.)?rapidimg\.net/img-([a-z0-9]+)\.html)"] + pattern = [r"(?:https?://)?((?:www\.)?rapidimg\.net/" + r"img-([a-z0-9]+)\.html)"] test = [] https = False + class FapatImageExtractor(ImgytImageExtractor): """Extractor for single images from fapat.me""" category = "fapat" @@ -108,6 +112,7 @@ class ChronosImageExtractor(ImagehostImageExtractor): filename, pos = text.extract(page, ' alt="', '"', pos) return url, filename + class CoreimgImageExtractor(ChronosImageExtractor): """Extractor for single images from coreimg.net""" category = "coreimg" @@ -118,6 +123,7 @@ class CoreimgImageExtractor(ChronosImageExtractor): "content": "0c8768055e4e20e7c7259608b67799171b691140", })] + class ImgmaidImageExtractor(ChronosImageExtractor): """Extractor for single images from imgmaid.net""" category = "imgmaid" @@ -125,6 +131,7 @@ class ImgmaidImageExtractor(ChronosImageExtractor): test = [] https = True + class PicmaniacImageExtractor(ChronosImageExtractor): """Extractor for single images from pic-maniac.com""" category = "picmaniac" @@ -149,6 +156,7 @@ class HosturimageImageExtractor(ImagehostImageExtractor): url, pos = text.extract(page, "src='", "'", pos) return url, url + class ImageontimeImageExtractor(HosturimageImageExtractor): """Extractor for single images from imageontime.org""" category = "imageontime" @@ -157,6 +165,7 @@ class ImageontimeImageExtractor(HosturimageImageExtractor): test = [] https = False + class Img4everImageExtractor(HosturimageImageExtractor): """Extractor for single images from img4ever.net""" category = "img4ever" @@ -165,6 +174,7 @@ class Img4everImageExtractor(HosturimageImageExtractor): test = [] https = True + class ImguploadImageExtractor(HosturimageImageExtractor): """Extractor for single images from imgupload.yt""" category = "imgupload" @@ -184,10 +194,12 @@ class ImgspotImageExtractor(ImagehostImageExtractor): url = text.extract(page, "\n', pos) - _ , pos = text.extract(page, '\n', pos) + _ , pos = text.extract(page, '\r\n\t\t\r\n\t\t", "") - url , pos = text.extract(page, '

    03}{chapter-minor} - {title}"] - filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + directory_fmt = ["{category}", "{manga}", + "c{chapter:>03}{chapter-minor} - {title}"] + filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_" + "{page:>03}.{extension}") root = "http://kissmanga.com" def __init__(self, match): @@ -77,8 +80,8 @@ class KissmangaChapterExtractor(KissmangaExtractor): """Collect metadata for extractor-job""" manga, pos = text.extract(page, "Read manga\n", "\n") cinfo, pos = text.extract(page, "", "\n", pos) - match = re.match( - r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo) + match = re.match((r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)" + r"(?:\.0*(\d+))?(?:: (.+))?"), cinfo) chminor = match.group(3) return { "manga": manga, diff --git a/gallery_dl/extractor/konachan.py b/gallery_dl/extractor/konachan.py index 833a31f8..8eda1095 100644 --- a/gallery_dl/extractor/konachan.py +++ b/gallery_dl/extractor/konachan.py @@ -10,11 +10,13 @@ from . import booru + class KonachanExtractor(booru.JSONBooruExtractor): """Base class for konachan extractors""" category = "konachan" api_url = "https://konachan.com/post.json" + class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor): """Extractor for images from konachan.com based on search-tags""" subcategory = "tag" @@ -23,6 +25,7 @@ class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor): "content": "838cfb815e31f48160855435655ddf7bfc4ecb8d", })] + class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor): """Extractor for image-pools from konachan.com""" subcategory = "pool" @@ -31,6 +34,7 @@ class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor): "content": "cf0546e38a93c2c510a478f8744e60687b7a8426", })] + class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor): """Extractor for single images from konachan.com""" subcategory = "post" diff --git a/gallery_dl/extractor/luscious.py b/gallery_dl/extractor/luscious.py index 3faf42dd..ee1df898 100644 --- a/gallery_dl/extractor/luscious.py +++ b/gallery_dl/extractor/luscious.py @@ -12,6 +12,7 @@ from .common import Extractor, Message from .. import text, iso639_1 import re + class LusciousAlbumExtractor(Extractor): """Extractor for image albums from luscious.net""" category = "luscious" @@ -21,7 +22,8 @@ class LusciousAlbumExtractor(Extractor): pattern = [(r"(?:https?://)?(?:www\.)?luscious\.net/" r"(?:c/[^/]+/)?(?:pictures/album|albums)/([^/]+_(\d+))")] test = [ - ("https://luscious.net/c/hentai_manga/albums/okinami-no-koigokoro_277031/view/", { + (("https://luscious.net/c/hentai_manga/albums/" + "okinami-no-koigokoro_277031/view/"), { "url": "7e4984a271a1072ac6483e4228a045895aff86f3", "keyword": "3b3d36b355fa6a1a6c24be374ae16e6e9b0c729e", "content": "b3a747a6464509440bd0ff6d1267e6959f8d6ff3", @@ -74,7 +76,7 @@ class LusciousAlbumExtractor(Extractor): while True: data = self.request(apiurl.format(pnum)).json() page = data["html"] - pos = 0 + pos = 0 while True: imgid, pos = text.extract(page, 'container" id="', '"', pos) if not imgid: diff --git a/gallery_dl/extractor/mangafox.py b/gallery_dl/extractor/mangafox.py index 7ac7dcfb..83a23563 100644 --- a/gallery_dl/extractor/mangafox.py +++ b/gallery_dl/extractor/mangafox.py @@ -18,10 +18,12 @@ class MangafoxChapterExtractor(AsynchronousExtractor): category = "mangafox" subcategory = "chapter" directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"] - filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_" + "{page:>03}.{extension}") pattern = [(r"(?:https?://)?(?:www\.)?(mangafox\.me/manga/" r"[^/]+/(v\d+/)?c\d+[^/]*)")] - test = [("http://mangafox.me/manga/kidou_keisatsu_patlabor/v05/c006.2/1.html", { + test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/" + "v05/c006.2/1.html"), { "keyword": "3bae0396e96868f5f24dff5e547a6bbfcbed7282", "content": "5c50c252dcf12ffecf68801f4db8a2167265f66c", })] diff --git a/gallery_dl/extractor/mangahere.py b/gallery_dl/extractor/mangahere.py index 3b7a3fb1..0e29b4b9 100644 --- a/gallery_dl/extractor/mangahere.py +++ b/gallery_dl/extractor/mangahere.py @@ -12,6 +12,7 @@ from .common import Extractor, AsynchronousExtractor, Message from .. import text import re + class MangahereMangaExtractor(Extractor): """Extractor for mangas from mangahere.co""" category = "mangahere" @@ -33,10 +34,10 @@ class MangahereMangaExtractor(Extractor): def get_chapters(self): """Return a list of all chapter urls""" page = self.request(self.url).text - return reversed(list( - text.extract_iter(page, '')) - )) + return reversed(list(text.extract_iter( + page, '') + ))) class MangahereChapterExtractor(AsynchronousExtractor): @@ -44,7 +45,8 @@ class MangahereChapterExtractor(AsynchronousExtractor): category = "mangahere" subcategory = "chapter" directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor}"] - filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + filename_fmt = ("{manga}_c{chapter:>03}{chapter-minor}_" + "{page:>03}.{extension}") pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/" r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")] test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", { @@ -60,10 +62,13 @@ class MangahereChapterExtractor(AsynchronousExtractor): def items(self): page = self.request(self.url_fmt.format(self.part, 1)).text data = self.get_job_metadata(page) + urls = zip( + range(1, int(data["count"])+1), + self.get_image_urls(page), + ) yield Message.Version, 1 yield Message.Directory, data.copy() - for i, url in zip(range(int(data["count"])), self.get_image_urls(page)): - data["page"] = i+1 + for data["page"], url in urls: text.nameext_from_url(url, data) yield Message.Url, url, data.copy() @@ -74,7 +79,8 @@ class MangahereChapterExtractor(AsynchronousExtractor): _ , pos = text.extract(page, '')[0] + page = text.extract( + page, '