diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index c303a6d4..a7986880 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -8,24 +8,25 @@ """Extract image-urls from http://behoimi.org/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "3dbooru", - "extractor": "ThreeDeeBooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+).*", - ], -} +class ThreeDeeBooruExtractor(booru.JSONBooruExtractor): + """Base class for 3dbooru extractors""" + category = "3dbooru" + api_url = "http://behoimi.org/post/index.json" + headers = { + "Referer": "http://behoimi.org/post/show/", + "User-Agent": "Mozilla/5.0", + } -class ThreeDeeBooruExtractor(JSONBooruExtractor): +class ThreeDeeBooruTagExtractor(ThreeDeeBooruExtractor, booru.BooruTagExtractor): + """Extract images from 3dbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "http://behoimi.org/post/index.json" - self.headers = { - "Referer": "http://behoimi.org/post/show/", - "User-Agent": "Mozilla/5.0" - } +class ThreeDeeBooruPoolExtractor(ThreeDeeBooruExtractor, booru.BooruPoolExtractor): + """Extract image-pools from 3dbooru""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"] + +class ThreeDeeBooruPostExtractor(ThreeDeeBooruExtractor, booru.BooruPostExtractor): + """Extract single images from 3dbooru""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"] diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 9aab90a2..b28c25dc 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -10,23 +10,14 @@ from .chan import ChanExtractor -info = { - "category": "4chan", - "extractor": "FourChanExtractor", - "directory": ["{category}", "{board}-{thread}"], - "filename": "{tim}-{filename}{ext}", - "pattern": [ - r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*", - ], -} - class FourChanExtractor(ChanExtractor): + category = "4chan" + pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"] api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" file_url = "https://i.4cdn.org/{board}/{tim}{ext}" def __init__(self, match): ChanExtractor.__init__( - self, info["category"], - match.group(1), match.group(2) + self, match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index c21b4595..5dcd200f 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -10,23 +10,14 @@ from .chan import ChanExtractor -info = { - "category": "8chan", - "extractor": "InfinityChanExtractor", - "directory": ["{category}", "{board}-{thread}"], - "filename": "{tim}-{filename}{ext}", - "pattern": [ - r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+).*", - ], -} - class InfinityChanExtractor(ChanExtractor): + category = "8chan" + pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"] api_url = "https://8ch.net/{board}/res/{thread}.json" file_url = "https://8ch.net/{board}/src/{tim}{ext}" def __init__(self, match): ChanExtractor.__init__( - self, info["category"], - match.group(1), match.group(2) + self, match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 2898ffe5..7a69d976 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -46,11 +46,11 @@ modules = [ def find(url): """Find extractor suitable for handling the given url""" - for pattern, module, klass in _list_patterns(): + for pattern, klass in _list_patterns(): match = re.match(pattern, url) if match: - return klass(match), module.info - return None, None + return klass(match) + return None # -------------------------------------------------------------------- # internals @@ -59,15 +59,22 @@ _cache = [] _module_iter = iter(modules) def _list_patterns(): - """Yield all available (pattern, module, klass) tuples""" + """Yield all available (pattern, info, class) tuples""" for entry in _cache: yield entry for module_name in _module_iter: module = importlib.import_module("."+module_name, __package__) - klass = getattr(module, module.info["extractor"]) - userpatterns = config.get(("extractor", module_name, "pattern"), default=[]) - for pattern in userpatterns + module.info["pattern"]: - etuple = (pattern, module, klass) - _cache.append(etuple) - yield etuple + for klass in _get_classes(module): + for pattern in klass.pattern: + etuple = (pattern, klass) + _cache.append(etuple) + yield etuple + +def _get_classes(module): + """Return a list of all extractor classes in a module""" + return [ + klass for klass in module.__dict__.values() if ( + hasattr(klass, "pattern") and klass.__module__ == module.__name__ + ) + ] diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 0e1fce53..ec00eef8 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -10,21 +10,14 @@ from .common import AsynchronousExtractor, Message from .. import text, iso639_1 -import os.path import re -info = { - "category": "batoto", - "extractor": "BatotoExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)", - ], -} - class BatotoExtractor(AsynchronousExtractor): + category = "batoto" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] url = "https://bato.to/areader" def __init__(self, match): @@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor): manga, pos = extr(page, "document.title = '", " - ", pos) match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo) return { - "category": info["category"], + "category": self.category, "token": self.token, "manga": manga, "volume": match.group(2) or "", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 200bbf77..b5d7323e 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -12,20 +12,20 @@ from .common import Extractor, Message from .. import text import xml.etree.ElementTree as ET import json -import os.path import urllib.parse class BooruExtractor(Extractor): + info = {} + headers = {} + page = "page" api_url = "" + category = "" - def __init__(self, match, info): + def __init__(self): Extractor.__init__(self) - self.info = info - self.tags = text.unquote(match.group(1)) - self.page = "page" - self.params = {"tags": self.tags} - self.headers = {} + self.params = {"limit": 50} + self.setup() def items(self): yield Message.Version, 1 @@ -40,6 +40,9 @@ class BooruExtractor(Extractor): def items_impl(self): pass + def setup(self): + pass + def update_page(self, reset=False): """Update the value of the 'page' parameter""" # Override this method in derived classes if necessary. @@ -51,14 +54,14 @@ class BooruExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" + # Override this method in derived classes return { - "category": self.info["category"], - "tags": self.tags + "category": self.category, } def get_file_metadata(self, data): """Collect metadata for a downloadable file""" - data["category"] = self.info["category"] + data["category"] = self.category return text.nameext_from_url(self.get_file_url(data), data) def get_file_url(self, data): @@ -78,10 +81,10 @@ class JSONBooruExtractor(BooruExtractor): self.request(self.api_url, verify=True, params=self.params, headers=self.headers).text ) - if len(images) == 0: - return for data in images: yield data + if len(images) < self.params["limit"]: + return self.update_page() @@ -93,8 +96,56 @@ class XMLBooruExtractor(BooruExtractor): root = ET.fromstring( self.request(self.api_url, verify=True, params=self.params).text ) - if len(root) == 0: - return for item in root: yield item.attrib + if len(root) < self.params["limit"]: + return self.update_page() + + +class BooruTagExtractor(BooruExtractor): + """Extract images based on search-tags""" + + directory_fmt = ["{category}", "{tags}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.tags = text.unquote(match.group(1)) + self.params["tags"] = self.tags + + def get_job_metadata(self): + return { + "category": self.category, + "tags": self.tags, + } + + +class BooruPoolExtractor(BooruExtractor): + """Extract image-pools""" + + directory_fmt = ["{category}", "pool", "{pool}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.pool = match.group(1) + self.params["tags"] = "pool:" + self.pool + + def get_job_metadata(self): + return { + "category": self.category, + "pool": self.pool, + } + + +class BooruPostExtractor(BooruExtractor): + """Extract single images""" + + directory_fmt = ["{category}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.post = match.group(1) + self.params["tags"] = "id:" + self.post diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 68217d58..1dab5fd4 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -13,13 +13,15 @@ from .. import text class ChanExtractor(Extractor): + directory_fmt = ["{category}", "{board}-{thread}"] + filename_fmt = "{tim}-{filename}{ext}" api_url = "" file_url = "" - def __init__(self, category, board, thread): + def __init__(self, board, thread): Extractor.__init__(self) self.metadata = { - "category": category, + "category": self.category, "board": board, "thread": thread, } diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 47765d21..389fd4a2 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -8,20 +8,21 @@ """Extract image-urls from https://danbooru.donmai.us/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "danbooru", - "extractor": "DanbooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*", - ], -} +class DanbooruExtractor(booru.JSONBooruExtractor): + """Base class for danbooru extractors""" + category = "danbooru" + api_url = "https://danbooru.donmai.us/posts.json" -class DanbooruExtractor(JSONBooruExtractor): +class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): + """Extract images from danbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "https://danbooru.donmai.us/posts.json" +class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): + """Extract image-pools from danbooru""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/pools/(\d+)"] + +class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor): + """Extract single images from danbooru""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts/(\d+)"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d1002f02..4553f8c3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -10,21 +10,15 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path import re -info = { - "category": "deviantart", - "extractor": "DeviantArtExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*", - ], -} - class DeviantArtExtractor(AsynchronousExtractor): + category = "deviantart" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"] + def __init__(self, match): AsynchronousExtractor.__init__(self) self.session.cookies["agegate_state"] = "1" @@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "artist": self.artist, } def get_image_metadata(self, image): """Collect metadata for an image""" match = self.extract_data(image, 'title', - '(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') + r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') if image.startswith(" ismature"): # adult image url, _ = text.extract(image, 'href="', '"') @@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor): height, pos = text.extract(page, ' height="', '"', pos) else: # normal image - index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1) + index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1) url, pos = text.extract(image, ' data-super-full-img="', '"', match.end()) if url: width , pos = text.extract(image, ' data-super-full-width="', '"', pos) diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 7b808f44..c39c86aa 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -8,21 +8,24 @@ """Extract image-urls from https://e621.net/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "e621", - "extractor": "E621Extractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ +class E621Extractor(booru.JSONBooruExtractor): + """Base class for e621 extractors""" + category = "e621" + api_url = "https://e621.net/post/index.json" + +class E621TagExtractor(E621Extractor, booru.BooruTagExtractor): + """Extract images from e621 based on search-tags""" + pattern = [ r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)", - r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+).*" - ], -} + r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)", + ] -class E621Extractor(JSONBooruExtractor): +class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor): + """Extract image-pools from e621""" + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "https://e621.net/post/index.json" +class E621PostExtractor(E621Extractor, booru.BooruPostExtractor): + """Extract single images from e621""" + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 875ab1b4..b2d697d9 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -13,18 +13,12 @@ from .. import config, text, iso639_1 import time import random -info = { - "category": "exhentai", - "extractor": "ExhentaiExtractor", - "directory": ["{category}", "{gallery-id}"], - "filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})", - ], -} - class ExhentaiExtractor(Extractor): + category = "exhentai" + directory_fmt = ["{category}", "{gallery-id}"] + filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}" + pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] api_url = "http://exhentai.org/api.php" def __init__(self, match): @@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category" : info["category"], + "category" : self.category, "gallery-id" : self.gid, "gallery-token": self.token, } diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 11a67f0c..bcf6acee 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -8,27 +8,19 @@ """Extract image-urls from http://gelbooru.com/""" -from .booru import XMLBooruExtractor +from . import booru from .. import config -info = { - "category": "gelbooru", - "extractor": "GelbooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*", - ], -} - -class GelbooruExtractor(XMLBooruExtractor): - - def __init__(self, match): - XMLBooruExtractor.__init__(self, match, info) - self.api_url = "http://gelbooru.com/" - self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags} +class GelbooruExtractor(booru.XMLBooruExtractor): + """Base class for gelbooru extractors""" + + category = "gelbooru" + api_url = "http://gelbooru.com/" + + def setup(self): + self.params.update({"page":"dapi", "s":"post", "q":"index"}) self.session.cookies.update( - config.get(("extractor", info["category"], "cookies")) + config.get(("extractor", self.category, "cookies")) ) def update_page(self, reset=False): @@ -36,3 +28,16 @@ class GelbooruExtractor(XMLBooruExtractor): self.params["pid"] += 1 else: self.params["pid"] = 0 + +class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor): + """Extract images from gelbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"] + +# TODO: find out how to access pools via gelbooru-api +# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor): + # """Extract image-pools from gelbooru""" + # pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=pool&s=show&id=(\d+)"] + +class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor): + """Extract single images from gelbooru""" + pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=view&id=(\d+)"] diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 59aa576e..67da9011 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -10,20 +10,13 @@ from .common import Extractor, Message from .. import text -import os.path - -info = { - "category": "hbrowse", - "extractor": "HbrowseExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)", - ], -} class HbrowseExtractor(Extractor): + category = "hbrowse" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"] url_base = "http://www.hbrowse.com/thumbnails/" def __init__(self, match): @@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, 'gallery-id': self.gid, 'chapter': int(self.chapter[1:]), } diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index d3fcb362..5c916173 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -12,19 +12,15 @@ from .common import Extractor, Message from .. import text import os.path -info = { - "category": "hentaifoundry", - "extractor": "HentaiFoundryExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", - ], -} - class HentaiFoundryExtractor(Extractor): + category = "hentaifoundry" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", + ] url_base = "http://www.hentai-foundry.com/pictures/user/" def __init__(self, match): @@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor): token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) return { - "category": info["category"], + "category": self.category, "artist": self.artist, "count": count, }, token diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index c7dc49c8..dd341538 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -10,21 +10,15 @@ from .common import Extractor, Message from .. import text, iso639_1 -import os.path import string -info = { - "category": "hitomi", - "extractor": "HitomiExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html", - ], -} - class HitomiExtractor(Extractor): + category = "hitomi" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}" + pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -62,7 +56,7 @@ class HitomiExtractor(Extractor): series, pos = text.extract(page, '.html">', '', pos) lang = lang.capitalize() return { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, "title": title, "artist": string.capwords(artist), diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index c39f74fc..3bd65dbd 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "imagebam", - "extractor": "ImagebamExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{filename}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*", - ], -} class ImagebamExtractor(AsynchronousExtractor): + category = "imagebam" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{filename}" + pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"] url_base = "http://www.imagebam.com" def __init__(self, match): @@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor): response.encoding = "utf-8" page = response.text data = { - "category": info["category"], + "category": self.category, "gallery-key": self.gkey, } data, _ = text.extract_all(page, ( diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py index b3a01dd7..edd4db58 100644 --- a/gallery_dl/extractor/imgbox.py +++ b/gallery_dl/extractor/imgbox.py @@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message from .. import text import re -info = { - "category": "imgbox", - "extractor": "ImgboxExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{name}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)", - ], -} - class ImgboxExtractor(AsynchronousExtractor): + category = "imgbox" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{name}" + pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"] url_base = "http://imgbox.com" def __init__(self, match): @@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor): """Collect metadata for extractor-job""" match = re.search(r"

(.+) \(([^ ]+) ([^ ]+) \w+\) - (\d+)", page) return { - "category": info["category"], + "category": self.category, "gallery-key": self.key, "title": match.group(1), "date": match.group(2), @@ -62,7 +56,8 @@ class ImgboxExtractor(AsynchronousExtractor): ), values=data) return data - def get_file_url(self, page): + @staticmethod + def get_file_url(page): """Extract download-url""" base = "http://i.imgbox.com/" path, _ = text.extract(page, base, '"') diff --git a/gallery_dl/extractor/imgchili.py b/gallery_dl/extractor/imgchili.py index d913a0d3..8cd67e06 100644 --- a/gallery_dl/extractor/imgchili.py +++ b/gallery_dl/extractor/imgchili.py @@ -12,18 +12,13 @@ from .common import Extractor, Message from .. import text import re -info = { - "category": "imgchili", - "extractor": "ImgchiliExtractor", - "directory": ["{category}", "{title} - {key}"], - "filename": "{num:>03}-{name}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)", - ], -} - class ImgchiliExtractor(Extractor): + category = "imgchili" + directory_fmt = ["{category}", "{title} - {key}"] + filename_fmt = "{num:>03}-{name}" + pattern = [r"(?:https?://)?(?:www\.)?imgchili\.net/album/([^/]+)"] + def __init__(self, match): Extractor.__init__(self) self.match = match diff --git a/gallery_dl/extractor/imgth.py b/gallery_dl/extractor/imgth.py index 8622e579..7000b18c 100644 --- a/gallery_dl/extractor/imgth.py +++ b/gallery_dl/extractor/imgth.py @@ -10,20 +10,14 @@ from .common import Extractor, Message from .. import text -import os.path - -info = { - "category": "imgth", - "extractor": "ImgthExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}.{extension}", - "pattern": [ - r"(?:https?://)?imgth\.com/gallery/(\d+)", - ], -} class ImgthExtractor(Extractor): + category = "imgth" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?imgth\.com/gallery/(\d+)"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -55,7 +49,7 @@ class ImgthExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, } data, _ = text.extract_all(page, ( diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 5515c509..355e201b 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -12,18 +12,13 @@ from .common import Extractor, Message from .. import text import os.path -info = { - "category": "imgur", - "extractor": "ImgurExtractor", - "directory": ["{category}", "{album-key} - {title}"], - "filename": "{category}_{album-key}_{num:>03}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)", - ], -} - class ImgurExtractor(Extractor): + category = "imgur" + directory_fmt = ["{category}", "{album-key} - {title}"] + filename_fmt = "{category}_{album-key}_{num:>03}_{name}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?imgur\.com/(?:a|gallery)/([^/?&#]+)"] + def __init__(self, match): Extractor.__init__(self) self.album = match.group(1) @@ -43,7 +38,7 @@ class ImgurExtractor(Extractor): """Collect metadata for extractor-job""" page = self.request("https://imgur.com/a/" + self.album).text data = { - "category": info["category"], + "category": self.category, "album-key": self.album, } return text.extract_all(page, ( diff --git a/gallery_dl/extractor/kissmanga.py b/gallery_dl/extractor/kissmanga.py index 5e1edcbc..4b1cad15 100644 --- a/gallery_dl/extractor/kissmanga.py +++ b/gallery_dl/extractor/kissmanga.py @@ -10,21 +10,15 @@ from .common import Extractor, Message from .. import text, cloudflare -import os.path import re -info = { - "category": "kissmanga", - "extractor": "KissmangaExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"], - "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+", - ], -} - class KissmangaExtractor(Extractor): + category = "kissmanga" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?kissmanga\.com/Manga/.+/.+\?id=\d+"] + def __init__(self, match): Extractor.__init__(self) self.url = match.group(0) @@ -41,8 +35,7 @@ class KissmangaExtractor(Extractor): data["page"] = num yield Message.Url, url, text.nameext_from_url(url, data) - @staticmethod - def get_job_metadata(page): + def get_job_metadata(self, page): """Collect metadata for extractor-job""" manga, pos = text.extract(page, "Read manga\n", "\n") cinfo, pos = text.extract(page, "", "\n", pos) @@ -50,7 +43,7 @@ class KissmangaExtractor(Extractor): r"(?:Vol.0*(\d+) )?(?:Ch.)?0*(\d+)(?:\.0*(\d+))?(?:: (.+))?", cinfo) chminor = match.group(3) return { - "category": info["category"], + "category": self.category, "manga": manga, "volume": match.group(1) or "", "chapter": match.group(2), diff --git a/gallery_dl/extractor/konachan.py b/gallery_dl/extractor/konachan.py index 63df2398..c6f0048e 100644 --- a/gallery_dl/extractor/konachan.py +++ b/gallery_dl/extractor/konachan.py @@ -8,20 +8,21 @@ """Extract image-urls from https://konachan.com/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "konachan", - "extractor": "KonachanExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+).*", - ], -} +class KonachanExtractor(booru.JSONBooruExtractor): + """Base class for konachan extractors""" + category = "konachan" + api_url = "https://konachan.com/post.json" -class KonachanExtractor(JSONBooruExtractor): +class KonachanTagExtractor(KonachanExtractor, booru.BooruTagExtractor): + """Extract images from konachan based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post\?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "http://konachan.com/post.json" +class KonachanPoolExtractor(KonachanExtractor, booru.BooruPoolExtractor): + """Extract image-pools from konachan""" + pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/pool/show/(\d+)"] + +class KonachanPostExtractor(KonachanExtractor, booru.BooruPostExtractor): + """Extract single images from konachan""" + pattern = [r"(?:https?://)?(?:www\.)?konachan\.com/post/show/(\d+)"] diff --git a/gallery_dl/extractor/mangapanda.py b/gallery_dl/extractor/mangapanda.py index b5db2679..5a09dd59 100644 --- a/gallery_dl/extractor/mangapanda.py +++ b/gallery_dl/extractor/mangapanda.py @@ -10,18 +10,13 @@ from .mangareader import MangaReaderExtractor -info = { - "category": "mangapanda", - "extractor": "MangaPandaExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))", - r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", - ], -} - class MangaPandaExtractor(MangaReaderExtractor): - category = info["category"] + category = "mangapanda" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?mangapanda\.com((/[^/]+)/(\d+))", + r"(?:https?://)?(?:www\.)?mangapanda\.com(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", + ] url_base = "http://www.mangapanda.com" diff --git a/gallery_dl/extractor/mangareader.py b/gallery_dl/extractor/mangareader.py index 372969b3..111f9a10 100644 --- a/gallery_dl/extractor/mangareader.py +++ b/gallery_dl/extractor/mangareader.py @@ -10,22 +10,16 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "mangareader", - "extractor": "MangaReaderExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))", - r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", - ], -} class MangaReaderExtractor(AsynchronousExtractor): - category = info["category"] + category = "mangareader" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?mangareader\.net((/[^/]+)/(\d+))", + r"(?:https?://)?(?:www\.)?mangareader\.net(/\d+-\d+-\d+(/[^/]+)/chapter-(\d+).html)", + ] url_base = "http://www.mangareader.net" def __init__(self, match): diff --git a/gallery_dl/extractor/mangashare.py b/gallery_dl/extractor/mangashare.py index 1665a017..67e2007e 100644 --- a/gallery_dl/extractor/mangashare.py +++ b/gallery_dl/extractor/mangashare.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os - -info = { - "category": "mangashare", - "extractor": "MangaShareExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)", - ], -} class MangaShareExtractor(AsynchronousExtractor): + category = "mangashare" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?read\.mangashare\.com/([^/]+/chapter-\d+)"] url_fmt = "http://read.mangashare.com/{}/page{:>03}.html" def __init__(self, match): @@ -40,11 +33,10 @@ class MangaShareExtractor(AsynchronousExtractor): text.nameext_from_url(url, data) yield Message.Url, url, data.copy() - @staticmethod - def get_job_metadata(page): + def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "lang": "en", "language": "English", } diff --git a/gallery_dl/extractor/mangastream.py b/gallery_dl/extractor/mangastream.py index ca6c1175..ae009944 100644 --- a/gallery_dl/extractor/mangastream.py +++ b/gallery_dl/extractor/mangastream.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "mangastream", - "extractor": "MangaStreamExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"], - "filename": "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))", - ], -} class MangaStreamExtractor(AsynchronousExtractor): + category = "mangastream" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}{chapter-minor} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}{chapter-minor}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?readms\.com/r/([^/]*/(\d+)([^/]*)?/(\d+))"] url_base = "https://readms.com/r/" def __init__(self, match): @@ -47,7 +40,7 @@ class MangaStreamExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "chapter": self.chapter, "chapter-minor": self.ch_minor, "chapter-id": self.ch_id, @@ -61,7 +54,8 @@ class MangaStreamExtractor(AsynchronousExtractor): ), values=data) return data - def get_page_metadata(self, page): + @staticmethod + def get_page_metadata(page): """Collect next url, image-url and metadata for one manga-page""" nurl, pos = text.extract(page, '
\n03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)", - ], -} - class NhentaiExtractor(Extractor): + category = "nhentai" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?nhentai\.net/g/(\d+)"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -57,7 +52,7 @@ class NhentaiExtractor(Extractor): title_en = ginfo["title"].get("english", "") title_ja = ginfo["title"].get("japanese", "") return { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, "upload-date": ginfo["upload_date"], "media-id": ginfo["media_id"], diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 8901c427..0249afb1 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message from .. import config, text import re -info = { - "category": "nijie", - "extractor": "NijieExtractor", - "directory": ["{category}", "{artist-id}"], - "filename": "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)", - ], -} - class NijieExtractor(AsynchronousExtractor): + category = "nijie" + directory_fmt = ["{category}", "{artist-id}"] + filename_fmt = "{category}_{artist-id}_{image-id}_p{index:>02}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?nijie\.info/members(?:_illust)?\.php\?id=(\d+)"] popup_url = "https://nijie.info/view_popup.php?id=" def __init__(self, match): @@ -37,7 +31,7 @@ class NijieExtractor(AsynchronousExtractor): self.session.cookies["R18"] = "1" self.session.cookies["nijie_referer"] = "nijie.info" self.session.cookies.update( - config.get(("extractor", info["category"], "cookies")) + config.get(("extractor", self.category, "cookies")) ) def items(self): @@ -52,7 +46,7 @@ class NijieExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "artist-id": self.artist_id, } diff --git a/gallery_dl/extractor/pixiv.py b/gallery_dl/extractor/pixiv.py index 975441ed..783d1c15 100644 --- a/gallery_dl/extractor/pixiv.py +++ b/gallery_dl/extractor/pixiv.py @@ -14,19 +14,12 @@ import re import json import time -info = { - "category": "pixiv", - "extractor": "PixivExtractor", - "directory": ["{category}", "{artist-id}-{artist-nick}"], - "filename": "{category}_{artist-id}_{id}{num}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)", - ], -} - - class PixivExtractor(Extractor): + category = "pixiv" + directory_fmt = ["{category}", "{artist-id}-{artist-nick}"] + filename_fmt = "{category}_{artist-id}_{id}{num}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?pixiv\.net/member(?:_illust)?\.php\?id=(\d+)"] member_url = "http://www.pixiv.net/member_illust.php" illust_url = "http://www.pixiv.net/member_illust.php?mode=medium" @@ -121,7 +114,7 @@ class PixivExtractor(Extractor): """Collect metadata for extractor-job""" data = self.api.user(self.artist_id)["response"][0] return { - "category": info["category"], + "category": self.category, "artist-id": self.artist_id, "artist-name": data["name"], "artist-nick": data["account"], diff --git a/gallery_dl/extractor/safebooru.py b/gallery_dl/extractor/safebooru.py index 695fc760..4f09ae42 100644 --- a/gallery_dl/extractor/safebooru.py +++ b/gallery_dl/extractor/safebooru.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014, 2015 Mike Fährmann +# Copyright 2015 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -8,27 +8,27 @@ """Extract image-urls from http://safebooru.org/""" -from .booru import XMLBooruExtractor +from . import booru -info = { - "category": "safebooru", - "extractor": "SafebooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*", - ], -} +class SafebooruExtractor(booru.XMLBooruExtractor): + """Base class for safebooru extractors""" -class SafebooruExtractor(XMLBooruExtractor): + category = "safebooru" + api_url = "http://safebooru.org/index.php" - def __init__(self, match): - XMLBooruExtractor.__init__(self, match, info) - self.api_url = "http://safebooru.org/index.php" - self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags} + def setup(self): + self.params.update({"page":"dapi", "s":"post", "q":"index"}) def update_page(self, reset=False): if reset is False: self.params["pid"] += 1 else: self.params["pid"] = 0 + +class SafebooruTagExtractor(SafebooruExtractor, booru.BooruTagExtractor): + """Extract images from safebooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"] + +class SafebooruPostExtractor(SafebooruExtractor, booru.BooruPostExtractor): + """Extract single images from safebooru""" + pattern = [r"(?:https?://)?(?:www\.)?safebooru\.org/(?:index\.php)?\?page=post&s=view&id=(\d+)"] diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 5d5d5d94..a821dbd6 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "sankaku", - "extractor": "SankakuExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)", - ], -} class SankakuExtractor(AsynchronousExtractor): + category = "sankaku" + directory_fmt = ["{category}", "{tags}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + pattern = [r"(?:https?://)?chan\.sankakucomplex\.com/\?tags=([^&]+)"] url = "https://chan.sankakucomplex.com/" def __init__(self, match): @@ -45,7 +38,7 @@ class SankakuExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "tags": self.tags, } diff --git a/gallery_dl/extractor/spectrumnexus.py b/gallery_dl/extractor/spectrumnexus.py index 4235a439..c4446e5e 100644 --- a/gallery_dl/extractor/spectrumnexus.py +++ b/gallery_dl/extractor/spectrumnexus.py @@ -10,20 +10,16 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path -info = { - "category": "spectrumnexus", - "extractor": "SpectrumNexusExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ +class SpectrumNexusExtractor(AsynchronousExtractor): + + category = "spectrumnexus" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [ r"(?:https?://)?(view\.thespectrum\.net/series/[^\.]+.html)\?ch=Chapter\+(\d+)", r"(?:https?://)?(view\.thespectrum\.net/series/[^/]+-chapter-(\d+)\.html)", - ], -} - -class SpectrumNexusExtractor(AsynchronousExtractor): + ] def __init__(self, match): AsynchronousExtractor.__init__(self) @@ -52,7 +48,7 @@ class SpectrumNexusExtractor(AsynchronousExtractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, "chapter": self.chapter, } return text.extract_all(page, ( diff --git a/gallery_dl/extractor/yandere.py b/gallery_dl/extractor/yandere.py index cd58dad1..c14dabab 100644 --- a/gallery_dl/extractor/yandere.py +++ b/gallery_dl/extractor/yandere.py @@ -8,20 +8,21 @@ """Extract image-urls from https://yande.re/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "yandere", - "extractor": "YandereExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+).*", - ], -} +class YandereExtractor(booru.JSONBooruExtractor): + """Base class for yandere extractors""" + category = "yandere" + api_url = "https://yande.re/post.json" -class YandereExtractor(JSONBooruExtractor): +class YandereTagExtractor(YandereExtractor, booru.BooruTagExtractor): + """Extract images from yandere based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?yande\.re/post\?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "https://yande.re/post.json" +class YanderePoolExtractor(YandereExtractor, booru.BooruPoolExtractor): + """Extract image-pools from yandere""" + pattern = [r"(?:https?://)?(?:www\.)?yande.re/pool/show/(\d+)"] + +class YanderePostExtractor(YandereExtractor, booru.BooruPostExtractor): + """Extract single images from yandere""" + pattern = [r"(?:https?://)?(?:www\.)?yande.re/post/show/(\d+)"] diff --git a/gallery_dl/jobs.py b/gallery_dl/jobs.py index cc61c8ca..fa7676b1 100644 --- a/gallery_dl/jobs.py +++ b/gallery_dl/jobs.py @@ -14,19 +14,19 @@ from .extractor.common import Message class DownloadJob(): def __init__(self, url): - self.extractor, self.info = extractor.find(url) + self.extractor = extractor.find(url) if self.extractor is None: print(url, ": No extractor found", sep="", file=sys.stderr) return self.directory = self.get_base_directory() self.downloaders = {} self.filename_fmt = config.get( - ("extractor", self.info["category"], "filename"), - default=self.info["filename"] + ("extractor", self.extractor.category, "filename"), + default=self.extractor.filename_fmt ) segments = config.get( - ("extractor", self.info["category"], "directory"), - default=self.info["directory"] + ("extractor", self.extractor.category, "directory"), + default=self.extractor.directory_fmt ) self.directory_fmt = os.path.join(*segments) @@ -51,7 +51,7 @@ class DownloadJob(): elif msg[0] == Message.Version: if msg[1] != 1: raise "unsupported message-version ({}, {})".format( - self.info.category, msg[1] + self.extractor.category, msg[1] ) # TODO: support for multiple message versions @@ -118,7 +118,7 @@ class DownloadJob(): class KeywordJob(): def __init__(self, url): - self.extractor, self.info = extractor.find(url) + self.extractor = extractor.find(url) if self.extractor is None: print(url, ": No extractor found", sep="", file=sys.stderr) return