diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index c303a6d4..a7986880 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -8,24 +8,25 @@ """Extract image-urls from http://behoimi.org/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "3dbooru", - "extractor": "ThreeDeeBooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+).*", - ], -} +class ThreeDeeBooruExtractor(booru.JSONBooruExtractor): + """Base class for 3dbooru extractors""" + category = "3dbooru" + api_url = "http://behoimi.org/post/index.json" + headers = { + "Referer": "http://behoimi.org/post/show/", + "User-Agent": "Mozilla/5.0", + } -class ThreeDeeBooruExtractor(JSONBooruExtractor): +class ThreeDeeBooruTagExtractor(ThreeDeeBooruExtractor, booru.BooruTagExtractor): + """Extract images from 3dbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post(?:/(?:index)?)?\?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "http://behoimi.org/post/index.json" - self.headers = { - "Referer": "http://behoimi.org/post/show/", - "User-Agent": "Mozilla/5.0" - } +class ThreeDeeBooruPoolExtractor(ThreeDeeBooruExtractor, booru.BooruPoolExtractor): + """Extract image-pools from 3dbooru""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/pool/show/(\d+)"] + +class ThreeDeeBooruPostExtractor(ThreeDeeBooruExtractor, booru.BooruPostExtractor): + """Extract single images from 3dbooru""" + pattern = [r"(?:https?://)?(?:www\.)?behoimi\.org/post/show/(\d+)"] diff --git a/gallery_dl/extractor/4chan.py b/gallery_dl/extractor/4chan.py index 9aab90a2..b28c25dc 100644 --- a/gallery_dl/extractor/4chan.py +++ b/gallery_dl/extractor/4chan.py @@ -10,23 +10,14 @@ from .chan import ChanExtractor -info = { - "category": "4chan", - "extractor": "FourChanExtractor", - "directory": ["{category}", "{board}-{thread}"], - "filename": "{tim}-{filename}{ext}", - "pattern": [ - r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+).*", - ], -} - class FourChanExtractor(ChanExtractor): + category = "4chan" + pattern = [r"(?:https?://)?boards\.4chan\.org/([^/]+)/thread/(\d+)"] api_url = "https://a.4cdn.org/{board}/thread/{thread}.json" file_url = "https://i.4cdn.org/{board}/{tim}{ext}" def __init__(self, match): ChanExtractor.__init__( - self, info["category"], - match.group(1), match.group(2) + self, match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index c21b4595..5dcd200f 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -10,23 +10,14 @@ from .chan import ChanExtractor -info = { - "category": "8chan", - "extractor": "InfinityChanExtractor", - "directory": ["{category}", "{board}-{thread}"], - "filename": "{tim}-{filename}{ext}", - "pattern": [ - r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+).*", - ], -} - class InfinityChanExtractor(ChanExtractor): + category = "8chan" + pattern = [r"(?:https?://)?(?:www\.)?8ch\.net/([^/]+)/res/(\d+)"] api_url = "https://8ch.net/{board}/res/{thread}.json" file_url = "https://8ch.net/{board}/src/{tim}{ext}" def __init__(self, match): ChanExtractor.__init__( - self, info["category"], - match.group(1), match.group(2) + self, match.group(1), match.group(2) ) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 2898ffe5..7a69d976 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -46,11 +46,11 @@ modules = [ def find(url): """Find extractor suitable for handling the given url""" - for pattern, module, klass in _list_patterns(): + for pattern, klass in _list_patterns(): match = re.match(pattern, url) if match: - return klass(match), module.info - return None, None + return klass(match) + return None # -------------------------------------------------------------------- # internals @@ -59,15 +59,22 @@ _cache = [] _module_iter = iter(modules) def _list_patterns(): - """Yield all available (pattern, module, klass) tuples""" + """Yield all available (pattern, info, class) tuples""" for entry in _cache: yield entry for module_name in _module_iter: module = importlib.import_module("."+module_name, __package__) - klass = getattr(module, module.info["extractor"]) - userpatterns = config.get(("extractor", module_name, "pattern"), default=[]) - for pattern in userpatterns + module.info["pattern"]: - etuple = (pattern, module, klass) - _cache.append(etuple) - yield etuple + for klass in _get_classes(module): + for pattern in klass.pattern: + etuple = (pattern, klass) + _cache.append(etuple) + yield etuple + +def _get_classes(module): + """Return a list of all extractor classes in a module""" + return [ + klass for klass in module.__dict__.values() if ( + hasattr(klass, "pattern") and klass.__module__ == module.__name__ + ) + ] diff --git a/gallery_dl/extractor/batoto.py b/gallery_dl/extractor/batoto.py index 0e1fce53..ec00eef8 100644 --- a/gallery_dl/extractor/batoto.py +++ b/gallery_dl/extractor/batoto.py @@ -10,21 +10,14 @@ from .common import AsynchronousExtractor, Message from .. import text, iso639_1 -import os.path import re -info = { - "category": "batoto", - "extractor": "BatotoExtractor", - "directory": ["{category}", "{manga}", "c{chapter:>03} - {title}"], - "filename": "{manga}_c{chapter:>03}_{page:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)", - ], -} - class BatotoExtractor(AsynchronousExtractor): + category = "batoto" + directory_fmt = ["{category}", "{manga}", "c{chapter:>03} - {title}"] + filename_fmt = "{manga}_c{chapter:>03}_{page:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?bato\.to/reader#([0-9a-f]+)"] url = "https://bato.to/areader" def __init__(self, match): @@ -68,7 +61,7 @@ class BatotoExtractor(AsynchronousExtractor): manga, pos = extr(page, "document.title = '", " - ", pos) match = re.match(r"(Vol.(\d+) )?Ch.(\d+)([^:]*)(: (.+))?", cinfo) return { - "category": info["category"], + "category": self.category, "token": self.token, "manga": manga, "volume": match.group(2) or "", diff --git a/gallery_dl/extractor/booru.py b/gallery_dl/extractor/booru.py index 200bbf77..b5d7323e 100644 --- a/gallery_dl/extractor/booru.py +++ b/gallery_dl/extractor/booru.py @@ -12,20 +12,20 @@ from .common import Extractor, Message from .. import text import xml.etree.ElementTree as ET import json -import os.path import urllib.parse class BooruExtractor(Extractor): + info = {} + headers = {} + page = "page" api_url = "" + category = "" - def __init__(self, match, info): + def __init__(self): Extractor.__init__(self) - self.info = info - self.tags = text.unquote(match.group(1)) - self.page = "page" - self.params = {"tags": self.tags} - self.headers = {} + self.params = {"limit": 50} + self.setup() def items(self): yield Message.Version, 1 @@ -40,6 +40,9 @@ class BooruExtractor(Extractor): def items_impl(self): pass + def setup(self): + pass + def update_page(self, reset=False): """Update the value of the 'page' parameter""" # Override this method in derived classes if necessary. @@ -51,14 +54,14 @@ class BooruExtractor(Extractor): def get_job_metadata(self): """Collect metadata for extractor-job""" + # Override this method in derived classes return { - "category": self.info["category"], - "tags": self.tags + "category": self.category, } def get_file_metadata(self, data): """Collect metadata for a downloadable file""" - data["category"] = self.info["category"] + data["category"] = self.category return text.nameext_from_url(self.get_file_url(data), data) def get_file_url(self, data): @@ -78,10 +81,10 @@ class JSONBooruExtractor(BooruExtractor): self.request(self.api_url, verify=True, params=self.params, headers=self.headers).text ) - if len(images) == 0: - return for data in images: yield data + if len(images) < self.params["limit"]: + return self.update_page() @@ -93,8 +96,56 @@ class XMLBooruExtractor(BooruExtractor): root = ET.fromstring( self.request(self.api_url, verify=True, params=self.params).text ) - if len(root) == 0: - return for item in root: yield item.attrib + if len(root) < self.params["limit"]: + return self.update_page() + + +class BooruTagExtractor(BooruExtractor): + """Extract images based on search-tags""" + + directory_fmt = ["{category}", "{tags}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.tags = text.unquote(match.group(1)) + self.params["tags"] = self.tags + + def get_job_metadata(self): + return { + "category": self.category, + "tags": self.tags, + } + + +class BooruPoolExtractor(BooruExtractor): + """Extract image-pools""" + + directory_fmt = ["{category}", "pool", "{pool}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.pool = match.group(1) + self.params["tags"] = "pool:" + self.pool + + def get_job_metadata(self): + return { + "category": self.category, + "pool": self.pool, + } + + +class BooruPostExtractor(BooruExtractor): + """Extract single images""" + + directory_fmt = ["{category}"] + filename_fmt = "{category}_{id}_{md5}.{extension}" + + def __init__(self, match): + BooruExtractor.__init__(self) + self.post = match.group(1) + self.params["tags"] = "id:" + self.post diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 68217d58..1dab5fd4 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -13,13 +13,15 @@ from .. import text class ChanExtractor(Extractor): + directory_fmt = ["{category}", "{board}-{thread}"] + filename_fmt = "{tim}-{filename}{ext}" api_url = "" file_url = "" - def __init__(self, category, board, thread): + def __init__(self, board, thread): Extractor.__init__(self) self.metadata = { - "category": category, + "category": self.category, "board": board, "thread": thread, } diff --git a/gallery_dl/extractor/danbooru.py b/gallery_dl/extractor/danbooru.py index 47765d21..389fd4a2 100644 --- a/gallery_dl/extractor/danbooru.py +++ b/gallery_dl/extractor/danbooru.py @@ -8,20 +8,21 @@ """Extract image-urls from https://danbooru.donmai.us/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "danbooru", - "extractor": "DanbooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+).*", - ], -} +class DanbooruExtractor(booru.JSONBooruExtractor): + """Base class for danbooru extractors""" + category = "danbooru" + api_url = "https://danbooru.donmai.us/posts.json" -class DanbooruExtractor(JSONBooruExtractor): +class DanbooruTagExtractor(DanbooruExtractor, booru.BooruTagExtractor): + """Extract images from danbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts\?(?:utf8=%E2%9C%93&)?tags=([^&]+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "https://danbooru.donmai.us/posts.json" +class DanbooruPoolExtractor(DanbooruExtractor, booru.BooruPoolExtractor): + """Extract image-pools from danbooru""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/pools/(\d+)"] + +class DanbooruPostExtractor(DanbooruExtractor, booru.BooruPostExtractor): + """Extract single images from danbooru""" + pattern = [r"(?:https?://)?(?:www\.)?danbooru.donmai.us/posts/(\d+)"] diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index d1002f02..4553f8c3 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -10,21 +10,15 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path import re -info = { - "category": "deviantart", - "extractor": "DeviantArtExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*", - ], -} - class DeviantArtExtractor(AsynchronousExtractor): + category = "deviantart" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [r"(?:https?://)?([^\.]+)\.deviantart\.com/gallery/.*"] + def __init__(self, match): AsynchronousExtractor.__init__(self) self.session.cookies["agegate_state"] = "1" @@ -57,14 +51,14 @@ class DeviantArtExtractor(AsynchronousExtractor): def get_job_metadata(self): """Collect metadata for extractor-job""" return { - "category": info["category"], + "category": self.category, "artist": self.artist, } def get_image_metadata(self, image): """Collect metadata for an image""" match = self.extract_data(image, 'title', - '(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') + r'(.+) by (.+), ([A-Z][a-z]{2} \d+, \d{4}) in') if image.startswith(" ismature"): # adult image url, _ = text.extract(image, 'href="', '"') @@ -76,7 +70,7 @@ class DeviantArtExtractor(AsynchronousExtractor): height, pos = text.extract(page, ' height="', '"', pos) else: # normal image - index = self.extract_data(image, 'href', '[^"]+-(\d+)').group(1) + index = self.extract_data(image, 'href', r'[^"]+-(\d+)').group(1) url, pos = text.extract(image, ' data-super-full-img="', '"', match.end()) if url: width , pos = text.extract(image, ' data-super-full-width="', '"', pos) diff --git a/gallery_dl/extractor/e621.py b/gallery_dl/extractor/e621.py index 7b808f44..c39c86aa 100644 --- a/gallery_dl/extractor/e621.py +++ b/gallery_dl/extractor/e621.py @@ -8,21 +8,24 @@ """Extract image-urls from https://e621.net/""" -from .booru import JSONBooruExtractor +from . import booru -info = { - "category": "e621", - "extractor": "E621Extractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ +class E621Extractor(booru.JSONBooruExtractor): + """Base class for e621 extractors""" + category = "e621" + api_url = "https://e621.net/post/index.json" + +class E621TagExtractor(E621Extractor, booru.BooruTagExtractor): + """Extract images from e621 based on search-tags""" + pattern = [ r"(?:https?://)?(?:www\.)?e621\.net/post/index/\d+/([^?]+)", - r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+).*" - ], -} + r"(?:https?://)?(?:www\.)?e621\.net/post\?tags=([^&]+)", + ] -class E621Extractor(JSONBooruExtractor): +class E621PoolExtractor(E621Extractor, booru.BooruPoolExtractor): + """Extract image-pools from e621""" + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/pool/show/(\d+)"] - def __init__(self, match): - JSONBooruExtractor.__init__(self, match, info) - self.api_url = "https://e621.net/post/index.json" +class E621PostExtractor(E621Extractor, booru.BooruPostExtractor): + """Extract single images from e621""" + pattern = [r"(?:https?://)?(?:www\.)?e621\.net/post/show/(\d+)"] diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 875ab1b4..b2d697d9 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -13,18 +13,12 @@ from .. import config, text, iso639_1 import time import random -info = { - "category": "exhentai", - "extractor": "ExhentaiExtractor", - "directory": ["{category}", "{gallery-id}"], - "filename": "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})", - ], -} - class ExhentaiExtractor(Extractor): + category = "exhentai" + directory_fmt = ["{category}", "{gallery-id}"] + filename_fmt = "{gallery-id}_{num:>04}_{imgkey}_{name}.{extension}" + pattern = [r"(?:https?://)?(g\.e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"] api_url = "http://exhentai.org/api.php" def __init__(self, match): @@ -70,7 +64,7 @@ class ExhentaiExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category" : info["category"], + "category" : self.category, "gallery-id" : self.gid, "gallery-token": self.token, } diff --git a/gallery_dl/extractor/gelbooru.py b/gallery_dl/extractor/gelbooru.py index 11a67f0c..bcf6acee 100644 --- a/gallery_dl/extractor/gelbooru.py +++ b/gallery_dl/extractor/gelbooru.py @@ -8,27 +8,19 @@ """Extract image-urls from http://gelbooru.com/""" -from .booru import XMLBooruExtractor +from . import booru from .. import config -info = { - "category": "gelbooru", - "extractor": "GelbooruExtractor", - "directory": ["{category}", "{tags}"], - "filename": "{category}_{id}_{md5}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+).*", - ], -} - -class GelbooruExtractor(XMLBooruExtractor): - - def __init__(self, match): - XMLBooruExtractor.__init__(self, match, info) - self.api_url = "http://gelbooru.com/" - self.params = {"page":"dapi", "s":"post", "q":"index", "tags":self.tags} +class GelbooruExtractor(booru.XMLBooruExtractor): + """Base class for gelbooru extractors""" + + category = "gelbooru" + api_url = "http://gelbooru.com/" + + def setup(self): + self.params.update({"page":"dapi", "s":"post", "q":"index"}) self.session.cookies.update( - config.get(("extractor", info["category"], "cookies")) + config.get(("extractor", self.category, "cookies")) ) def update_page(self, reset=False): @@ -36,3 +28,16 @@ class GelbooruExtractor(XMLBooruExtractor): self.params["pid"] += 1 else: self.params["pid"] = 0 + +class GelbooruTagExtractor(GelbooruExtractor, booru.BooruTagExtractor): + """Extract images from gelbooru based on search-tags""" + pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=list&tags=([^&]+)"] + +# TODO: find out how to access pools via gelbooru-api +# class GelbooruPoolExtractor(GelbooruExtractor, booru.BooruPoolExtractor): + # """Extract image-pools from gelbooru""" + # pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=pool&s=show&id=(\d+)"] + +class GelbooruPostExtractor(GelbooruExtractor, booru.BooruPostExtractor): + """Extract single images from gelbooru""" + pattern = [r"(?:https?://)?(?:www\.)?gelbooru\.com/(?:index\.php)?\?page=post&s=view&id=(\d+)"] diff --git a/gallery_dl/extractor/hbrowse.py b/gallery_dl/extractor/hbrowse.py index 59aa576e..67da9011 100644 --- a/gallery_dl/extractor/hbrowse.py +++ b/gallery_dl/extractor/hbrowse.py @@ -10,20 +10,13 @@ from .common import Extractor, Message from .. import text -import os.path - -info = { - "category": "hbrowse", - "extractor": "HbrowseExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)", - ], -} class HbrowseExtractor(Extractor): + category = "hbrowse" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}.{extension}" + pattern = [r"(?:https?://)?(?:www\.)?hbrowse\.com/(\d+)/(c\d+)"] url_base = "http://www.hbrowse.com/thumbnails/" def __init__(self, match): @@ -43,7 +36,7 @@ class HbrowseExtractor(Extractor): def get_job_metadata(self, page): """Collect metadata for extractor-job""" data = { - "category": info["category"], + "category": self.category, 'gallery-id': self.gid, 'chapter': int(self.chapter[1:]), } diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index d3fcb362..5c916173 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -12,19 +12,15 @@ from .common import Extractor, Message from .. import text import os.path -info = { - "category": "hentaifoundry", - "extractor": "HentaiFoundryExtractor", - "directory": ["{category}", "{artist}"], - "filename": "{category}_{index}_{title}.{extension}", - "pattern": [ - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", - r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", - ], -} - class HentaiFoundryExtractor(Extractor): + category = "hentaifoundry" + directory_fmt = ["{category}", "{artist}"] + filename_fmt = "{category}_{index}_{title}.{extension}" + pattern = [ + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/pictures/user/([^/]+)", + r"(?:https?://)?(?:www\.)?hentai-foundry\.com/user/([^/]+)/profile", + ] url_base = "http://www.hentai-foundry.com/pictures/user/" def __init__(self, match): @@ -60,7 +56,7 @@ class HentaiFoundryExtractor(Extractor): token, pos = text.extract(page, 'hidden" value="', '"') count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos) return { - "category": info["category"], + "category": self.category, "artist": self.artist, "count": count, }, token diff --git a/gallery_dl/extractor/hitomi.py b/gallery_dl/extractor/hitomi.py index c7dc49c8..dd341538 100644 --- a/gallery_dl/extractor/hitomi.py +++ b/gallery_dl/extractor/hitomi.py @@ -10,21 +10,15 @@ from .common import Extractor, Message from .. import text, iso639_1 -import os.path import string -info = { - "category": "hitomi", - "extractor": "HitomiExtractor", - "directory": ["{category}", "{gallery-id} {title}"], - "filename": "{category}_{gallery-id}_{num:>03}_{name}.{extension}", - "pattern": [ - r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html", - ], -} - class HitomiExtractor(Extractor): + category = "hitomi" + directory_fmt = ["{category}", "{gallery-id} {title}"] + filename_fmt = "{category}_{gallery-id}_{num:>03}_{name}.{extension}" + pattern = [r"(?:https?://)?hitomi\.la/(?:galleries|reader)/(\d+)\.html"] + def __init__(self, match): Extractor.__init__(self) self.gid = match.group(1) @@ -62,7 +56,7 @@ class HitomiExtractor(Extractor): series, pos = text.extract(page, '.html">', '', pos) lang = lang.capitalize() return { - "category": info["category"], + "category": self.category, "gallery-id": self.gid, "title": title, "artist": string.capwords(artist), diff --git a/gallery_dl/extractor/imagebam.py b/gallery_dl/extractor/imagebam.py index c39f74fc..3bd65dbd 100644 --- a/gallery_dl/extractor/imagebam.py +++ b/gallery_dl/extractor/imagebam.py @@ -10,20 +10,13 @@ from .common import AsynchronousExtractor, Message from .. import text -import os.path - -info = { - "category": "imagebam", - "extractor": "ImagebamExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{filename}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*", - ], -} class ImagebamExtractor(AsynchronousExtractor): + category = "imagebam" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{filename}" + pattern = [r"(?:https?://)?(?:www\.)?imagebam\.com/gallery/([^/]+).*"] url_base = "http://www.imagebam.com" def __init__(self, match): @@ -47,7 +40,7 @@ class ImagebamExtractor(AsynchronousExtractor): response.encoding = "utf-8" page = response.text data = { - "category": info["category"], + "category": self.category, "gallery-key": self.gkey, } data, _ = text.extract_all(page, ( diff --git a/gallery_dl/extractor/imgbox.py b/gallery_dl/extractor/imgbox.py index b3a01dd7..edd4db58 100644 --- a/gallery_dl/extractor/imgbox.py +++ b/gallery_dl/extractor/imgbox.py @@ -12,18 +12,12 @@ from .common import AsynchronousExtractor, Message from .. import text import re -info = { - "category": "imgbox", - "extractor": "ImgboxExtractor", - "directory": ["{category}", "{title} - {gallery-key}"], - "filename": "{num:>03}-{name}", - "pattern": [ - r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)", - ], -} - class ImgboxExtractor(AsynchronousExtractor): + category = "imgbox" + directory_fmt = ["{category}", "{title} - {gallery-key}"] + filename_fmt = "{num:>03}-{name}" + pattern = [r"(?:https?://)?(?:www\.)?imgbox\.com/g/(.+)"] url_base = "http://imgbox.com" def __init__(self, match): @@ -44,7 +38,7 @@ class ImgboxExtractor(AsynchronousExtractor): """Collect metadata for extractor-job""" match = re.search(r"