diff --git a/gallery_dl/download.py b/gallery_dl/download.py index f1ba96d6..1292cbe7 100644 --- a/gallery_dl/download.py +++ b/gallery_dl/download.py @@ -8,18 +8,15 @@ import os import sys -import re import importlib - +from . import config, extractor from .extractor.common import Message -from . import config class DownloadManager(): def __init__(self, opts): self.opts = opts self.modules = {} - self.extractors = ExtractorFinder() def add(self, url): job = DownloadJob(self, url) @@ -45,7 +42,7 @@ class DownloadJob(): def __init__(self, mngr, url): self.mngr = mngr - self.extractor, self.info = mngr.extractors.get_for_url(url) + self.extractor, self.info = extractor.find(url) if self.extractor is None: return self.directory = mngr.get_base_directory() @@ -136,62 +133,3 @@ class DownloadJob(): if tries == 0: print("\r", end="") print("\r\033[1;32m", path, "\033[0m", sep="") - - -class ExtractorFinder(): - - def get_for_url(self, url): - """Get an extractor-instance suitable for 'url'""" - name, match = self.find_pattern_match(url) - if match: - module = importlib.import_module(".extractor." + name, __package__) - klass = getattr(module, module.info["extractor"]) - return klass(match), module.info - else: - print("no suitable extractor found") - return None, None - - def find_pattern_match(self, url): - """Find a pattern that matches 'url' and return the (category,match) tuple""" - for category in config.get(("extractor",)): - patterns = config.get(("extractor", category, "pattern"), default=[]) - for pattern in patterns: - match = re.match(pattern, url) - if match: - return category, match - for category, info in self.extractor_metadata(): - for pattern in info["pattern"]: - match = re.match(pattern, url) - if match: - return category, match - return None, None - - def extractor_metadata(self): - """Yield all extractor-name, -metadata tuples""" - path = os.path.join(os.path.dirname(__file__), "extractor") - for name in os.listdir(path): - extractor_path = os.path.join(path, name) - info = self.get_info_dict(extractor_path) - if info is not None: - yield os.path.splitext(name)[0], info - - @staticmethod - def get_info_dict(extractor_path): - """Get info-/metadata-dictionary for an extractor""" - try: - with open(extractor_path) as file: - for _ in range(30): - line = next(file) - if line.startswith("info ="): - break - else: - return None - - info = [line[6:]] - for line in file: - info.append(line) - if line.startswith("}"): - break - except (StopIteration, OSError): - return None - return eval("".join(info)) diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8b137891..7c7426c4 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -1 +1,57 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +import re +import importlib +from .. import config + +modules = [ + "pixiv", + # "exhentai", + "gelbooru", + "3dbooru", + "4chan", + "8chan", + "batoto", + "danbooru", + "e621", + "imagebam", + "imgbox", + "imgchili", + "mangareader", + "nijie", + "redhawkscans", + "yandere", +] + +def find(url): + """Find extractor suitable for handling the given url""" + for pattern, module, klass in _list_patterns(): + match = re.match(pattern, url) + if match: + return klass(match), module.info + +# -------------------------------------------------------------------- +# internals + +_cache = [] +_module_iter = iter(modules) + +def _list_patterns(): + """Yield all available (pattern, module, klass) tuples""" + for entry in _cache: + yield entry + + for module_name in _module_iter: + module = importlib.import_module("."+module_name, __package__) + klass = getattr(module, module.info["extractor"]) + userpatterns = config.get(("extractor", module_name, "pattern"), default=[]) + for pattern in userpatterns + module.info["pattern"]: + etuple = (pattern, module, klass) + _cache.append(etuple) + yield etuple