From 0abbee37101480d85448df574a8d36dde37209fc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 8 Apr 2015 01:51:48 +0200 Subject: [PATCH] update download-infrastructure --- gallery_dl/download.py | 125 ++++++++++++++++++++++++++++++++--------- 1 file changed, 97 insertions(+), 28 deletions(-) diff --git a/gallery_dl/download.py b/gallery_dl/download.py index d86e0610..8a6a414c 100644 --- a/gallery_dl/download.py +++ b/gallery_dl/download.py @@ -6,14 +6,13 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -""" """ - import os import sys import re import sqlite3 import importlib +from extractor.common import Message class DownloadManager(): @@ -21,35 +20,94 @@ class DownloadManager(): self.opts = opts self.conf = conf self.downloaders = {} + self.extractors = ExtractorFinder(conf) + + def add(self, url): + job = DownloadJob(self, url) + job.run() - def add(self, extr): + def get_downloader_module(self, scheme): + """Return a downloader module suitable for 'scheme'""" + module = self.downloaders.get(scheme) + if module is None: + module = importlib.import_module(".downloader."+scheme, __package__) + self.downloaders[scheme] = module + return module + + def get_base_directory(self): if self.opts.dest: - dest = self.opts.dest - elif extr.category in self.conf: - dest = self.conf[extr.category].get("destination", "/tmp/") + return self.opts.dest else: - dest = self.conf["general"].get("destination", "/tmp/") - dest = os.path.join(dest, extr.category, extr.directory) - os.makedirs(dest, exist_ok=True) - - for url, filename in extr: - path = os.path.join(dest, filename) - if os.path.exists(path): - self.print_skip(path) - continue - dl = self.get_downloader(extr, url) - self.print_start(path) - tries = dl.download(url, path) - self.print_success(path, tries) - - def get_downloader(self, extr, url): - end = url.find("://") - proto = url[:end] if end != -1 else "http" - if proto not in self.downloaders: - # import downloader - module = importlib.import_module("."+proto, __package__) - self.downloaders[proto] = module.Downloader - return self.downloaders[proto](extr) + return self.conf["general"].get("destination", "/tmp/") + + +class DownloadJob(): + + def __init__(self, mngr, url): + self.mngr = mngr + self.extractor, self.info = mngr.extractors.get_for_url(url) + self.directory = mngr.get_base_directory() + self.downloaders = {} + + def run(self): + """Execute/Run the downlaod job""" + if self.extractor is None: + return # TODO: error msg + + for msg in self.extractor: + print(msg) + print(type(msg)) + if msg[0] == Message.Url: + self.download(msg) + + elif msg[0] == Message.Directory: + self.set_directory(msg) + + elif msg[0] == Message.Version: + if msg[1] != 1: + raise "unsupported message-version ({}, {})".format( + self.info.category, msg[1] + ) + # TODO: support for multiple message versions + + def download(self, msg): + """Download the resource specified in 'msg'""" + _, url, metadata = msg + filename = self.info["filename"].format(**metadata) + path = os.path.join(self.directory, filename) + if os.path.exists(path): + self.print_skip(path) + return + dl = self.get_downloader(url) + self.print_start(path) + tries = dl.download(url, path) + self.print_success(path, tries) + + def set_directory(self, msg): + """Set and create the target directory for downloads""" + path = [] + for segment in self.info["directory"]: + path.append(segment.format(**msg[1])) + self.directory = os.path.join( + self.mngr.get_base_directory(), + *path + ) + os.makedirs(self.directory, exist_ok=True) + + def get_downloader(self, url): + """Return, and possibly construct, a downloader suitable for 'url'""" + pos = url.find(":") + scheme = url[:pos] if pos != -1 else "http" + if scheme == "https": + scheme = "http" + + downloader = self.downloaders.get(scheme) + if downloader is None: + module = self.mngr.get_downloader_module(scheme) + downloader = module.Downloader(self.extractor) + self.downloaders[scheme] = downloader + + return downloader @staticmethod def print_start(path): @@ -78,6 +136,17 @@ class ExtractorFinder(): self.load_from_database(conn) self.load_from_config(config) + def get_for_url(self, url): + # TODO: implement general case + module = importlib.import_module(".extractor.8chan", __package__) + for pattern in module.info["pattern"]: + match = re.match(pattern, url) + if match: + klass = getattr(module, module.info["extractor"]) + return klass(match, self.config), module.info + print("pattern mismatch") + sys.exit() + def match(self, url): for category, regex in self.match_list: match = regex.match(url)