diff --git a/docs/configuration.rst b/docs/configuration.rst index ed64ff02..bfdd32dc 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1955,6 +1955,72 @@ Description Download video files. +extractor.ytdl.enabled +---------------------- +Type + ``bool`` +Default + ``false`` +Description + Match **all** URLs, even ones without a ``ytdl:`` prefix. + + +extractor.ytdl.format +--------------------- +Type + ``string`` +Default + youtube-dl's default, currently ``"bestvideo+bestaudio/best"`` +Description + Video `format selection + <https://github.com/ytdl-org/youtube-dl#format-selection>`__ + directly passed to youtube-dl. + + +extractor.ytdl.logging +---------------------- +Type + ``bool`` +Default + ``true`` +Description + Route youtube-dl's output through gallery-dl's logging system. + Otherwise youtube-dl will write its output directly to stdout/stderr. + + Note: Set ``quiet`` and ``no_warnings`` in + `extractor.ytdl.raw-options`_ to ``true`` to suppress all output. + + +extractor.ytdl.module +--------------------- +Type + ``string`` +Default + ``"youtube_dl"`` +Description + Name of the youtube-dl Python module to import. + + +extractor.ytdl.raw-options +-------------------------- +Type + ``object`` +Example + .. code:: json + + { + "quiet": true, + "writesubtitles": true, + "merge_output_format": "mkv" + } + +Description + Additional options passed directly to the ``YoutubeDL`` constructor. + + All available options can be found in `youtube-dl's docstrings + <https://github.com/ytdl-org/youtube-dl/blob/master/youtube_dl/YoutubeDL.py#L138-L318>`__. + + extractor.[booru].tags ---------------------- Type @@ -1967,6 +2033,7 @@ Description Note: This requires 1 additional HTTP request for each post. + extractor.[booru].notes ----------------------- Type @@ -1978,6 +2045,7 @@ Description Note: This requires 1 additional HTTP request for each post. 
+ extractor.[manga-extractor].chapter-reverse ------------------------------------------- Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 35a296e0..c97ecdf7 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -287,6 +287,14 @@ "retweets": true, "videos": true }, + "ytdl": + { + "enabled": false, + "format": null, + "logging": true, + "module": "youtube_dl", + "raw-options": null + }, "booru": { "tags": false, diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index e1161885..704d8702 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -41,7 +41,10 @@ class YoutubeDLDownloader(DownloaderBase): "max_filesize": text.parse_bytes( self.config("filesize-max"), None), } - options.update(self.config("raw-options") or {}) + + raw_options = self.config("raw-options") + if raw_options: + options.update(raw_options) if self.config("logging", True): options["logger"] = self.log @@ -59,19 +62,22 @@ class YoutubeDLDownloader(DownloaderBase): for cookie in self.session.cookies: set_cookie(cookie) - try: - info_dict = self.ytdl.extract_info(url[5:], download=False) - except Exception: - return False + kwdict = pathfmt.kwdict + info_dict = kwdict.pop("_ytdl_info_dict", None) + if not info_dict: + try: + info_dict = self.ytdl.extract_info(url[5:], download=False) + except Exception: + return False if "entries" in info_dict: - index = pathfmt.kwdict.get("_ytdl_index") + index = kwdict.get("_ytdl_index") if index is None: return self._download_playlist(pathfmt, info_dict) else: info_dict = info_dict["entries"][index] - extra = pathfmt.kwdict.get("_ytdl_extra") + extra = kwdict.get("_ytdl_extra") if extra: info_dict.update(extra) @@ -121,6 +127,7 @@ class YoutubeDLDownloader(DownloaderBase): def compatible_formats(formats): + """Returns True if 'formats' are compatible for merge""" video_ext = formats[0].get("ext") audio_ext = formats[1].get("ext") diff --git a/gallery_dl/extractor/__init__.py 
b/gallery_dl/extractor/__init__.py index d74d06d2..42365884 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -143,6 +143,7 @@ modules = [ "recursive", "oauth", "test", + "ytdl", ] diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py new file mode 100644 index 00000000..58db3ccc --- /dev/null +++ b/gallery_dl/extractor/ytdl.py @@ -0,0 +1,101 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for sites supported by youtube-dl""" + +from .common import Extractor, Message +from .. import config + + +class YoutubeDLExtractor(Extractor): + """Generic extractor for youtube-dl supported URLs""" + category = "ytdl" + directory_fmt = ("{category}", "{subcategory}") + filename_fmt = "{title}-{id}.{extension}" + archive_fmt = "{extractor_key} {id}" + ytdl_module = None + pattern = r"ytdl:(.*)" + test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",) + + def __init__(self, match): + # import youtube_dl module + module = self.ytdl_module + if not module: + name = config.get(("extractor", "ytdl"), "module") or "youtube_dl" + module = YoutubeDLExtractor.ytdl_module = __import__(name) + + # find suitable youtube_dl extractor + self.ytdl_url = url = match.group(1) + for ie in module.extractor.gen_extractor_classes(): + if ie.suitable(url): + self.ytdl_ie = ie + break + + # set subcategory to youtube_dl extractor's key + self.subcategory = ie.ie_key() + Extractor.__init__(self, match) + + def items(self): + # construct YoutubeDL object + options = { + "format": self.config("format"), + "socket_timeout": self._timeout, + "nocheckcertificate": not self._verify, + "proxy": self.session.proxies.get("http"), + } + + raw_options = self.config("raw-options") + if raw_options: + 
options.update(raw_options) + if self.config("logging", True): + options["logger"] = self.log + options["extract_flat"] = "in_playlist" + + ytdl = self.ytdl_module.YoutubeDL(options) + + # extract youtube_dl info_dict + info_dict = ytdl._YoutubeDL__extract_info( + self.ytdl_url, + ytdl.get_info_extractor(self.ytdl_ie.ie_key()), + False, {}, True) + + if "entries" in info_dict: + results = self._process_entries(ytdl, info_dict["entries"]) + else: + results = (info_dict,) + + # yield results + for info_dict in results: + info_dict["extension"] = None + info_dict["_ytdl_info_dict"] = info_dict + + url = "ytdl:" + (info_dict.get("url") or + info_dict.get("webpage_url") or + self.ytdl_url) + + yield Message.Directory, info_dict + yield Message.Url, url, info_dict + + def _process_entries(self, ytdl, entries): + for entry in entries: + if entry.get("_type") in ("url", "url_transparent"): + info_dict = ytdl.extract_info( + entry["url"], False, + ie_key=entry.get("ie_key")) + if "entries" in info_dict: + yield from self._process_entries( + ytdl, info_dict["entries"]) + else: + yield info_dict + else: + yield entry + + +if config.get(("extractor", "ytdl"), "enabled"): + # make 'ytdl:' prefix optional + YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)" diff --git a/gallery_dl/version.py b/gallery_dl/version.py index 0838e16b..46d51550 100644 --- a/gallery_dl/version.py +++ b/gallery_dl/version.py @@ -6,4 +6,4 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. 
-__version__ = "1.18.2-dev" +__version__ = "1.19.0-dev" diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 61da8c51..55a5604f 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -254,6 +254,7 @@ IGNORE_LIST = ( "oauth", "recursive", "test", + "ytdl", ) diff --git a/test/test_extractor.py b/test/test_extractor.py index f04e1c71..de43ff70 100644 --- a/test/test_extractor.py +++ b/test/test_extractor.py @@ -147,7 +147,7 @@ class TestExtractorModule(unittest.TestCase): return c.capitalize() for extr in extractor.extractors(): - if extr.category not in ("", "oauth"): + if extr.category not in ("", "oauth", "ytdl"): expected = "{}{}Extractor".format( capitalize(extr.category), capitalize(extr.subcategory),