You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gallery-dl/gallery_dl/extractor/ytdl.py

137 lines
5.0 KiB

# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for sites supported by youtube-dl"""
from .common import Extractor, Message
from .. import text, config, exception
class YoutubeDLExtractor(Extractor):
    """Generic extractor for youtube-dl supported URLs

    Matches URLs carrying a 'ytdl:' prefix, selects the youtube-dl
    info extractor suitable for the remaining URL, and yields one
    Directory/Url message pair per extracted info dict.

    The youtube-dl compatible module is imported by name, taken from
    the 'module' option of the ("extractor", "ytdl") config section
    and falling back to "youtube_dl".
    """
    category = "ytdl"
    directory_fmt = ("{category}", "{subcategory}")
    filename_fmt = "{title}-{id}.{extension}"
    archive_fmt = "{extractor_key} {id}"
    pattern = r"ytdl:(.*)"
    test = ("ytdl:https://www.youtube.com/watch?v=BaW_jenozKc&t=1s&end=9",)

    def __init__(self, match):
        """Select the youtube-dl info extractor for the matched URL

        Sets 'ytdl_url', 'ytdl_ie_key', 'ytdl_module_name',
        'force_generic_extractor' and 'subcategory' before delegating
        to Extractor.__init__().

        Raises exception.NoExtractorError if no info extractor accepts
        the URL, or if only the "Generic" one does while the 'generic'
        option is disabled.
        """
        # import main youtube_dl module
        module_name = self.ytdl_module_name = config.get(
            ("extractor", "ytdl"), "module") or "youtube_dl"
        module = __import__(module_name)

        # find suitable youtube_dl extractor
        self.ytdl_url = url = match.group(1)
        generic = config.interpolate(("extractor", "ytdl"), "generic", True)
        if generic == "force":
            self.ytdl_ie_key = "Generic"
            self.force_generic_extractor = True
        else:
            for ie in module.extractor.gen_extractor_classes():
                if ie.suitable(url):
                    self.ytdl_ie_key = ie.ie_key()
                    break
            else:
                # nothing matched; without this, 'ytdl_ie_key' would be
                # unset and the accesses below raise AttributeError
                raise exception.NoExtractorError()
            if not generic and self.ytdl_ie_key == "Generic":
                raise exception.NoExtractorError()
            self.force_generic_extractor = False

        # set subcategory to youtube_dl extractor's key
        self.subcategory = self.ytdl_ie_key
        Extractor.__init__(self, match)

    def items(self):
        """Yield Directory and Url messages for every extracted info dict

        Builds a YoutubeDL instance from extractor settings, copies the
        session cookies into it, extracts the info dict for 'ytdl_url'
        and expands playlist entries via _process_entries().
        """
        # import subcategory module
        ytdl_module = __import__(
            config.get(("extractor", "ytdl", self.subcategory), "module") or
            self.ytdl_module_name)
        self.log.debug("Using %s", ytdl_module)

        # construct YoutubeDL object
        options = {
            "format"                 : self.config("format"),
            "retries"                : self._retries,
            "socket_timeout"         : self._timeout,
            "nocheckcertificate"     : not self._verify,
            "proxy"                  : self.session.proxies.get("http"),
            "force_generic_extractor": self.force_generic_extractor,
            "nopart"                 : not self.config("part", True),
            "updatetime"             : self.config("mtime", True),
            "ratelimit"              : text.parse_bytes(
                self.config("rate"), None),
            "min_filesize"           : text.parse_bytes(
                self.config("filesize-min"), None),
            "max_filesize"           : text.parse_bytes(
                self.config("filesize-max"), None),
        }

        # user-supplied raw options override the values derived above,
        # but not 'logger' and 'extract_flat', which are set afterwards
        raw_options = self.config("raw-options")
        if raw_options:
            options.update(raw_options)
        if self.config("logging", True):
            options["logger"] = self.log
        options["extract_flat"] = "in_playlist"

        username, password = self._get_auth_info()
        if username:
            options["username"], options["password"] = username, password
        del username, password

        ytdl = ytdl_module.YoutubeDL(options)

        # transfer cookies to ytdl
        # (use the local 'ytdl' instance; there is no 'self.ytdl')
        cookies = self.session.cookies
        if cookies:
            set_cookie = ytdl.cookiejar.set_cookie
            for cookie in cookies:
                set_cookie(cookie)

        # extract youtube_dl info_dict
        # NOTE(review): this calls YoutubeDL's name-mangled private
        # '__extract_info' to pin the info extractor; the positional
        # arguments are presumably (download=False, extra_info={},
        # process=True) - confirm against the youtube-dl version in use
        info_dict = ytdl._YoutubeDL__extract_info(
            self.ytdl_url,
            ytdl.get_info_extractor(self.ytdl_ie_key),
            False, {}, True)

        # extraction can yield nothing (e.g. errors ignored via
        # raw-options); avoid a TypeError on the membership test below
        if not info_dict:
            return

        if "entries" in info_dict:
            results = self._process_entries(ytdl, info_dict["entries"])
        else:
            results = (info_dict,)

        # yield results
        for info_dict in results:
            info_dict["extension"] = None
            # the info dict keeps a reference to itself and to the
            # YoutubeDL instance under private '_ytdl_*' keys
            info_dict["_ytdl_info_dict"] = info_dict
            info_dict["_ytdl_instance"] = ytdl

            url = "ytdl:" + (info_dict.get("url") or
                             info_dict.get("webpage_url") or
                             self.ytdl_url)

            yield Message.Directory, info_dict
            yield Message.Url, url, info_dict

    def _process_entries(self, ytdl, entries):
        """Recursively flatten playlist 'entries' into info dicts

        Entries of type "url" or "url_transparent" are resolved with
        ytdl.extract_info(); nested playlists are expanded recursively.
        All other entries are yielded unchanged. Empty entries and
        empty extraction results are skipped.
        """
        for entry in entries:
            if not entry:
                # skip placeholders for entries that failed to extract
                continue

            if entry.get("_type") in ("url", "url_transparent"):
                info_dict = ytdl.extract_info(
                    entry["url"], False,
                    ie_key=entry.get("ie_key"))

                if not info_dict:
                    continue
                if "entries" in info_dict:
                    yield from self._process_entries(
                        ytdl, info_dict["entries"])
                else:
                    yield info_dict
            else:
                yield entry
# when the 'enabled' option of the ("extractor", "ytdl") section is set,
# also match URLs that do not carry the 'ytdl:' prefix
if config.get(("extractor", "ytdl"), "enabled"):
    YoutubeDLExtractor.pattern = r"(?:ytdl:)?(.*)"