From 47cf05c4abf4e950d0896a220ff8558ea5baf79b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 10 Mar 2022 23:32:16 +0100 Subject: [PATCH] refactor proxy handling code (#2357) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - allow gallery-dl proxy settings to overwrite environment proxies - allow specifying different proxies for data extraction and download - add 'downloader.proxy' option - '-o extractor.proxy=PROXY_URL -o downloader.proxy=null' now has the same effect as youtube-dl's '--geo-verification-proxy' --- docs/configuration.rst | 12 ++++++++++++ gallery_dl/downloader/common.py | 8 +++++++- gallery_dl/downloader/http.py | 3 ++- gallery_dl/downloader/ytdl.py | 1 + gallery_dl/extractor/common.py | 22 +++++----------------- gallery_dl/extractor/ytdl.py | 5 ++++- gallery_dl/util.py | 20 ++++++++++++++++++++ gallery_dl/ytdl.py | 4 +--- 8 files changed, 52 insertions(+), 23 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 234d1433..d4fadf7e 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -477,6 +477,7 @@ Description | Can be either a simple ``string`` with just the local IP address | or a ``list`` with IP and explicit port number as elements. + extractor.*.user-agent ---------------------- Type @@ -2619,6 +2620,17 @@ Description Certificate validation during file downloads. +downloader.*.proxy +------------------ +Type + ``string`` or ``object`` +Default + `extractor.*.proxy`_ +Description + | Proxy server used for file downloads. + | Disable the use of a proxy by explicitly setting this option to ``null``. 
+ + downloader.http.adjust-extensions --------------------------------- Type diff --git a/gallery_dl/downloader/common.py b/gallery_dl/downloader/common.py index d8580759..1168d83d 100644 --- a/gallery_dl/downloader/common.py +++ b/gallery_dl/downloader/common.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2020 Mike Fährmann +# Copyright 2014-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -27,6 +27,12 @@ class DownloaderBase(): self.partdir = util.expand_path(self.partdir) os.makedirs(self.partdir, exist_ok=True) + proxies = self.config("proxy", util.SENTINEL) + if proxies is util.SENTINEL: + self.proxies = job.extractor._proxies + else: + self.proxies = util.build_proxy_map(proxies, self.log) + def config(self, key, default=None): """Interpolate downloader config value for 'key'""" return config.interpolate(("downloader", self.scheme), key, default) diff --git a/gallery_dl/downloader/http.py b/gallery_dl/downloader/http.py index 91ce731a..b878f5f4 100644 --- a/gallery_dl/downloader/http.py +++ b/gallery_dl/downloader/http.py @@ -121,7 +121,8 @@ class HttpDownloader(DownloaderBase): try: response = self.session.request( "GET", url, stream=True, headers=headers, - timeout=self.timeout, verify=self.verify) + timeout=self.timeout, verify=self.verify, + proxies=self.proxies) except (ConnectionError, Timeout) as exc: msg = str(exc) continue diff --git a/gallery_dl/downloader/ytdl.py b/gallery_dl/downloader/ytdl.py index 462bbf8f..2badccf4 100644 --- a/gallery_dl/downloader/ytdl.py +++ b/gallery_dl/downloader/ytdl.py @@ -25,6 +25,7 @@ class YoutubeDLDownloader(DownloaderBase): "retries": retries+1 if retries >= 0 else float("inf"), "socket_timeout": self.config("timeout", extractor._timeout), "nocheckcertificate": not self.config("verify", extractor._verify), + "proxy": self.proxies.get("http") if self.proxies else None, } self.ytdl_instance = 
None diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 026034cb..e3559f9b 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -55,6 +55,7 @@ class Extractor(): self._retries = self.config("retries", 4) self._timeout = self.config("timeout", 30) self._verify = self.config("verify", True) + self._proxies = util.build_proxy_map(self.config("proxy"), self.log) self._interval = util.build_duration_func( self.config("sleep-request", self.request_interval), self.request_interval_min, @@ -65,7 +66,6 @@ class Extractor(): self._init_session() self._init_cookies() - self._init_proxies() @classmethod def from_url(cls, url): @@ -104,10 +104,12 @@ class Extractor(): def request(self, url, *, method="GET", session=None, retries=None, encoding=None, fatal=True, notfound=None, **kwargs): - if retries is None: - retries = self._retries if session is None: session = self.session + if retries is None: + retries = self._retries + if "proxies" not in kwargs: + kwargs["proxies"] = self._proxies if "timeout" not in kwargs: kwargs["timeout"] = self._timeout if "verify" not in kwargs: @@ -289,20 +291,6 @@ class Extractor(): session.mount("https://", adapter) session.mount("http://", adapter) - def _init_proxies(self): - """Update the session's proxy map""" - proxies = self.config("proxy") - if proxies: - if isinstance(proxies, str): - proxies = {"http": proxies, "https": proxies} - if isinstance(proxies, dict): - for scheme, proxy in proxies.items(): - if "://" not in proxy: - proxies[scheme] = "http://" + proxy.lstrip("/") - self.session.proxies = proxies - else: - self.log.warning("invalid proxy specifier: %s", proxies) - def _init_cookies(self): """Populate the session's cookiejar""" self._cookiefile = None diff --git a/gallery_dl/extractor/ytdl.py b/gallery_dl/extractor/ytdl.py index 8f3ef9a4..b3a16521 100644 --- a/gallery_dl/extractor/ytdl.py +++ b/gallery_dl/extractor/ytdl.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# 
Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -64,6 +64,9 @@ class YoutubeDLExtractor(Extractor): "nocheckcertificate" : not self._verify, } + if self._proxies: + user_opts["proxy"] = self._proxies.get("http") + username, password = self._get_auth_info() if username: user_opts["username"], user_opts["password"] = username, password diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 0cea2cb0..92d16200 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -522,6 +522,26 @@ def build_extractor_filter(categories, negate=True, special=None): return lambda extr: any(t(extr) for t in tests) +def build_proxy_map(proxies, log=None): + """Generate a proxy map""" + if not proxies: + return None + + if isinstance(proxies, str): + if "://" not in proxies: + proxies = "http://" + proxies.lstrip("/") + return {"http": proxies, "https": proxies} + + if isinstance(proxies, dict): + for scheme, proxy in proxies.items(): + if "://" not in proxy: + proxies[scheme] = "http://" + proxy.lstrip("/") + return proxies + + if log: + log.warning("invalid proxy specifier: %s", proxies) + + def build_predicate(predicates): if not predicates: return lambda url, kwdict: True diff --git a/gallery_dl/ytdl.py b/gallery_dl/ytdl.py index e6953ebd..45b98262 100644 --- a/gallery_dl/ytdl.py +++ b/gallery_dl/ytdl.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2021 Mike Fährmann +# Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -46,8 +46,6 @@ def construct_YoutubeDL(module, obj, user_opts, system_opts=None): if opts.get("format") is None: opts["format"] = config("format") - if opts.get("proxy") is None: - opts["proxy"] = obj.session.proxies.get("http") if opts.get("nopart") is None: 
opts["nopart"] = not config("part", True) if opts.get("updatetime") is None: