refactor proxy handling code (#2357)

- allow gallery-dl proxy settings to overwrite environment proxies
- allow specifying different proxies for data extraction and download
  - add 'downloader.proxy' option
    '-o extractor.proxy=PROXY_URL -o downloader.proxy=null'
    now has the same effect as youtube-dl's '--geo-verification-proxy'
pull/2396/head
Mike Fährmann 3 years ago
parent d50a1ec2cc
commit 47cf05c4ab
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -477,6 +477,7 @@ Description
| Can be either a simple ``string`` with just the local IP address
| or a ``list`` with IP and explicit port number as elements.
extractor.*.user-agent
----------------------
Type
@ -2619,6 +2620,17 @@ Description
Certificate validation during file downloads.
downloader.*.proxy
------------------
Type
``string`` or ``object``
Default
`extractor.*.proxy`_
Description
| Proxy server used for file downloads.
| Disable the use of a proxy by explicitly setting this option to ``null``.
downloader.http.adjust-extensions
---------------------------------
Type

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2020 Mike Fährmann
# Copyright 2014-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -27,6 +27,12 @@ class DownloaderBase():
self.partdir = util.expand_path(self.partdir)
os.makedirs(self.partdir, exist_ok=True)
proxies = self.config("proxy", util.SENTINEL)
if proxies is util.SENTINEL:
self.proxies = job.extractor._proxies
else:
self.proxies = util.build_proxy_map(proxies, self.log)
def config(self, key, default=None):
    """Look up the downloader config value for 'key'

    The lookup is delegated to config.interpolate() using the path
    ("downloader", <self.scheme>); 'default' is passed through to it.
    """
    path = ("downloader", self.scheme)
    return config.interpolate(path, key, default)

@ -121,7 +121,8 @@ class HttpDownloader(DownloaderBase):
try:
response = self.session.request(
"GET", url, stream=True, headers=headers,
timeout=self.timeout, verify=self.verify)
timeout=self.timeout, verify=self.verify,
proxies=self.proxies)
except (ConnectionError, Timeout) as exc:
msg = str(exc)
continue

@ -25,6 +25,7 @@ class YoutubeDLDownloader(DownloaderBase):
"retries": retries+1 if retries >= 0 else float("inf"),
"socket_timeout": self.config("timeout", extractor._timeout),
"nocheckcertificate": not self.config("verify", extractor._verify),
"proxy": self.proxies.get("http") if self.proxies else None,
}
self.ytdl_instance = None

@ -55,6 +55,7 @@ class Extractor():
self._retries = self.config("retries", 4)
self._timeout = self.config("timeout", 30)
self._verify = self.config("verify", True)
self._proxies = util.build_proxy_map(self.config("proxy"), self.log)
self._interval = util.build_duration_func(
self.config("sleep-request", self.request_interval),
self.request_interval_min,
@ -65,7 +66,6 @@ class Extractor():
self._init_session()
self._init_cookies()
self._init_proxies()
@classmethod
def from_url(cls, url):
@ -104,10 +104,12 @@ class Extractor():
def request(self, url, *, method="GET", session=None, retries=None,
encoding=None, fatal=True, notfound=None, **kwargs):
if retries is None:
retries = self._retries
if session is None:
session = self.session
if retries is None:
retries = self._retries
if "proxies" not in kwargs:
kwargs["proxies"] = self._proxies
if "timeout" not in kwargs:
kwargs["timeout"] = self._timeout
if "verify" not in kwargs:
@ -289,20 +291,6 @@ class Extractor():
session.mount("https://", adapter)
session.mount("http://", adapter)
def _init_proxies(self):
"""Update the session's proxy map"""
proxies = self.config("proxy")
if proxies:
if isinstance(proxies, str):
proxies = {"http": proxies, "https": proxies}
if isinstance(proxies, dict):
for scheme, proxy in proxies.items():
if "://" not in proxy:
proxies[scheme] = "http://" + proxy.lstrip("/")
self.session.proxies = proxies
else:
self.log.warning("invalid proxy specifier: %s", proxies)
def _init_cookies(self):
"""Populate the session's cookiejar"""
self._cookiefile = None

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -64,6 +64,9 @@ class YoutubeDLExtractor(Extractor):
"nocheckcertificate" : not self._verify,
}
if self._proxies:
user_opts["proxy"] = self._proxies.get("http")
username, password = self._get_auth_info()
if username:
user_opts["username"], user_opts["password"] = username, password

@ -522,6 +522,26 @@ def build_extractor_filter(categories, negate=True, special=None):
return lambda extr: any(t(extr) for t in tests)
def build_proxy_map(proxies, log=None):
    """Generate a proxy map

    Arguments:
        proxies: proxy specifier; either a single proxy URL as string,
            which is then used for both "http" and "https",
            or a dict mapping URL schemes to proxy URLs
        log: optional logger; its warning() method is called
            when 'proxies' is neither a string nor a dict

    Returns a new dict mapping schemes to proxy URLs, or None when
    'proxies' is empty or has an unsupported type.
    Proxy URLs without an explicit "://" scheme get "http://" prepended.
    """
    if not proxies:
        return None

    if isinstance(proxies, str):
        proxies = {"http": proxies, "https": proxies}
    elif not isinstance(proxies, dict):
        if log:
            log.warning("invalid proxy specifier: %s", proxies)
        return None

    # Collect the result in a fresh dict so the caller's object
    # (usually a value taken straight from the config) is not modified.
    proxy_map = {}
    for scheme, proxy in proxies.items():
        # Only strings can be normalized; other values (e.g. None)
        # are passed through unchanged instead of raising a TypeError.
        if isinstance(proxy, str) and "://" not in proxy:
            proxy = "http://" + proxy.lstrip("/")
        proxy_map[scheme] = proxy
    return proxy_map
def build_predicate(predicates):
if not predicates:
return lambda url, kwdict: True

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
# Copyright 2021-2022 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -46,8 +46,6 @@ def construct_YoutubeDL(module, obj, user_opts, system_opts=None):
if opts.get("format") is None:
opts["format"] = config("format")
if opts.get("proxy") is None:
opts["proxy"] = obj.session.proxies.get("http")
if opts.get("nopart") is None:
opts["nopart"] = not config("part", True)
if opts.get("updatetime") is None:

Loading…
Cancel
Save