implement functions to load/save cookies.txt files (closes #586)

The methods of the standard library's MozillaCookieJar have
several shortcomings (#HttpOnly_ cookies, 0 expiration timestamps, etc.)
and require the construction of an ultimately pointless CookieJar object.
pull/599/head
Mike Fährmann 5 years ago
parent 5d73b7f29c
commit c1a6862863
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,5 +1,7 @@
# Changelog
## Unreleased
## 1.12.3 - 2020-01-19
### Additions
- [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565))

@ -16,7 +16,6 @@ import logging
import datetime
import requests
import threading
import http.cookiejar
from .message import Message
from .. import config, text, util, exception, cloudflare
@ -197,13 +196,12 @@ class Extractor():
self._update_cookies_dict(cookies, self.cookiedomain)
elif isinstance(cookies, str):
cookiefile = util.expand_path(cookies)
cookiejar = http.cookiejar.MozillaCookieJar()
try:
cookiejar.load(cookiefile)
except OSError as exc:
cookies = util.load_cookiestxt(cookiefile)
except Exception as exc:
self.log.warning("cookies: %s", exc)
else:
self._cookiejar.update(cookiejar)
self._update_cookies(cookies)
self._cookiefile = cookiefile
else:
self.log.warning(
@ -218,11 +216,8 @@ class Extractor():
def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file"""
if self._cookiefile and self.config("cookies-update", True):
cookiejar = http.cookiejar.MozillaCookieJar()
for cookie in self._cookiejar:
cookiejar.set_cookie(cookie)
try:
cookiejar.save(self._cookiefile)
util.save_cookiestxt(self._cookiefile, self._cookiejar)
except OSError as exc:
self.log.warning("cookies: %s", exc)
@ -491,12 +486,6 @@ def generate_extractors(extractor_data, symtable, classes):
symtable[Extr.__name__] = prev = Extr
# Reduce strictness of the expected magic string in cookiejar files.
# (This allows the use of Wget-generated cookiejars without modification)
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
# Undo automatic pyOpenSSL injection by requests
pyopenssl = config.get((), "pyopenssl", False)
if not pyopenssl:

@ -21,6 +21,7 @@ import datetime
import operator
import itertools
import urllib.parse
from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz
from . import text, exception
@ -135,6 +136,68 @@ def remove_directory(path):
pass
def load_cookiestxt(path):
    """Parse a Netscape cookies.txt file and return a list of its Cookies

    Entries prefixed with '#HttpOnly_' are parsed like regular entries;
    empty lines and lines starting with '#' or '$' are ignored.
    An empty or '0' expiration field produces a cookie without an
    expiration date (expires=None) instead of an already expired one.
    A cookie without a name is stored with its value as name and None
    as value, mirroring how save_cookiestxt() writes such cookies.

    Raises OSError if 'path' cannot be opened, and ValueError if an entry
    does not consist of exactly 7 tab-separated fields.
    """
    cookies = []

    with open(path) as fp:
        for line in fp:
            # strip '#HttpOnly_' and trailing '\n'
            if line.startswith("#HttpOnly_"):
                line = line[10:]
            # endswith() is safe for an empty string, unlike line[-1]
            # (a final '#HttpOnly_' line without newline ends up empty here)
            if line.endswith("\n"):
                line = line[:-1]

            # ignore empty lines and comments
            if not line or line[0] in ("#", "$"):
                continue

            # 'cookie_path' avoids shadowing the 'path' parameter
            (domain, domain_specified, cookie_path,
             secure, expires, name, value) = line.split("\t")

            if not name:
                name = value
                value = None

            cookies.append(Cookie(
                0, name, value,
                None, False,
                domain,
                domain_specified == "TRUE",
                domain.startswith("."),
                cookie_path, False,
                secure == "TRUE",
                # '0' marks a session cookie; passing it through would
                # result in expires=0, i.e. a cookie that is always expired
                None if expires in ("", "0") else expires,
                False, None, None, {},
            ))

    return cookies
def save_cookiestxt(path, cookies):
    """Write 'cookies' to 'path' as a Netscape cookies.txt file

    Each cookie becomes one line of 7 tab-separated fields. A cookie whose
    value is None is written with an empty name field and its name in the
    value field; a missing expiration date is written as '0'.
    """
    def _entries():
        for ck in cookies:
            # value-less cookies swap name and value in the output
            if ck.value is None:
                fields_tail = ("", ck.name)
            else:
                fields_tail = (ck.name, ck.value)

            initial_dot = ck.domain.startswith(".")
            expiration = "0" if ck.expires is None else str(ck.expires)

            yield "\t".join((
                ck.domain,
                "TRUE" if initial_dot else "FALSE",
                ck.path,
                "TRUE" if ck.secure else "FALSE",
                expiration,
            ) + fields_tail) + "\n"

    with open(path, "w") as fp:
        fp.write("# Netscape HTTP Cookie File\n\n")
        fp.writelines(_entries())
def code_to_language(code, default=None):
    """Return the language name for an ISO 639-1 code

    The lookup in CODES is case-insensitive; 'default' is returned for
    unknown codes and for falsy values of 'code'.
    """
    key = code.lower() if code else ""
    return CODES.get(key, default)

@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
__version__ = "1.12.3"
__version__ = "1.13.0-dev"

Loading…
Cancel
Save