implement functions to load/save cookies.txt files (closes #586)

The load/save methods of the standard library's MozillaCookieJar have
several shortcomings (no support for #HttpOnly_ cookies, mishandling of
0 expiration timestamps, etc.) and require the construction of an
ultimately pointless CookieJar object.
pull/599/head
Mike Fährmann 5 years ago
parent 5d73b7f29c
commit c1a6862863
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,5 +1,7 @@
# Changelog # Changelog
## Unreleased
## 1.12.3 - 2020-01-19 ## 1.12.3 - 2020-01-19
### Additions ### Additions
- [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565)) - [hentaifoundry] extract more metadata ([#565](https://github.com/mikf/gallery-dl/issues/565))

@ -16,7 +16,6 @@ import logging
import datetime import datetime
import requests import requests
import threading import threading
import http.cookiejar
from .message import Message from .message import Message
from .. import config, text, util, exception, cloudflare from .. import config, text, util, exception, cloudflare
@ -197,13 +196,12 @@ class Extractor():
self._update_cookies_dict(cookies, self.cookiedomain) self._update_cookies_dict(cookies, self.cookiedomain)
elif isinstance(cookies, str): elif isinstance(cookies, str):
cookiefile = util.expand_path(cookies) cookiefile = util.expand_path(cookies)
cookiejar = http.cookiejar.MozillaCookieJar()
try: try:
cookiejar.load(cookiefile) cookies = util.load_cookiestxt(cookiefile)
except OSError as exc: except Exception as exc:
self.log.warning("cookies: %s", exc) self.log.warning("cookies: %s", exc)
else: else:
self._cookiejar.update(cookiejar) self._update_cookies(cookies)
self._cookiefile = cookiefile self._cookiefile = cookiefile
else: else:
self.log.warning( self.log.warning(
@ -218,11 +216,8 @@ class Extractor():
def _store_cookies(self): def _store_cookies(self):
"""Store the session's cookiejar in a cookies.txt file""" """Store the session's cookiejar in a cookies.txt file"""
if self._cookiefile and self.config("cookies-update", True): if self._cookiefile and self.config("cookies-update", True):
cookiejar = http.cookiejar.MozillaCookieJar()
for cookie in self._cookiejar:
cookiejar.set_cookie(cookie)
try: try:
cookiejar.save(self._cookiefile) util.save_cookiestxt(self._cookiefile, self._cookiejar)
except OSError as exc: except OSError as exc:
self.log.warning("cookies: %s", exc) self.log.warning("cookies: %s", exc)
@ -491,12 +486,6 @@ def generate_extractors(extractor_data, symtable, classes):
symtable[Extr.__name__] = prev = Extr symtable[Extr.__name__] = prev = Extr
# Reduce strictness of the expected magic string in cookiejar files.
# (This allows the use of Wget-generated cookiejars without modification)
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)
# Undo automatic pyOpenSSL injection by requests # Undo automatic pyOpenSSL injection by requests
pyopenssl = config.get((), "pyopenssl", False) pyopenssl = config.get((), "pyopenssl", False)
if not pyopenssl: if not pyopenssl:

@ -21,6 +21,7 @@ import datetime
import operator import operator
import itertools import itertools
import urllib.parse import urllib.parse
from http.cookiejar import Cookie
from email.utils import mktime_tz, parsedate_tz from email.utils import mktime_tz, parsedate_tz
from . import text, exception from . import text, exception
@ -135,6 +136,68 @@ def remove_directory(path):
pass pass
def load_cookiestxt(path):
    """Parse a Netscape cookies.txt file and return a list of its Cookies

    Lines prefixed with '#HttpOnly_' are parsed like regular cookie lines.
    Empty lines and lines starting with '#' or '$' are ignored.
    An empty or "0" expiration field results in a Cookie without an
    expiration date.

    Raises OSError if 'path' cannot be opened and ValueError if a cookie
    line is malformed (e.g. it does not have exactly 7 tab-separated fields).
    """
    cookies = []

    with open(path) as fp:
        for line in fp:
            # strip '#HttpOnly_' and trailing '\n'
            if line.startswith("#HttpOnly_"):
                line = line[10:]
            # endswith() is safe even if nothing is left after the prefix
            if line.endswith("\n"):
                line = line[:-1]

            # ignore empty lines and comments
            if not line or line[0] in ("#", "$"):
                continue

            # 'cookie_path' keeps the 'path' argument from being shadowed
            domain, domain_specified, cookie_path, secure, expires, \
                name, value = line.split("\t")

            # a cookie without value has an empty name field
            # and its actual name in the value field
            if not name:
                name = value
                value = None

            # "0" is written for cookies without expiration date;
            # passing it on would yield a timestamp of 0 (1970-01-01)
            # and mark the cookie as expired
            if not expires or expires == "0":
                expires = None

            cookies.append(Cookie(
                0, name, value,
                None, False,
                domain,
                domain_specified == "TRUE",
                domain.startswith("."),
                cookie_path, False,
                secure == "TRUE",
                expires,
                False, None, None, {},
            ))

    return cookies
def save_cookiestxt(path, cookies):
    """Write 'cookies' to the file at 'path' in Netscape cookies.txt format

    Each cookie becomes one line of 7 tab-separated fields:
    domain, domain flag, path, secure flag, expiration, name, value.
    """
    with open(path, "w") as fp:
        write = fp.write
        write("# Netscape HTTP Cookie File\n\n")

        for cookie in cookies:
            # a cookie without value is stored with an empty name field
            # and its name in the value field
            if cookie.value is None:
                name, value = "", cookie.name
            else:
                name, value = cookie.name, cookie.value

            domain = cookie.domain
            expires = cookie.expires

            fields = (
                domain,
                "TRUE" if domain.startswith(".") else "FALSE",
                cookie.path,
                "TRUE" if cookie.secure else "FALSE",
                # cookies without expiration date get a timestamp of 0
                "0" if expires is None else str(expires),
                name,
                value,
            )
            write("\t".join(fields) + "\n")
def code_to_language(code, default=None): def code_to_language(code, default=None):
"""Map an ISO 639-1 language code to its actual name""" """Map an ISO 639-1 language code to its actual name"""
return CODES.get((code or "").lower(), default) return CODES.get((code or "").lower(), default)

@ -6,4 +6,4 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
__version__ = "1.12.3" __version__ = "1.13.0-dev"

Loading…
Cancel
Save