Remove control characters from filesystem paths

- add 'path-remove' option to specify the set of characters that
 should be removed
- rename 'restrict-filenames' to 'path-restrict'
- #348, #380
deviantart-rewrite
Mike Fährmann 5 years ago
parent c50d60a53d
commit 5a210991b6
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -108,21 +108,36 @@ Description Directory path used as the base for all download destinations.
=========== =====
extractor.*.restrict-filenames
------------------------------
extractor.*.path-restrict
-------------------------
=========== =====
Type ``string``
Default ``"auto"``
Example ``"/!? ()[]{}"``
Description Characters to replace with underscores (``_``) when generating
directory and file names.
Example ``"/!? (){}"``
Description Set of characters to replace with underscores (``_``)
in generated path segment names.
Special values:
* ``"auto"``: Use characters from ``"unix"`` or ``"windows"``
depending on the local operating system
* ``"unix"``: ``"/"``
* ``"windows"``: ``"<>:\"\\|/?*"``
* ``"windows"``: ``"\\\\|/<>:\"?*"``
Note: In a set with 2 or more characters, ``[]^-\`` need to be
escaped with backslashes, e.g. ``"\\[\\]"``
=========== =====
extractor.*.path-remove
-----------------------
=========== =====
Type ``string``
Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters)
Description Set of characters to remove from generated path names.
Note: In a set with 2 or more characters, ``[]^-\`` need to be
escaped with backslashes, e.g. ``"\\[\\]"``
=========== =====

@ -8,8 +8,9 @@
"proxy": null,
"skip": true,
"sleep": 0,
"path-restrict": "auto",
"path-remove": "\\u0000-\\u001f\\u007f",
"user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0",
"restrict-filenames": "auto",
"artstation":
{

@ -535,25 +535,29 @@ class PathFormat():
if os.altsep and os.altsep in self.basedirectory:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
restrict = extractor.config("restrict-filenames", "auto")
restrict = extractor.config("path-restrict", "auto")
if restrict == "auto":
restrict = "<>:\"\\/|?*" if os.name == "nt" else "/"
restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/"
elif restrict == "unix":
restrict = "/"
elif restrict == "windows":
restrict = "<>:\"\\/|?*"
self.clean_path = self._build_cleanfunc(restrict)
restrict = "\\\\|/<>:\"?*"
remove = extractor.config("path-remove", "\x00-\x1f\x7f")
self.clean_segment = self._build_cleanfunc(restrict, "_")
self.clean_path = self._build_cleanfunc(remove, "")
@staticmethod
def _build_cleanfunc(repl):
if not repl:
def _build_cleanfunc(chars, repl):
if not chars:
return lambda x: x
elif len(repl) == 1:
def func(x, r=repl):
return x.replace(r, "_")
elif len(chars) == 1:
def func(x, c=chars, r=repl):
return x.replace(c, r)
else:
def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub):
return sub("_", x)
def func(x, sub=re.compile("[" + chars + "]").sub, r=repl):
return sub(r, x)
return func
def open(self, mode="wb"):
@ -586,16 +590,19 @@ class PathFormat():
# Build path segments by applying 'kwdict' to directory format strings
try:
segments = [
self.clean_path(
self.clean_segment(
Formatter(segment, self.kwdefault)
.format_map(kwdict).strip())
.format_map(kwdict)
.strip()
)
for segment in self.directory_fmt
]
except Exception as exc:
raise exception.FormatError(exc, "directory")
# Join path segments
self.directory = os.path.join(self.basedirectory, *segments)
self.directory = self.clean_path(os.path.join(
self.basedirectory, *segments))
# Remove trailing path separator;
# occurs if the last argument to os.path.join() is an empty string
@ -641,8 +648,8 @@ class PathFormat():
# Apply 'kwdict' to filename format string
try:
self.filename = self.clean_path(
self.formatter.format_map(self.kwdict))
self.filename = self.clean_path(self.clean_segment(
self.formatter.format_map(self.kwdict)))
except Exception as exc:
raise exception.FormatError(exc, "filename")

Loading…
Cancel
Save