diff --git a/docs/configuration.rst b/docs/configuration.rst index eb3144b1..0e2e3557 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -108,21 +108,36 @@ Description Directory path used as the base for all download destinations. =========== ===== -extractor.*.restrict-filenames ------------------------------- +extractor.*.path-restrict +------------------------- =========== ===== Type ``string`` Default ``"auto"`` -Example ``"/!? ()[]{}"`` -Description Characters to replace with underscores (``_``) when generating - directory and file names. +Example ``"/!? (){}"`` +Description Set of characters to replace with underscores (``_``) + in generated path segment names. Special values: * ``"auto"``: Use characters from ``"unix"`` or ``"windows"`` depending on the local operating system * ``"unix"``: ``"/"`` - * ``"windows"``: ``"<>:\"\\|/?*"`` + * ``"windows"``: ``"\\\\|/<>:\"?*"`` + + Note: In a set with 2 or more characters, ``[]^-\`` need to be + escaped with backslashes, e.g. ``"\\[\\]"`` +=========== ===== + + +extractor.*.path-remove +----------------------- +=========== ===== +Type ``string`` +Default ``"\\u0000-\\u001f\\u007f"`` (ASCII control characters) +Description Set of characters to remove from generated path names. + + Note: In a set with 2 or more characters, ``[]^-\`` need to be + escaped with backslashes, e.g. ``"\\[\\]"`` =========== ===== diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index b77310a0..b9ff32d3 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -8,8 +8,9 @@ "proxy": null, "skip": true, "sleep": 0, + "path-restrict": "auto", + "path-remove": "\\u0000-\\u001f\\u007f", "user-agent": "Mozilla/5.0 (X11; Linux x86_64; rv:68.0) Gecko/20100101 Firefox/68.0", - "restrict-filenames": "auto", "artstation": { diff --git a/gallery_dl/util.py b/gallery_dl/util.py index 6c984d6a..d526f3eb 100644 --- a/gallery_dl/util.py +++ b/gallery_dl/util.py @@ -535,25 +535,29 @@ class PathFormat(): if os.altsep and os.altsep in self.basedirectory: self.basedirectory = self.basedirectory.replace(os.altsep, os.sep) - restrict = extractor.config("restrict-filenames", "auto") + restrict = extractor.config("path-restrict", "auto") if restrict == "auto": - restrict = "<>:\"\\/|?*" if os.name == "nt" else "/" + restrict = "\\\\|/<>:\"?*" if os.name == "nt" else "/" elif restrict == "unix": restrict = "/" elif restrict == "windows": - restrict = "<>:\"\\/|?*" - self.clean_path = self._build_cleanfunc(restrict) + restrict = "\\\\|/<>:\"?*" + + remove = extractor.config("path-remove", "\x00-\x1f\x7f") + + self.clean_segment = self._build_cleanfunc(restrict, "_") + self.clean_path = self._build_cleanfunc(remove, "") @staticmethod - def _build_cleanfunc(repl): - if not repl: + def _build_cleanfunc(chars, repl): + if not chars: return lambda x: x - elif len(repl) == 1: - def func(x, r=repl): - return x.replace(r, "_") + elif len(chars) == 1: + def func(x, c=chars, r=repl): + return x.replace(c, r) else: - def func(x, sub=re.compile("[" + re.escape(repl) + "]").sub): - return sub("_", x) + def func(x, sub=re.compile("[" + chars + "]").sub, r=repl): + return sub(r, x) return func def open(self, mode="wb"): @@ -586,16 +590,19 @@ class PathFormat(): # Build path segments by applying 'kwdict' to directory format strings try: segments = [ - self.clean_path( + self.clean_segment( Formatter(segment, self.kwdefault) - .format_map(kwdict).strip()) + .format_map(kwdict) + .strip() + ) for segment in self.directory_fmt ] except Exception as exc: raise exception.FormatError(exc, "directory") # Join path segements - self.directory = os.path.join(self.basedirectory, *segments) + self.directory = self.clean_path(os.path.join( + self.basedirectory, *segments)) # Remove trailing path separator; # occurs if the last argument to os.path.join() is an empty string @@ -641,8 +648,8 @@ class PathFormat(): # Apply 'kwdict' to filename format string try: - self.filename = self.clean_path( - self.formatter.format_map(self.kwdict)) + self.filename = self.clean_path(self.clean_segment( + self.formatter.format_map(self.kwdict))) except Exception as exc: raise exception.FormatError(exc, "filename")