allow specifying number of skips before abort/exit (closes #115)

In addition to 'abort' and 'exit', it is now possible to specify 'abort:N' and 'exit:N' (where N is any integer) as the value for 'skip' to abort/exit after N consecutive skipped downloads.
6 years ago · 6ed629f2b6
parent e1d306cc48
commit 6ed629f2b6
3 changed files with 35 additions and 19 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -111,13 +111,19 @@ extractor.*.skip
 =========== =====
 Type        ``bool`` or ``string``
 Default     ``true``
-Description Controls the behavior when downloading a file whose filename
+Description Controls the behavior when downloading files whose filename
            already exists.

-            * ``true``: Skip the download
-            * ``false``: Overwrite the already existing file
+            * ``true``: Skip downloads
+            * ``false``: Overwrite already existing files
+
            * ``"abort"``: Abort the current extractor run
+            * ``"abort:N"``: Skip downloads and abort extractor run
+              after ``N`` consecutive skips
+
            * ``"exit"``: Exit the program altogether
+            * ``"exit:N"``: Skip downloads and exit the program
+              after ``N`` consecutive skips
 =========== =====


--- a/gallery_dl/job.py
+++ b/gallery_dl/job.py
@ -12,7 +12,7 @@ import json
 import hashlib
 import logging
 from . import extractor, downloader, postprocessor
-from . import config, util, output, exception
+from . import config, text, util, output, exception
 from .extractor.message import Message


@ -183,7 +183,7 @@ class DownloadJob(Job):
        self.pathfmt.set_keywords(keywords)

        if self.pathfmt.exists(self.archive):
-            self.out.skip(self.pathfmt.path)
+            self.handle_skip()
            return

        if self.sleep:
@ -204,7 +204,7 @@ class DownloadJob(Job):
                return

        if not self.pathfmt.temppath:
-            self.out.skip(self.pathfmt.path)
+            self.handle_skip()
            return

        # run post processors
@ -217,6 +217,7 @@ class DownloadJob(Job):
        self.out.success(self.pathfmt.path, 0)
        if self.archive:
            self.archive.add(keywords)
+        self._skipcnt = 0

    def handle_urllist(self, urls, keywords):
        """Download the resource specified in 'url'"""
@ -241,6 +242,13 @@ class DownloadJob(Job):
            for pp in self.postprocessors:
                pp.finalize()

+    def handle_skip(self):
+        self.out.skip(self.pathfmt.path)
+        if self._skipexc:
+            self._skipcnt += 1
+            if self._skipcnt >= self._skipmax:
+                raise self._skipexc()
+
    def download(self, url):
        """Download 'url'"""
        scheme = url.partition(":")[0]
@ -272,6 +280,20 @@ class DownloadJob(Job):
        self.pathfmt = util.PathFormat(self.extractor)
        self.sleep = self.extractor.config("sleep")

+        skip = self.extractor.config("skip", True)
+        if skip:
+            self._skipexc = None
+            if isinstance(skip, str):
+                skip, _, smax = skip.partition(":")
+                if skip == "abort":
+                    self._skipexc = exception.StopExtraction
+                elif skip == "exit":
+                    self._skipexc = sys.exit
+                self._skipcnt = 0
+                self._skipmax = text.parse_int(smax)
+        else:
+            self.pathfmt.exists = lambda x=None: False
+
        archive = self.extractor.config("archive")
        if archive:
            path = util.expand_path(archive)
--- a/gallery_dl/util.py
+++ b/gallery_dl/util.py
@ -452,17 +452,6 @@ class PathFormat():
        if os.altsep:
            self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)

-        skip = extractor.config("skip", True)
-        if skip:
-            if skip == "abort":
-                self._skipexc = exception.StopExtraction
-            elif skip == "exit":
-                self._skipexc = sys.exit
-            else:
-                self._skipexc = None
-        else:
-            self.exists = lambda x=None: False
-
    def open(self, mode="wb"):
        """Open file and return a corresponding file object"""
        return open(self.temppath, mode)
@ -471,9 +460,8 @@ class PathFormat():
        """Return True if the file exists on disk or in 'archive'"""
        if (archive and archive.check(self.keywords) or
                self.has_extension and os.path.exists(self.realpath)):
-            if self._skipexc:
-                raise self._skipexc()
            if not self.has_extension:
+                # adjust display name
                self.set_extension("")
                if self.path[-1] == ".":
                    self.path = self.path[:-1]