allow specifying number of skips before abort/exit (closes #115)

In addition to 'abort' and 'exit', it is now possible to specify
'abort:N' and 'exit:N' (where N is any integer) as the value for 'skip'
to abort/exit after N consecutive skipped downloads.
pull/133/head
Mike Fährmann 6 years ago
parent e1d306cc48
commit 6ed629f2b6
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -111,13 +111,19 @@ extractor.*.skip
=========== =====
Type ``bool`` or ``string``
Default ``true``
Description Controls the behavior when downloading a file whose filename
Description Controls the behavior when downloading files whose filename
already exists.
* ``true``: Skip the download
* ``false``: Overwrite the already existing file
* ``true``: Skip downloads
* ``false``: Overwrite already existing files
* ``"abort"``: Abort the current extractor run
* ``"abort:N"``: Skip downloads and abort extractor run
after ``N`` consecutive skips
* ``"exit"``: Exit the program altogether
* ``"exit:N"``: Skip downloads and exit the program
after ``N`` consecutive skips
=========== =====

@ -12,7 +12,7 @@ import json
import hashlib
import logging
from . import extractor, downloader, postprocessor
from . import config, util, output, exception
from . import config, text, util, output, exception
from .extractor.message import Message
@ -183,7 +183,7 @@ class DownloadJob(Job):
self.pathfmt.set_keywords(keywords)
if self.pathfmt.exists(self.archive):
self.out.skip(self.pathfmt.path)
self.handle_skip()
return
if self.sleep:
@ -204,7 +204,7 @@ class DownloadJob(Job):
return
if not self.pathfmt.temppath:
self.out.skip(self.pathfmt.path)
self.handle_skip()
return
# run post processors
@ -217,6 +217,7 @@ class DownloadJob(Job):
self.out.success(self.pathfmt.path, 0)
if self.archive:
self.archive.add(keywords)
self._skipcnt = 0
def handle_urllist(self, urls, keywords):
"""Download the resource specified in 'url'"""
@ -241,6 +242,13 @@ class DownloadJob(Job):
for pp in self.postprocessors:
pp.finalize()
def handle_skip(self):
    """Report the current file as skipped and enforce the skip limit

    Calls 'self.out.skip()' with the current path. If a skip exception
    is configured, increments 'self._skipcnt' and raises that exception
    once the counter reaches 'self._skipmax'.
    """
    self.out.skip(self.pathfmt.path)

    # '_skipexc' may never have been assigned (e.g. when skipping is
    # disabled through the 'skip' option), while this method can still be
    # called for a download that ended without a temporary file.
    # Treat a missing attribute the same as "no limit configured".
    skipexc = getattr(self, "_skipexc", None)
    if skipexc:
        self._skipcnt += 1
        if self._skipcnt >= self._skipmax:
            # calling the stored object yields the exception instance
            # (or, for a function like sys.exit, raises by itself)
            raise skipexc()
def download(self, url):
"""Download 'url'"""
scheme = url.partition(":")[0]
@ -272,6 +280,20 @@ class DownloadJob(Job):
self.pathfmt = util.PathFormat(self.extractor)
self.sleep = self.extractor.config("sleep")
skip = self.extractor.config("skip", True)
if skip:
self._skipexc = None
if isinstance(skip, str):
skip, _, smax = skip.partition(":")
if skip == "abort":
self._skipexc = exception.StopExtraction
elif skip == "exit":
self._skipexc = sys.exit
self._skipcnt = 0
self._skipmax = text.parse_int(smax)
else:
self.pathfmt.exists = lambda x=None: False
archive = self.extractor.config("archive")
if archive:
path = util.expand_path(archive)

@ -452,17 +452,6 @@ class PathFormat():
if os.altsep:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
skip = extractor.config("skip", True)
if skip:
if skip == "abort":
self._skipexc = exception.StopExtraction
elif skip == "exit":
self._skipexc = sys.exit
else:
self._skipexc = None
else:
self.exists = lambda x=None: False
def open(self, mode="wb"):
"""Open file and return a corresponding file object"""
return open(self.temppath, mode)
@ -471,9 +460,8 @@ class PathFormat():
"""Return True if the file exists on disk or in 'archive'"""
if (archive and archive.check(self.keywords) or
self.has_extension and os.path.exists(self.realpath)):
if self._skipexc:
raise self._skipexc()
if not self.has_extension:
# adjust display name
self.set_extension("")
if self.path[-1] == ".":
self.path = self.path[:-1]

Loading…
Cancel
Save