allow specifying number of skips before abort/exit (closes #115)

In addition to 'abort' and 'exit', it is now possible to specify
'abort:N' and 'exit:N' (where N is any integer) as the value for 'skip'
to abort/exit after N consecutive downloads have been skipped.
pull/133/head
Mike Fährmann 6 years ago
parent e1d306cc48
commit 6ed629f2b6
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -111,13 +111,19 @@ extractor.*.skip
=========== ===== =========== =====
Type ``bool`` or ``string`` Type ``bool`` or ``string``
Default ``true`` Default ``true``
Description Controls the behavior when downloading a file whose filename Description Controls the behavior when downloading files whose filename
already exists. already exists.
* ``true``: Skip the download * ``true``: Skip downloads
* ``false``: Overwrite the already existing file * ``false``: Overwrite already existing files
* ``"abort"``: Abort the current extractor run * ``"abort"``: Abort the current extractor run
* ``"abort:N"``: Skip downloads and abort extractor run
after ``N`` consecutive skips
* ``"exit"``: Exit the program altogether * ``"exit"``: Exit the program altogether
* ``"exit:N"``: Skip downloads and exit the program
after ``N`` consecutive skips
=========== ===== =========== =====

@ -12,7 +12,7 @@ import json
import hashlib import hashlib
import logging import logging
from . import extractor, downloader, postprocessor from . import extractor, downloader, postprocessor
from . import config, util, output, exception from . import config, text, util, output, exception
from .extractor.message import Message from .extractor.message import Message
@ -183,7 +183,7 @@ class DownloadJob(Job):
self.pathfmt.set_keywords(keywords) self.pathfmt.set_keywords(keywords)
if self.pathfmt.exists(self.archive): if self.pathfmt.exists(self.archive):
self.out.skip(self.pathfmt.path) self.handle_skip()
return return
if self.sleep: if self.sleep:
@ -204,7 +204,7 @@ class DownloadJob(Job):
return return
if not self.pathfmt.temppath: if not self.pathfmt.temppath:
self.out.skip(self.pathfmt.path) self.handle_skip()
return return
# run post processors # run post processors
@ -217,6 +217,7 @@ class DownloadJob(Job):
self.out.success(self.pathfmt.path, 0) self.out.success(self.pathfmt.path, 0)
if self.archive: if self.archive:
self.archive.add(keywords) self.archive.add(keywords)
self._skipcnt = 0
def handle_urllist(self, urls, keywords): def handle_urllist(self, urls, keywords):
"""Download the resource specified in 'url'""" """Download the resource specified in 'url'"""
@ -241,6 +242,13 @@ class DownloadJob(Job):
for pp in self.postprocessors: for pp in self.postprocessors:
pp.finalize() pp.finalize()
def handle_skip(self):
    """Report the current file as skipped and enforce the skip limit.

    The path of the current file is always passed to 'self.out.skip()'.
    If a skip exception is configured ('self._skipexc', set from the
    'skip' option to 'exception.StopExtraction' for "abort" or
    'sys.exit' for "exit"), the consecutive-skip counter is incremented
    and the configured exception is raised once the counter reaches
    'self._skipmax'.

    Raises:
        Whatever 'self._skipexc()' raises, once 'self._skipcnt' has
        reached 'self._skipmax'.
    """
    self.out.skip(self.pathfmt.path)

    # '_skipexc' is only assigned when the 'skip' option is truthy.
    # With skipping disabled this method can still be reached (empty
    # 'temppath' after a download), so treat a missing attribute as
    # "no abort/exit configured" instead of failing with AttributeError.
    skipexc = getattr(self, "_skipexc", None)
    if skipexc:
        self._skipcnt += 1
        # a limit of 0 (plain "abort"/"exit") triggers on the first skip
        if self._skipcnt >= self._skipmax:
            raise skipexc()
def download(self, url): def download(self, url):
"""Download 'url'""" """Download 'url'"""
scheme = url.partition(":")[0] scheme = url.partition(":")[0]
@ -272,6 +280,20 @@ class DownloadJob(Job):
self.pathfmt = util.PathFormat(self.extractor) self.pathfmt = util.PathFormat(self.extractor)
self.sleep = self.extractor.config("sleep") self.sleep = self.extractor.config("sleep")
skip = self.extractor.config("skip", True)
if skip:
self._skipexc = None
if isinstance(skip, str):
skip, _, smax = skip.partition(":")
if skip == "abort":
self._skipexc = exception.StopExtraction
elif skip == "exit":
self._skipexc = sys.exit
self._skipcnt = 0
self._skipmax = text.parse_int(smax)
else:
self.pathfmt.exists = lambda x=None: False
archive = self.extractor.config("archive") archive = self.extractor.config("archive")
if archive: if archive:
path = util.expand_path(archive) path = util.expand_path(archive)

@ -452,17 +452,6 @@ class PathFormat():
if os.altsep: if os.altsep:
self.basedirectory = self.basedirectory.replace(os.altsep, os.sep) self.basedirectory = self.basedirectory.replace(os.altsep, os.sep)
skip = extractor.config("skip", True)
if skip:
if skip == "abort":
self._skipexc = exception.StopExtraction
elif skip == "exit":
self._skipexc = sys.exit
else:
self._skipexc = None
else:
self.exists = lambda x=None: False
def open(self, mode="wb"): def open(self, mode="wb"):
"""Open file and return a corresponding file object""" """Open file and return a corresponding file object"""
return open(self.temppath, mode) return open(self.temppath, mode)
@ -471,9 +460,8 @@ class PathFormat():
"""Return True if the file exists on disk or in 'archive'""" """Return True if the file exists on disk or in 'archive'"""
if (archive and archive.check(self.keywords) or if (archive and archive.check(self.keywords) or
self.has_extension and os.path.exists(self.realpath)): self.has_extension and os.path.exists(self.realpath)):
if self._skipexc:
raise self._skipexc()
if not self.has_extension: if not self.has_extension:
# adjust display name
self.set_extension("") self.set_extension("")
if self.path[-1] == ".": if self.path[-1] == ".":
self.path = self.path[:-1] self.path = self.path[:-1]

Loading…
Cancel
Save