[archive] implement 'archive-event' option (#5784)

With this, IDs of skipped files will no longer be written to an archive
by default. Use "archive-event": "file,skip" to restore the previous
behavior.
pull/5802/head
Mike Fährmann 3 months ago
parent 51fdfbe6fc
commit ea81fa985f
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -865,6 +865,25 @@ Description
may pose a security risk.
extractor.*.archive-event
-------------------------
Type
* ``string``
* ``list`` of ``strings``
Default
``"file"``
Example
* ``"file,skip"``
* ``["file", "skip"]``
Description
`Event(s) <metadata.event_>`__
for which IDs get written to an
`archive <extractor.*.archive_>`__.
Available events are:
``file``, ``skip``
extractor.*.archive-format
--------------------------
Type

@ -315,7 +315,7 @@ class DownloadJob(Job):
pathfmt.build_path()
if pathfmt.exists():
if archive:
if archive and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@ -345,7 +345,7 @@ class DownloadJob(Job):
return
if not pathfmt.temppath:
if archive:
if archive and self._archive_write_skip:
archive.add(kwdict)
self.handle_skip()
return
@ -359,7 +359,7 @@ class DownloadJob(Job):
pathfmt.finalize()
self.out.success(pathfmt.path)
self._skipcnt = 0
if archive:
if archive and self._archive_write_file:
archive.add(kwdict)
if "after" in hooks:
for callback in hooks["after"]:
@ -561,6 +561,16 @@ class DownloadJob(Job):
else:
extr.log.debug("Using download archive '%s'", archive_path)
events = cfg("archive-event")
if events is None:
self._archive_write_file = True
self._archive_write_skip = False
else:
if isinstance(events, str):
events = events.split(",")
self._archive_write_file = ("file" in events)
self._archive_write_skip = ("skip" in events)
skip = cfg("skip", True)
if skip:
self._skipexc = None
@ -676,7 +686,7 @@ class SimulationJob(DownloadJob):
kwdict["extension"] = "jpg"
if self.sleep:
self.extractor.sleep(self.sleep(), "download")
if self.archive:
if self.archive and self._archive_write_skip:
self.archive.add(kwdict)
self.out.skip(self.pathfmt.build_filename(kwdict))

Loading…
Cancel
Save