add general 'blacklist' and 'whitelist' options (#492, #844)

pull/997/head
Mike Fährmann 4 years ago
parent abda352a5b
commit c78aa17506
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -363,6 +363,20 @@ Description Transfer an extractor's (sub)category values to all child
=========== =====
extractor.*.blacklist & .whitelist
----------------------------------
=========== =====
Type ``list`` of ``strings``
Default ``["oauth", "recursive", "test"]`` + current extractor category
Description A list of extractor categories to ignore (or allow)
when spawning child extractors for unknown URLs,
e.g. from ``reddit`` or ``plurk``.
Note: Any ``blacklist`` setting will automatically include
``"oauth"``, ``"recursive"``, and ``"test"``.
If a non-empty ``whitelist`` is set, ``blacklist`` is ignored.
=========== =====
extractor.*.archive
-------------------
=========== =====

@ -197,6 +197,7 @@ class DownloadJob(Job):
def __init__(self, url, parent=None):
Job.__init__(self, url, parent)
self.log = self.get_logger("download")
self.blacklist = None
self.archive = None
self.sleep = None
self.downloaders = {}
@ -308,6 +309,12 @@ class DownloadJob(Job):
extr = kwdict["_extractor"].from_url(url)
else:
extr = extractor.find(url)
if extr:
if self.blacklist is None:
self.blacklist = self._build_blacklist()
if extr.category in self.blacklist:
extr = None
if extr:
self.status |= self.__class__(extr, self).run()
else:
@ -437,6 +444,25 @@ class DownloadJob(Job):
self.extractor.log.debug(
"Active postprocessor modules: %s", pp_list)
def _build_blacklist(self):
    """Return the set of extractor categories to reject for child URLs.

    A non-empty "whitelist" option takes precedence: every category
    reported by extractor._list_classes() is rejected except the
    whitelisted ones, and the "blacklist" option is not consulted.

    Otherwise the "blacklist" option is used, falling back to the
    current extractor's own category when it is unset or empty.
    util.SPECIAL_EXTRACTORS is always merged into that result.

    Both options may be given as a list of category names or as a
    single comma-separated string.
    """
    extr = self.extractor

    allowed = extr.config("whitelist")
    if allowed:
        if isinstance(allowed, str):
            allowed = allowed.split(",")
        # whitelist mode: start from every known category and
        # keep only those that were not explicitly allowed
        known = {cls.category for cls in extractor._list_classes()}
        return known.difference(allowed)

    denied = extr.config("blacklist")
    if not denied:
        # nothing configured: only refuse to re-enter our own category
        denied = {extr.category}
    elif isinstance(denied, str):
        denied = set(denied.split(","))
    else:
        denied = set(denied)

    # special extractors are rejected in blacklist mode no matter what
    return denied | util.SPECIAL_EXTRACTORS
class SimulationJob(DownloadJob):
"""Simulate the extraction process without downloading anything"""

Loading…
Cancel
Save