[job] add 'resolve' argument to DataJob (#5864)

pull/5870/head
Mike Fährmann 2 months ago
parent 287a7d13cf
commit 84a634fc14
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -857,15 +857,20 @@ class InfoJob(Job):
class DataJob(Job): class DataJob(Job):
"""Collect extractor results and dump them""" """Collect extractor results and dump them"""
def __init__(self, url, parent=None, file=sys.stdout, ensure_ascii=True,
             resolve=False):
    """Prepare result collection and optional resolving of queued URLs

    'url' and 'parent' are passed through to Job.__init__().
    'file' is stored as the target that collected results get dumped to.
    'ensure_ascii' is the fallback for the 'output.ascii' config value.
    'resolve' enables handle_queue_resolve() when truthy; True is
    stored as 128, any other value is stored unchanged.
    """
    Job.__init__(self, url, parent)
    self.file = file
    self.data = []
    self.ascii = config.get(("output",), "ascii", ensure_ascii)

    # nested jobs receive 'self.resolve - 1', so this value bounds
    # how many levels of queued URLs get resolved
    if resolve is True:
        resolve = 128
    self.resolve = resolve

    # keep all metadata fields only when 'output.private' is enabled
    if config.get(("output",), "private"):
        self.filter = dict.copy
    else:
        self.filter = util.filter_dict

    # shadow the class-level handler on this instance only
    if resolve:
        self.handle_queue = self.handle_queue_resolve
def run(self): def run(self):
self._init() self._init()
@ -891,12 +896,13 @@ class DataJob(Job):
for msg in self.data: for msg in self.data:
util.transform_dict(msg[-1], util.number_to_string) util.transform_dict(msg[-1], util.number_to_string)
# dump to 'file' if self.file:
try: # dump to 'file'
util.dump_json(self.data, self.file, self.ascii, 2) try:
self.file.flush() util.dump_json(self.data, self.file, self.ascii, 2)
except Exception: self.file.flush()
pass except Exception:
pass
return 0 return 0
@ -908,3 +914,17 @@ class DataJob(Job):
def handle_queue(self, url, kwdict):
    """Record a queued URL together with its filtered metadata"""
    entry = (Message.Queue, url, self.filter(kwdict))
    self.data.append(entry)
def handle_queue_resolve(self, url, kwdict):
    """Run a nested job for a queued URL and collect its results

    The extractor is built from the class stored under '_extractor'
    in 'kwdict' if there is one, otherwise it is looked up with
    extractor.find(). When no extractor is available, the URL is
    recorded as a regular Queue message instead.
    """
    extr_cls = kwdict.get("_extractor")
    extr = extr_cls.from_url(url) if extr_cls else extractor.find(url)

    if extr:
        # same job class, this job as parent, no output file,
        # and one less level of resolving left
        child = self.__class__(
            extr, self, None, self.ascii, self.resolve-1)
        # share the result list so nested results end up in this job
        child.data = self.data
        child.run()
        return None

    return self.data.append((Message.Queue, url, self.filter(kwdict)))

Loading…
Cancel
Save