add '-j/--dump-json' option

this outputs the extractor-results in JSON format rather than
downloading files
pull/13/head
Mike Fährmann 8 years ago
parent c9a5650cf8
commit b43cd88101
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -83,6 +83,8 @@ def main():
jobtype.maxdepth = args.list_urls jobtype.maxdepth = args.list_urls
elif args.list_keywords: elif args.list_keywords:
jobtype = job.KeywordJob jobtype = job.KeywordJob
elif args.list_data:
jobtype = job.DataJob
else: else:
jobtype = job.DownloadJob jobtype = job.DownloadJob

@ -6,6 +6,7 @@
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation. # published by the Free Software Foundation.
import sys
import json import json
import hashlib import hashlib
from . import extractor, downloader, config, util, output, exception from . import extractor, downloader, config, util, output, exception
@ -79,7 +80,7 @@ class Job():
) )
# TODO: support for multiple message versions # TODO: support for multiple message versions
def handle_url(self, url, kexwords): def handle_url(self, url, keywords):
"""Handle Message.Url""" """Handle Message.Url"""
def handle_directory(self, keywords): def handle_directory(self, keywords):
@ -265,3 +266,35 @@ class TestJob(DownloadJob):
"""Update the content hash""" """Update the content hash"""
if self.content: if self.content:
self.get_downloader(url).download(url, self.fileobj) self.get_downloader(url).download(url, self.fileobj)
class DataJob(Job):
    """Gather every message the extractor yields and write them out as JSON"""

    def __init__(self, url, file=sys.stdout):
        """Prepare a job for 'url' whose results get written to 'file'"""
        Job.__init__(self, url)
        self.file = file
        self.data = []
        # value of the ("output", "ascii") config entry, True if unset;
        # handed to json.dump() as its 'ensure_ascii' argument
        self.ensure_ascii = config.get(("output", "ascii"), True)

    def run(self):
        """Iterate over the extractor, then dump all gathered messages"""
        # Phase 1: store a snapshot of each message in 'self.data'.
        # NOTE(review): 'self.extractor' is presumably set up by
        # Job.__init__ -- confirm against the base class.
        try:
            for message in self.extractor:
                kind = message[0]
                if kind in (Message.Headers, Message.Cookies):
                    # the payload of these messages is converted to a dict
                    entry = (kind, dict(message[1]))
                else:
                    # copy every part that offers a copy() method
                    entry = []
                    for item in message:
                        if hasattr(item, "copy"):
                            item = item.copy()
                        entry.append(item)
                self.data.append(entry)
        except Exception as exc:
            # an error is recorded as the final entry instead of propagating,
            # so whatever was gathered up to this point still gets dumped
            self.data.append((exc.__class__.__name__, str(exc)))

        # Phase 2: serialize the gathered messages to 'self.file'
        json.dump(
            self.data,
            self.file,
            indent=2,
            sort_keys=True,
            ensure_ascii=self.ensure_ascii,
        )
        self.file.write("\n")

@ -63,6 +63,10 @@ def build_parser():
"-g", "--get-urls", dest="list_urls", action="count", "-g", "--get-urls", dest="list_urls", action="count",
help="Print download urls", help="Print download urls",
) )
parser.add_argument(
"-j", "--dump-json", dest="list_data", action="store_true",
help="Print JSON information",
)
parser.add_argument( parser.add_argument(
"-d", "--dest", "-d", "--dest",
metavar="DEST", action=ConfigAction, dest="base-directory", metavar="DEST", action=ConfigAction, dest="base-directory",

Loading…
Cancel
Save