Fix serialization of 'datetime' objects in '--write-metadata'

Simplified universal serialization support in json.dump() can be achieved
by passing 'default=str', which was already the case in DataJob.run()
for -j/--dump-json, but not for the 'metadata' post-processor.

This commit introduces util.dump_json() that (more or less) unifies the
JSON output procedure of both --write-metadata and --dump-json.

(#251, #252)
pull/266/head
Mike Fährmann 5 years ago
parent 8de5866fd2
commit 523ebc9b0b
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -8,7 +8,6 @@
import sys import sys
import time import time
import json
import logging import logging
from . import extractor, downloader, postprocessor from . import extractor, downloader, postprocessor
from . import config, text, util, output, exception from . import config, text, util, output, exception
@ -469,16 +468,13 @@ class DataJob(Job):
except BaseException: except BaseException:
pass pass
# convert numbers to string
if config.get(("output", "num-to-str"), False): if config.get(("output", "num-to-str"), False):
for msg in self.data: for msg in self.data:
util.transform_dict(msg[-1], util.number_to_string) util.transform_dict(msg[-1], util.number_to_string)
# dump to 'file' # dump to 'file'
json.dump( util.dump_json(self.data, self.file, self.ascii, 2)
self.data, self.file,
sort_keys=True, indent=2, ensure_ascii=self.ascii, default=str,
)
self.file.write("\n")
def handle_url(self, url, kwdict): def handle_url(self, url, kwdict):
self.data.append((Message.Url, url, self._filter(kwdict))) self.data.append((Message.Url, url, self._filter(kwdict)))

@ -10,7 +10,6 @@
from .common import PostProcessor from .common import PostProcessor
from .. import util from .. import util
import json
class MetadataPP(PostProcessor): class MetadataPP(PostProcessor):
@ -61,13 +60,7 @@ class MetadataPP(PostProcessor):
file.write("\n") file.write("\n")
def _write_json(self, file, pathfmt): def _write_json(self, file, pathfmt):
json.dump( util.dump_json(pathfmt.keywords, file, self.ascii, self.indent)
pathfmt.keywords,
file,
sort_keys=True,
indent=self.indent,
ensure_ascii=self.ascii,
)
__postprocessor__ = MetadataPP __postprocessor__ = MetadataPP

@ -11,6 +11,7 @@
import re import re
import os import os
import sys import sys
import json
import shutil import shutil
import string import string
import _string import _string
@ -92,6 +93,18 @@ def to_string(value):
return str(value) return str(value)
def dump_json(obj, fp=None, ensure_ascii=True, indent=4):
    """Serialize 'obj' as JSON and write it to 'fp'

    Arguments:
        obj: the object to serialize
        fp: writable text stream receiving the output;
            'None' (the default) selects the current 'sys.stdout'
        ensure_ascii: escape all non-ASCII characters if true
        indent: indentation width passed on to json.dump()

    Keys are written in sorted order, values without a native JSON
    representation (e.g. 'datetime' objects) are converted with str(),
    and a single newline is written after the JSON document.
    """
    # Look up sys.stdout at call time; a 'fp=sys.stdout' default would be
    # bound once at definition time and miss any later replacement of
    # sys.stdout (redirection, capturing, re-wrapping, ...).
    if fp is None:
        fp = sys.stdout

    json.dump(
        obj, fp,
        ensure_ascii=ensure_ascii,
        indent=indent,
        default=str,
        sort_keys=True,
    )
    fp.write("\n")
def expand_path(path): def expand_path(path):
"""Expand environment variables and tildes (~)""" """Expand environment variables and tildes (~)"""
if not path: if not path:

Loading…
Cancel
Save