From 4b94b7d4776cc9625af218bcb0ebe3fe278f03eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Mon, 19 Aug 2024 21:18:39 +0200
Subject: [PATCH] [pp:metadata] add 'include' and 'exclude' options (#6058)

---
Notes:
    - 'include' is evaluated first; when it is set, 'exclude' and
      'private' are not consulted by _make_filter().
    - With 'exclude', util.filter_dict() is still applied before the
      listed keys are removed, unless 'private' is enabled.
    - With neither option, the filter is util.filter_dict itself, or
      no filter at all when 'private' is enabled.
    - Both options also accept a comma-separated string.

 docs/configuration.rst               | 24 ++++++++++++++++++++++
 gallery_dl/postprocessor/metadata.py | 29 ++++++++++++++++++++++++---
 test/test_postprocessor.py           | 30 ++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 3 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index 33141a30..208e4028 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -5692,6 +5692,30 @@ Description
         After downloading all files of a `post`
 
 
+metadata.include
+----------------
+Type
+    ``list`` of ``strings``
+Example
+    ``["id", "width", "height", "description"]``
+Description
+    Include only the given top-level keys when writing JSON data.
+
+    Note: Missing or undefined fields will be silently ignored.
+
+
+metadata.exclude
+----------------
+Type
+    ``list`` of ``strings``
+Example
+    ``["blocked", "watching", "status"]``
+Description
+    Exclude all given keys from written JSON data.
+
+    Note: Cannot be used with `metadata.include`_.
+
+
 metadata.fields
 ---------------
 Type
diff --git a/gallery_dl/postprocessor/metadata.py b/gallery_dl/postprocessor/metadata.py
index e89b1700..08a08f11 100644
--- a/gallery_dl/postprocessor/metadata.py
+++ b/gallery_dl/postprocessor/metadata.py
@@ -103,10 +103,10 @@ class MetadataPP(PostProcessor):
         job.register_hooks({event: self.run for event in events}, options)
 
         self._init_archive(job, options, "_MD_")
+        self.filter = self._make_filter(options)
         self.mtime = options.get("mtime")
         self.omode = options.get("open", omode)
         self.encoding = options.get("encoding", "utf-8")
-        self.private = options.get("private", False)
         self.skip = options.get("skip", False)
 
     def run(self, pathfmt):
@@ -231,10 +231,33 @@ class MetadataPP(PostProcessor):
         fp.write("\n".join(tags) + "\n")
 
     def _write_json(self, fp, kwdict):
-        if not self.private:
-            kwdict = util.filter_dict(kwdict)
+        if self.filter:
+            kwdict = self.filter(kwdict)
         fp.write(self._json_encode(kwdict) + "\n")
 
+    def _make_filter(self, options):
+        include = options.get("include")
+        if include:
+            if isinstance(include, str):
+                include = include.split(",")
+            return lambda d: {k: d[k] for k in include if k in d}
+
+        exclude = options.get("exclude")
+        private = options.get("private")
+        if exclude:
+            if isinstance(exclude, str):
+                exclude = exclude.split(",")
+            exclude = set(exclude)
+
+            if private:
+                return lambda d: {k: v for k, v in d.items()
+                                  if k not in exclude}
+            return lambda d: {k: v for k, v in util.filter_dict(d).items()
+                              if k not in exclude}
+
+        if not private:
+            return util.filter_dict
+
     @staticmethod
     def _make_encoder(options, indent=None):
         return json.JSONEncoder(
diff --git a/test/test_postprocessor.py b/test/test_postprocessor.py
index edd85756..841ebf0a 100644
--- a/test/test_postprocessor.py
+++ b/test/test_postprocessor.py
@@ -585,6 +585,36 @@ class MetadataTest(BasePostprocessorTest):
         self.assertTrue(not e.called)
         self.assertTrue(m.called)
 
+    def test_metadata_option_include(self):
+        self._create(
+            {"include": ["_private", "filename", "foo"], "sort": True},
+            {"public": "hello ワールド", "_private": "foo バー"},
+        )
+
+        with patch("builtins.open", mock_open()) as m:
+            self._trigger()
+
+        self.assertEqual(self._output(m), """{
+    "_private": "foo バー",
+    "filename": "file"
+}
+""")
+
+    def test_metadata_option_exclude(self):
+        self._create(
+            {"exclude": ["category", "filename", "foo"], "sort": True},
+            {"public": "hello ワールド", "_private": "foo バー"},
+        )
+
+        with patch("builtins.open", mock_open()) as m:
+            self._trigger()
+
+        self.assertEqual(self._output(m), """{
+    "extension": "ext",
+    "public": "hello ワールド"
+}
+""")
+
     @staticmethod
     def _output(mock):
         return "".join(