From ce93c460a67900e24cbb450ca953428d84f3bb2a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Thu, 15 Jun 2023 13:07:51 +0200 Subject: [PATCH] [formatter] implement 'H' conversion (#4164) to remove HTML tags and unescape HTML entities --- docs/formatting.md | 12 ++++++++++++ gallery_dl/formatter.py | 1 + test/test_formatter.py | 9 +++++++-- 3 files changed, 20 insertions(+), 2 deletions(-) diff --git a/docs/formatting.md b/docs/formatting.md index 86abc3ef..f188a538 100644 --- a/docs/formatting.md +++ b/docs/formatting.md @@ -94,6 +94,18 @@ Conversion specifiers allow to *convert* the value to a different form or type. {created!d} 2010-01-01 00:00:00 + + U + Convert HTML entities + {html!U} + <p>foo & bar</p> + + + H + Convert HTML entities & remove HTML tags + {html!H} + foo & bar + s Convert value to str diff --git a/gallery_dl/formatter.py b/gallery_dl/formatter.py index 2ff48c32..500eaa19 100644 --- a/gallery_dl/formatter.py +++ b/gallery_dl/formatter.py @@ -437,6 +437,7 @@ _CONVERSIONS = { "T": util.datetime_to_timestamp_string, "d": text.parse_timestamp, "U": text.unescape, + "H": lambda s: text.unescape(text.remove_html(s)), "g": text.slugify, "S": util.to_string, "s": str, diff --git a/test/test_formatter.py b/test/test_formatter.py index 1bda9d9c..0992f4ba 100644 --- a/test/test_formatter.py +++ b/test/test_formatter.py @@ -28,6 +28,7 @@ class TestFormatter(unittest.TestCase): "l": ["a", "b", "c"], "n": None, "s": " \n\r\tSPACE ", + "h": "<p>foo </p> &amp; bar <p> </p>", "u": "'< / >'", "t": 1262304000, "dt": datetime.datetime(2010, 1, 1), @@ -47,6 +48,10 @@ class TestFormatter(unittest.TestCase): self._run_test("{s!t}", "SPACE") self._run_test("{a!U}", self.kwdict["a"]) self._run_test("{u!U}", "'< / >'") + self._run_test("{a!H}", self.kwdict["a"]) + self._run_test("{h!H}", "foo & bar") + self._run_test("{u!H}", "'< / >'") + self._run_test("{n!H}", "") self._run_test("{a!s}", self.kwdict["a"]) self._run_test("{a!r}", "'" + self.kwdict["a"] + "'") self._run_test("{a!a}", "'" + self.kwdict["a"] + "'") @@ -434,10 +439,10 @@ def noarg(): fmt4 = formatter.parse("\fM " + path + ":lengths") self.assertEqual(fmt1.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt2.format_map(self.kwdict), "96") + self.assertEqual(fmt2.format_map(self.kwdict), "126") self.assertEqual(fmt3.format_map(self.kwdict), "'Title' by Name") - self.assertEqual(fmt4.format_map(self.kwdict), "96") + self.assertEqual(fmt4.format_map(self.kwdict), "126") with self.assertRaises(TypeError): self.assertEqual(fmt0.format_map(self.kwdict), "")