def parse_unicode_escapes(txt):
    """Convert JSON Unicode escapes in 'txt' into actual characters

    Every '\\uXXXX' sequence (exactly four hexadecimal digits, either
    case) is replaced by the character with that code point.  JSON
    encodes characters outside the Basic Multilingual Plane as a UTF-16
    surrogate pair ('\\ud83d\\ude00'); such a pair is merged into the
    single character it stands for.  A surrogate escape without a
    matching partner is converted on its own, and malformed escapes
    (fewer than four hex digits) are left untouched.

    Returns 'txt' itself when it contains no '\\u' at all.
    """
    # cheap substring test avoids the regex machinery for the common case
    if "\\u" not in txt:
        return txt

    pattern = (
        # group 1: the first (or only) UTF-16 code unit
        r"\\u([0-9a-fA-F]{4})"
        # group 2: an optional low surrogate (DC00-DFFF), consumed only
        # when the lookbehind confirms that group 1 was a high surrogate
        # (D800-DBFF)
        r"(?:(?<=[dD][89abAB][0-9a-fA-F]{2})"
        r"\\u([dD][c-fC-F][0-9a-fA-F]{2}))?"
    )
    return re.sub(pattern, _hex_to_char, txt)


def _hex_to_char(match):
    """Return the character described by a Unicode-escape regex match

    Group 1 holds the four hex digits of the first code unit.  If the
    pattern also captured a low surrogate as group 2, both halves are
    combined into one supplementary-plane code point.
    """
    code = int(match.group(1), 16)
    # 'lastindex' is 2 only if the optional low-surrogate group took part
    # in the match; patterns with a single group always skip this branch
    if match.lastindex == 2:
        low = int(match.group(2), 16)
        code = 0x10000 + ((code - 0xD800) << 10) + (low - 0xDC00)
    return chr(code)
def test_parse_unicode_escapes(self, f=text.parse_unicode_escapes):
    """Check 'f' against a table of (input, expected output) pairs"""
    cases = (
        # nothing to convert: empty, plain ASCII, literal non-ASCII
        ("", ""),
        ("foobar", "foobar"),
        ("foo’bar", "foo’bar"),
        # one complete escape
        ("foo\\u2019bar", "foo’bar"),
        # exactly four hex digits are consumed; the 'b' belongs to the escape
        ("foo\\u201bar", "foo‛ar"),
        # a non-hex digit makes the escape invalid, so it stays as-is
        ("foo\\u201zar", "foo\\u201zar"),
        # several escapes mixed with plain text
        ("\\u2018foo\\u2019\\u2020bar\\u00ff", "‘foo’†barÿ"),
    )
    for txt, expected in cases:
        self.assertEqual(f(txt), expected)