From 2d2953a5bfc5cbcd91e47be0cff1f1e8d12e8e4b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Tue, 29 Jan 2019 13:14:30 +0100 Subject: [PATCH] add 'text.parse_float()' + cleanup in text.py --- gallery_dl/extractor/behance.py | 2 +- gallery_dl/extractor/sankaku.py | 4 ++-- gallery_dl/text.py | 27 ++++++++++++++++++--------- test/test_text.py | 24 +++++++++++++++++++++++- 4 files changed, 44 insertions(+), 13 deletions(-) diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index 23c6d0f2..5cd0be75 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -103,7 +103,7 @@ class BehanceGalleryExtractor(BehanceExtractor): "gallery_id": text.parse_int(self.gallery_id), "title": text.unescape(title or ""), "user": ", ".join(users), - "fields": [f for f in text.split_html(fields) if f != ", "], + "fields": [f for f in text.split_html(fields) if f != ","], "date": text.parse_int(date), "views": text.parse_int(stats[0]), "votes": text.parse_int(stats[1]), diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index 1b78f601..d6df528c 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -1,6 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2014-2018 Mike Fährmann +# Copyright 2014-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as @@ -88,7 +88,7 @@ class SankakuExtractor(SharedConfigExtractor): "id": text.parse_int(post_id), "md5": file_url.rpartition("/")[2].partition(".")[0], "tags": text.unescape(tags), - "vote_average": float(vavg or 0), + "vote_average": text.parse_float(vavg), "vote_count": text.parse_int(vcnt), "created_at": created, "rating": (rating or "?")[0].lower(), diff --git a/gallery_dl/text.py b/gallery_dl/text.py index af906731..65e988ae 100644 --- a/gallery_dl/text.py +++ b/gallery_dl/text.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- -# Copyright 2015-2018 Mike Fährmann +# Copyright 2015-2019 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Collection of functions that work in strings/text""" +"""Collection of functions that work on strings/text""" import re import html @@ -47,7 +47,7 @@ def split_html(txt, sep=None): """Split input string by html-tags""" try: return [ - x for x in re.split("<[^>]+>", txt) + x.strip() for x in re.split("<[^>]+>", txt) if x and not x.isspace() ] except TypeError: @@ -165,6 +165,16 @@ def parse_int(value, default=0): return default +def parse_float(value, default=0.0): + """Convert 'value' to float""" + if not value: + return default + try: + return float(value) + except (ValueError, TypeError): + return default + + def parse_query(qs): """Parse a query string into key-value pairs""" result = {} @@ -182,12 +192,11 @@ if os.name == "nt": else: clean_path = clean_path_posix + urljoin = urllib.parse.urljoin + +quote = urllib.parse.quote unquote = urllib.parse.unquote -escape = html.escape -try: - unescape = html.unescape -except AttributeError: - import html.parser - unescape = html.parser.HTMLParser().unescape +escape = html.escape +unescape = html.unescape diff --git a/test/test_text.py b/test/test_text.py index 697d83b0..314578d0 100644 --- a/test/test_text.py +++ b/test/test_text.py @@ -71,8 +71,9 @@ class TestText(unittest.TestCase): # standard usage self.assertEqual(f(""), empty) self.assertEqual(f("Hello World."), ["Hello World."]) - self.assertEqual(f(" Hello World. "), [" Hello World. "]) + self.assertEqual(f(" Hello World. "), ["Hello World."]) self.assertEqual(f("Hello
World."), result) + self.assertEqual(f(" Hello
World. "), result) self.assertEqual( f("
HelloWorld.
"), result) @@ -260,6 +261,27 @@ class TestText(unittest.TestCase): self.assertEqual(f(value, default), default) self.assertEqual(f("zzz", default), default) + def test_parse_float(self, f=text.parse_float): + self.assertEqual(f(0), 0.0) + self.assertEqual(f("0"), 0.0) + self.assertEqual(f(123), 123.0) + self.assertEqual(f("123"), 123.0) + self.assertEqual(f(123.456), 123.456) + self.assertEqual(f("123.456"), 123.456) + + # invalid arguments + for value in INVALID_ALT: + self.assertEqual(f(value), 0.0) + self.assertEqual(f("zzz"), 0.0) + self.assertEqual(f([1, 2, 3]), 0.0) + self.assertEqual(f({1: 2, 3: 4}), 0.0) + + # 'default' argument + default = "default" + for value in INVALID_ALT: + self.assertEqual(f(value, default), default) + self.assertEqual(f("zzz", default), default) + def test_parse_query(self, f=text.parse_query): # standard usage self.assertEqual(f(""), {})