add 'text.parse_float()' + cleanup in text.py

pull/170/head
Mike Fährmann 6 years ago
parent 0c32dc5858
commit 2d2953a5bf
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -103,7 +103,7 @@ class BehanceGalleryExtractor(BehanceExtractor):
"gallery_id": text.parse_int(self.gallery_id),
"title": text.unescape(title or ""),
"user": ", ".join(users),
"fields": [f for f in text.split_html(fields) if f != ", "],
"fields": [f for f in text.split_html(fields) if f != ","],
"date": text.parse_int(date),
"views": text.parse_int(stats[0]),
"votes": text.parse_int(stats[1]),

@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2014-2018 Mike Fährmann
# Copyright 2014-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@ -88,7 +88,7 @@ class SankakuExtractor(SharedConfigExtractor):
"id": text.parse_int(post_id),
"md5": file_url.rpartition("/")[2].partition(".")[0],
"tags": text.unescape(tags),
"vote_average": float(vavg or 0),
"vote_average": text.parse_float(vavg),
"vote_count": text.parse_int(vcnt),
"created_at": created,
"rating": (rating or "?")[0].lower(),

@ -1,12 +1,12 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
# Copyright 2015-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Collection of functions that work in strings/text"""
"""Collection of functions that work on strings/text"""
import re
import html
@ -47,7 +47,7 @@ def split_html(txt, sep=None):
"""Split input string by html-tags"""
try:
return [
x for x in re.split("<[^>]+>", txt)
x.strip() for x in re.split("<[^>]+>", txt)
if x and not x.isspace()
]
except TypeError:
@ -165,6 +165,16 @@ def parse_int(value, default=0):
return default
def parse_float(value, default=0.0):
    """Convert 'value' to float

    Returns 'default' if 'value' is falsy (None, "", 0, empty containers)
    or if it cannot be converted by float().
    """
    if not value:
        return default
    try:
        return float(value)
    # OverflowError: float() raises it for ints too large to represent
    # (e.g. 10**400); treat that like any other failed conversion
    except (ValueError, TypeError, OverflowError):
        return default
def parse_query(qs):
"""Parse a query string into key-value pairs"""
result = {}
@ -182,12 +192,11 @@ if os.name == "nt":
else:
clean_path = clean_path_posix
urljoin = urllib.parse.urljoin
quote = urllib.parse.quote
unquote = urllib.parse.unquote
escape = html.escape
try:
unescape = html.unescape
except AttributeError:
import html.parser
unescape = html.parser.HTMLParser().unescape
escape = html.escape
unescape = html.unescape

@ -71,8 +71,9 @@ class TestText(unittest.TestCase):
# standard usage
self.assertEqual(f(""), empty)
self.assertEqual(f("Hello World."), ["Hello World."])
self.assertEqual(f(" Hello World. "), [" Hello World. "])
self.assertEqual(f(" Hello World. "), ["Hello World."])
self.assertEqual(f("Hello<br/>World."), result)
self.assertEqual(f(" Hello <br/> World. "), result)
self.assertEqual(
f("<div><b class='a'>Hello</b><i>World.</i></div>"), result)
@ -260,6 +261,27 @@ class TestText(unittest.TestCase):
self.assertEqual(f(value, default), default)
self.assertEqual(f("zzz", default), default)
def test_parse_float(self, f=text.parse_float):
    """Check parse_float() on valid input, invalid input, and 'default'"""
    # standard usage: numbers and numeric strings convert to float
    self.assertEqual(f(0), 0.0)
    self.assertEqual(f("0"), 0.0)
    self.assertEqual(f(123), 123.0)
    self.assertEqual(f("123"), 123.0)
    self.assertEqual(f(123.456), 123.456)
    self.assertEqual(f("123.456"), 123.456)

    # invalid arguments fall back to the built-in default of 0.0
    for value in INVALID_ALT:
        self.assertEqual(f(value), 0.0)
    self.assertEqual(f("zzz"), 0.0)
    self.assertEqual(f([1, 2, 3]), 0.0)
    self.assertEqual(f({1: 2, 3: 4}), 0.0)

    # 'default' argument is returned unchanged for invalid input
    default = "default"
    for value in INVALID_ALT:
        self.assertEqual(f(value, default), default)
    self.assertEqual(f("zzz", default), default)
def test_parse_query(self, f=text.parse_query):
# standard usage
self.assertEqual(f(""), {})

Loading…
Cancel
Save