pre-compile regex pattern for remove_html() and split_html()

pull/644/head
Mike Fährmann 5 years ago
parent fdd2dd5136
commit f6c5edb76b
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -15,6 +15,8 @@ import datetime
import urllib.parse
HTML_RE = re.compile("<[^>]+>")
INVALID_XML_CHARS = (
"\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
"\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12",
@@ -39,7 +41,7 @@ def clean_xml(xmldata, repl=""):
def remove_html(txt, repl=" ", sep=" "):
"""Remove html-tags from a string"""
try:
- txt = re.sub("<[^>]+>", repl, txt)
+ txt = HTML_RE.sub(repl, txt)
except TypeError:
return ""
if sep:
@@ -51,7 +53,7 @@ def split_html(txt, sep=None):
"""Split input string by html-tags"""
try:
return [
- x.strip() for x in re.split("<[^>]+>", txt)
+ x.strip() for x in HTML_RE.split(txt)
if x and not x.isspace()
]
except TypeError:

Loading…
Cancel
Save