pre-compile regex pattern for remove_html() and split_html()

5 years ago · f6c5edb76b
parent fdd2dd5136
commit f6c5edb76b
1 changed file with 4 additions and 2 deletions
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -15,6 +15,8 @@ import datetime
 import urllib.parse


+HTML_RE = re.compile("<[^>]+>")
+
 INVALID_XML_CHARS = (
    "\x00", "\x01", "\x02", "\x03", "\x04", "\x05", "\x06", "\x07",
    "\x08", "\x0b", "\x0c", "\x0e", "\x0f", "\x10", "\x11", "\x12",
@@ -39,7 +41,7 @@ def clean_xml(xmldata, repl=""):
 def remove_html(txt, repl=" ", sep=" "):
    """Remove html-tags from a string"""
    try:
-        txt = re.sub("<[^>]+>", repl, txt)
+        txt = HTML_RE.sub(repl, txt)
    except TypeError:
        return ""
    if sep:
@@ -51,7 +53,7 @@ def split_html(txt, sep=None):
    """Split input string by html-tags"""
    try:
        return [
-            x.strip() for x in re.split("<[^>]+>", txt)
+            x.strip() for x in HTML_RE.split(txt)
            if x and not x.isspace()
        ]
    except TypeError: