diff --git a/gallery_dl/text.py b/gallery_dl/text.py
new file mode 100644
index 00000000..47fd7258
--- /dev/null
+++ b/gallery_dl/text.py
@@ -0,0 +1,60 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2015 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Collection of functions that work on strings/text"""
+
+import re
+import html.parser
+import urllib.parse
+import platform
+
+def remove_html(text):
+    """Strip html-tags from a string
+
+    Every tag is replaced by a single space; afterwards all runs of
+    whitespace are collapsed into one space each and leading/trailing
+    whitespace is dropped.
+    """
+    without_tags = re.sub("<[^>]+?>", " ", text)
+    words = without_tags.split()
+    return " ".join(words)
+
+def filename_from_url(url):
+    """Extract the last part of an url to use as a filename
+
+    Only the path component of 'url' is considered, so a query string
+    or fragment never ends up in the result, regardless of whether the
+    path contains a "/" or not.
+
+    If 'url' cannot be parsed or its path component is empty, 'url'
+    itself is returned unchanged.
+    """
+    try:
+        path = urllib.parse.urlparse(url).path
+    except ValueError:
+        # urlparse() rejects malformed urls (e.g. unbalanced IPv6 brackets)
+        return url
+    if not path:
+        return url
+    # rpartition() yields the complete path if it contains no "/"
+    return path.rpartition("/")[2]
+
+def clean_path_windows(path):
+    """Replace illegal characters in a path-segment (Windows)
+
+    The characters <, >, :, ", |, ?, * as well as forward and backward
+    slashes are each substituted with an underscore.
+    """
+    illegal = '<>:"\\/|?*'
+    table = str.maketrans(illegal, "_" * len(illegal))
+    return path.translate(table)
+
+def clean_path_posix(path):
+    """Replace illegal characters in a path-segment (Posix)
+
+    Only the path separator "/" is substituted (with an underscore).
+    """
+    segments = path.split("/")
+    return "_".join(segments)
+
+def extract(txt, begin, end, pos=0):
+    """Extract the text between 'begin' and 'end' from 'txt'
+
+    The search for 'begin' starts at index 'pos'; 'end' is searched for
+    behind the end of 'begin'.
+
+    Returns a tuple of the text between both markers (the markers
+    themselves are not included) and the index right behind 'end'.
+    If either marker cannot be found, (None, pos) is returned.
+    """
+    start = txt.find(begin, pos)
+    if start < 0:
+        return None, pos
+    start += len(begin)
+
+    stop = txt.find(end, start)
+    if stop < 0:
+        return None, pos
+    return txt[start:stop], stop + len(end)
+
+def extract_all(txt, begin, end, pos=0):
+    """Extract 'begin', 'end' and everything between them from 'txt'
+
+    The search for 'begin' starts at index 'pos'; 'end' is searched for
+    behind the end of 'begin'.
+
+    Returns a tuple of the matched text (including both markers) and
+    the index right behind 'end'.
+    If either marker cannot be found, (None, pos) is returned.
+    """
+    try:
+        start = txt.index(begin, pos)
+        stop = txt.index(end, start + len(begin))
+    except ValueError:
+        return None, pos
+    stop += len(end)
+    return txt[start:stop], stop
+
+# select the clean_path() implementation matching the current platform
+clean_path = (
+    clean_path_windows if platform.system() == "Windows"
+    else clean_path_posix
+)
+
+# alias for decoding percent-encoded (%xx) sequences in strings
+unquote = urllib.parse.unquote
+
+# alias for converting html character references (&amp;, &#228;, ...)
+# to their corresponding characters.
+# HTMLParser.unescape() was deprecated in Python 3.4 and removed in 3.9;
+# html.unescape() is its replacement. The old method is only used as
+# fallback on interpreters that do not provide html.unescape() yet.
+try:
+    unescape = html.unescape
+except AttributeError:
+    unescape = html.parser.HTMLParser().unescape