implement 'text.root_from_url()'

use domain from input URL for kemono
3 years ago · bddcec49f1
parent 92c492dc09
commit bddcec49f1
4 changed files with 21 additions and 3 deletions
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@ -621,8 +621,7 @@ class BaseExtractor(Extractor):
                    if index:
                        self.category, self.root = self.instances[index-1]
                        if not self.root:
-                            url = text.ensure_http_scheme(match.group(0))
-                            self.root = url[:url.index("/", 8)]
+                            self.root = text.root_from_url(match.group(0))
                    else:
                        self.root = group
                        self.category = group.partition("://")[2]
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@ -30,8 +30,8 @@ class KemonopartyExtractor(Extractor):
    def __init__(self, match):
        if match.group(1) == "coomer":
            self.category = "coomerparty"
-            self.root = "https://coomer.party"
            self.cookiedomain = ".coomer.party"
+        self.root = text.root_from_url(match.group(0))
        Extractor.__init__(self, match)

    def items(self):
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@ -46,6 +46,13 @@ def ensure_http_scheme(url, scheme="https://"):
    return url


def root_from_url(url, scheme="https://"):
    """Extract scheme and domain from a URL

    If 'url' does not start with "http://" or "https://", 'scheme' is
    prepended to its domain part. Everything from the first "/" after
    the domain onward is dropped. A URL without such a slash (for
    example "example.org" or "https://example.org") is treated as
    consisting of a domain only instead of raising ValueError.
    """
    if not url.startswith(("https://", "http://")):
        try:
            return scheme + url[:url.index("/")]
        except ValueError:
            # no path component at all: the whole input is the domain
            return scheme + url
    try:
        # start searching at offset 8 == len("https://") so the two
        # slashes belonging to the scheme are never matched
        return url[:url.index("/", 8)]
    except ValueError:
        # scheme + domain without any trailing path
        return url
+
+
 def filename_from_url(url):
    """Extract the last part of an URL to use as a filename"""
    try:
--- a/test/test_text.py
+++ b/test/test_text.py
@ -102,6 +102,18 @@ class TestText(unittest.TestCase):
        for value in INVALID_ALT:
            self.assertEqual(f(value), value)

+    def test_root_from_url(self, f=text.root_from_url):
+        result = "https://example.org"
+        self.assertEqual(f("https://example.org/")    , result)
+        self.assertEqual(f("https://example.org/path"), result)
+        self.assertEqual(f("example.org/")            , result)
+        self.assertEqual(f("example.org/path/")       , result)
+
+        result = "http://example.org"
+        self.assertEqual(f("http://example.org/")     , result)
+        self.assertEqual(f("http://example.org/path/"), result)
+        self.assertEqual(f("example.org/", "http://") , result)
+
    def test_filename_from_url(self, f=text.filename_from_url):
        result = "filename.ext"