implement 'text.root_from_url()'

use domain from input URL for kemono
pull/2396/head
Mike Fährmann 3 years ago
parent 92c492dc09
commit bddcec49f1
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -621,8 +621,7 @@ class BaseExtractor(Extractor):
if index:
self.category, self.root = self.instances[index-1]
if not self.root:
url = text.ensure_http_scheme(match.group(0))
self.root = url[:url.index("/", 8)]
self.root = text.root_from_url(match.group(0))
else:
self.root = group
self.category = group.partition("://")[2]

@ -30,8 +30,8 @@ class KemonopartyExtractor(Extractor):
def __init__(self, match):
# NOTE(review): this is a flattened diff hunk -- indentation is lost and
# self.root is assigned twice below; presumably the literal assignment is
# the pre-change line and the root_from_url() call its replacement. Confirm
# against the actual file before relying on statement nesting.
if match.group(1) == "coomer":
# URLs whose first capture group is "coomer" get their own category
# and cookie domain.
self.category = "coomerparty"
self.root = "https://coomer.party"
self.cookiedomain = ".coomer.party"
# Take scheme + domain from the full matched URL instead of a fixed value.
self.root = text.root_from_url(match.group(0))
Extractor.__init__(self, match)
def items(self):

@ -46,6 +46,13 @@ def ensure_http_scheme(url, scheme="https://"):
return url
def root_from_url(url, scheme="https://"):
    """Extract scheme and domain from a URL

    Everything from the first "/" after the domain onwards is dropped.
    If 'url' does not start with "http://" or "https://", 'scheme' is
    prepended to the result.

    URLs without any "/" after the domain (for example
    "https://example.org" or just "example.org") are returned in full
    (plus 'scheme' where needed) instead of raising ValueError.
    """
    if url.startswith(("https://", "http://")):
        prefix = ""
        # 8 == len("https://"); for "http://" this skips the first
        # character of the domain, which is never the "/" we look for.
        start = 8
    else:
        prefix = scheme
        start = 0

    # str.find() returns -1 where str.index() would raise ValueError
    end = url.find("/", start)
    root = url if end < 0 else url[:end]
    return prefix + root
def filename_from_url(url):
"""Extract the last part of an URL to use as a filename"""
try:

@ -102,6 +102,18 @@ class TestText(unittest.TestCase):
for value in INVALID_ALT:
self.assertEqual(f(value), value)
def test_root_from_url(self, f=text.root_from_url):
    """root_from_url() must reduce a URL to its scheme and domain"""
    https_root = "https://example.org"
    for url in (
        "https://example.org/",
        "https://example.org/path",
        # no scheme given -> the default "https://" is prepended
        "example.org/",
        "example.org/path/",
    ):
        self.assertEqual(f(url), https_root)

    http_root = "http://example.org"
    for url in (
        "http://example.org/",
        "http://example.org/path/",
    ):
        self.assertEqual(f(url), http_root)

    # an explicit scheme argument is used for scheme-less input
    self.assertEqual(f("example.org/", "http://"), http_root)
def test_filename_from_url(self, f=text.filename_from_url):
result = "filename.ext"

Loading…
Cancel
Save