From bddcec49f1f58523e865322bff8c76ee04e4b238 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Tue, 1 Mar 2022 02:56:51 +0100
Subject: [PATCH] implement 'text.root_from_url()'

use domain from input URL for kemono

return the complete root for URLs without a path component
instead of raising ValueError
---
 gallery_dl/extractor/common.py      |  3 +--
 gallery_dl/extractor/kemonoparty.py |  2 +-
 gallery_dl/text.py                  | 15 +++++++++++++++
 test/test_text.py                   | 15 +++++++++++++++
 4 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py
index 1cf00f14..026034cb 100644
--- a/gallery_dl/extractor/common.py
+++ b/gallery_dl/extractor/common.py
@@ -621,8 +621,7 @@ class BaseExtractor(Extractor):
                     if index:
                         self.category, self.root = self.instances[index-1]
                         if not self.root:
-                            url = text.ensure_http_scheme(match.group(0))
-                            self.root = url[:url.index("/", 8)]
+                            self.root = text.root_from_url(match.group(0))
                     else:
                         self.root = group
                         self.category = group.partition("://")[2]
diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py
index 9b85fcbe..25484f0f 100644
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -30,8 +30,8 @@ class KemonopartyExtractor(Extractor):
     def __init__(self, match):
         if match.group(1) == "coomer":
             self.category = "coomerparty"
-            self.root = "https://coomer.party"
             self.cookiedomain = ".coomer.party"
+        self.root = text.root_from_url(match.group(0))
         Extractor.__init__(self, match)

     def items(self):
diff --git a/gallery_dl/text.py b/gallery_dl/text.py
index ac4bbcbd..97ef3acb 100644
--- a/gallery_dl/text.py
+++ b/gallery_dl/text.py
@@ -46,6 +46,21 @@ def ensure_http_scheme(url, scheme="https://"):
     return url


+def root_from_url(url, scheme="https://"):
+    """Extract scheme and domain from a URL
+
+    'scheme' is prepended when 'url' starts with neither "https://"
+    nor "http://". A URL without a path component is returned as a
+    whole (plus scheme) instead of raising ValueError.
+    """
+    if not url.startswith(("https://", "http://")):
+        # partition() yields the entire string when there is no "/"
+        return scheme + url.partition("/")[0]
+    # search from index 8 to skip the slashes of "http://" / "https://"
+    end = url.find("/", 8)
+    return url if end < 0 else url[:end]
+
+
 def filename_from_url(url):
     """Extract the last part of an URL to use as a filename"""
     try:
diff --git a/test/test_text.py b/test/test_text.py
index 3ab9e73c..ffed7267 100644
--- a/test/test_text.py
+++ b/test/test_text.py
@@ -102,5 +102,20 @@ class TestText(unittest.TestCase):
         for value in INVALID_ALT:
             self.assertEqual(f(value), value)

+    def test_root_from_url(self, f=text.root_from_url):
+        result = "https://example.org"
+        self.assertEqual(f("https://example.org")     , result)
+        self.assertEqual(f("https://example.org/")    , result)
+        self.assertEqual(f("https://example.org/path"), result)
+        self.assertEqual(f("example.org")             , result)
+        self.assertEqual(f("example.org/")            , result)
+        self.assertEqual(f("example.org/path/")       , result)
+
+        result = "http://example.org"
+        self.assertEqual(f("http://example.org")      , result)
+        self.assertEqual(f("http://example.org/")     , result)
+        self.assertEqual(f("http://example.org/path/"), result)
+        self.assertEqual(f("example.org/", "http://") , result)
+
     def test_filename_from_url(self, f=text.filename_from_url):
         result = "filename.ext"