merge #4841: [fapello] support '.su' TLD (#4840)

10 months ago · 1ae43d8123
parent b43be67206 2402162e8a
commit 1ae43d8123
2 changed files with 62 additions and 10 deletions
--- a/gallery_dl/extractor/fapello.py
+++ b/gallery_dl/extractor/fapello.py
@@ -10,6 +10,9 @@ from .common import Extractor, Message
 from .. import text, exception


+BASE_PATTERN = r"(?:https?://)?(?:www\.)?fapello\.(?:com|su)"
+
+
 class FapelloPostExtractor(Extractor):
    """Extractor for individual posts on fapello.com"""
    category = "fapello"
@@ -17,16 +20,16 @@ class FapelloPostExtractor(Extractor):
    directory_fmt = ("{category}", "{model}")
    filename_fmt = "{model}_{id}.{extension}"
    archive_fmt = "{type}_{model}_{id}"
-    pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
-               r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)")
+    pattern = BASE_PATTERN + r"/(?!search/|popular_videos/)([^/?#]+)/(\d+)"
    example = "https://fapello.com/MODEL/12345/"

    def __init__(self, match):
        Extractor.__init__(self, match)
+        self.root = text.root_from_url(match.group(0))
        self.model, self.id = match.groups()

    def items(self):
-        url = "https://fapello.com/{}/{}/".format(self.model, self.id)
+        url = "{}/{}/{}/".format(self.root, self.model, self.id)
        page = text.extr(
            self.request(url, allow_redirects=False).text,
            'class="uk-align-center"', "</div>", None)
@@ -48,27 +51,29 @@ class FapelloModelExtractor(Extractor):
    """Extractor for all posts from a fapello model"""
    category = "fapello"
    subcategory = "model"
-    pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
-               r"/(?!top-(?:likes|followers)|popular_videos"
+    pattern = (BASE_PATTERN + r"/(?!top-(?:likes|followers)|popular_videos"
               r"|videos|trending|search/?$)"
               r"([^/?#]+)/?$")
    example = "https://fapello.com/model/"

    def __init__(self, match):
        Extractor.__init__(self, match)
+        self.root = text.root_from_url(match.group(0))
        self.model = match.group(1)

    def items(self):
        num = 1
        data = {"_extractor": FapelloPostExtractor}
        while True:
-            url = "https://fapello.com/ajax/model/{}/page-{}/".format(
-                self.model, num)
+            url = "{}/ajax/model/{}/page-{}/".format(
+                self.root, self.model, num)
            page = self.request(url).text
            if not page:
                return

            for url in text.extract_iter(page, '<a href="', '"'):
+                if url == "javascript:void(0);":
+                    continue
                yield Message.Queue, url, data
            num += 1

@@ -77,13 +82,14 @@ class FapelloPathExtractor(Extractor):
    """Extractor for models and posts from fapello.com paths"""
    category = "fapello"
    subcategory = "path"
-    pattern = (r"(?:https?://)?(?:www\.)?fapello\.com"
+    pattern = (BASE_PATTERN +
               r"/(?!search/?$)(top-(?:likes|followers)|videos|trending"
               r"|popular_videos/[^/?#]+)/?$")
    example = "https://fapello.com/trending/"

    def __init__(self, match):
        Extractor.__init__(self, match)
+        self.root = text.root_from_url(match.group(0))
        self.path = match.group(1)

    def items(self):
@@ -93,9 +99,14 @@ class FapelloPathExtractor(Extractor):
        else:
            data = {"_extractor": FapelloPostExtractor}

+        if "fapello.su" in self.root:
+            self.path = self.path.replace("-", "/")
+            if self.path == "trending":
+                data = {"_extractor": FapelloModelExtractor}
+
        while True:
-            page = self.request("https://fapello.com/ajax/{}/page-{}/".format(
-                self.path, num)).text
+            page = self.request("{}/ajax/{}/page-{}/".format(
+                self.root, self.path, num)).text
            if not page:
                return

--- a/test/results/fapello.py
+++ b/test/results/fapello.py
@@ -26,6 +26,7 @@ __tests__ = (
    "#category": ("", "fapello", "post"),
    "#class"   : fapello.FapelloPostExtractor,
    "#pattern" : r"https://cdn\.fapello\.com/content/v/l/vladislava-661/1000/vladislava-661_0693\.mp4",
+    "#exception": exception.NotFoundError,

    "model"    : "vladislava-661",
    "id"       : 693,
@@ -40,6 +41,16 @@ __tests__ = (
    "#exception": exception.NotFoundError,
 },

+{
+    "#url"     : "https://fapello.su/grace-charis-gracecharisxo/2038266/",
+    "#category": ("", "fapello", "post"),
+    "#class"   : fapello.FapelloPostExtractor,
+
+    "model"    : "grace-charis-gracecharisxo",
+    "id"       : 2038266,
+    "type"     : "photo",
+},
+
 {
    "#url"     : "https://fapello.com/hyoon/",
    "#category": ("", "fapello", "model"),
@@ -55,6 +66,15 @@ __tests__ = (
    "#class"   : fapello.FapelloModelExtractor,
 },

+{
+    "#url"     : "https://fapello.su/grace-charis-gracecharisxo/",
+    "#category": ("", "fapello", "model"),
+    "#class"   : fapello.FapelloModelExtractor,
+    "#pattern" : fapello.FapelloPostExtractor.pattern,
+    "#range"   : "1-50",
+    "#count"   : 50,
+},
+
 {
    "#url"     : "https://fapello.com/top-likes/",
    "#category": ("", "fapello", "path"),
@@ -64,6 +84,15 @@ __tests__ = (
    "#count"   : 10,
 },

+{
+    "#url"     : "https://fapello.su/top-likes/",
+    "#category": ("", "fapello", "path"),
+    "#class"   : fapello.FapelloPathExtractor,
+    "#pattern" : fapello.FapelloModelExtractor.pattern,
+    "#range"   : "1-10",
+    "#count"   : 10,
+},
+
 {
    "#url"     : "https://fapello.com/videos/",
    "#category": ("", "fapello", "path"),
@@ -79,12 +108,24 @@ __tests__ = (
    "#class"   : fapello.FapelloPathExtractor,
 },

+{
+    "#url"     : "https://fapello.su/top-followers/",
+    "#category": ("", "fapello", "path"),
+    "#class"   : fapello.FapelloPathExtractor,
+},
+
 {
    "#url"     : "https://fapello.com/trending/",
    "#category": ("", "fapello", "path"),
    "#class"   : fapello.FapelloPathExtractor,
 },

+{
+    "#url"     : "https://fapello.su/trending/",
+    "#category": ("", "fapello", "path"),
+    "#class"   : fapello.FapelloPathExtractor,
+},
+
 {
    "#url"     : "https://fapello.com/popular_videos/twelve_hours/",
    "#category": ("", "fapello", "path"),