[kemonoparty] implement a 'files' option (#1991)

similar to 8d676151
3 years ago · d4ec245554
parent ab8eea1a24
commit d4ec245554
2 changed files with 54 additions and 21 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -1338,6 +1338,18 @@ Description
    Extract ``comments`` metadata.


+extractor.kemonoparty.files
+---------------------------
+Type
+    ``list`` of ``strings``
+Default
+    ``["postfile", "attachments", "inline"]``
+Description
+    Determines the type and order of files to be downloaded.
+
+    Available types are ``postfile``, ``attachments``, and ``inline``.
+
+
 extractor.kemonoparty.max-posts
 -------------------------------
 Type
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@@ -30,11 +30,12 @@ class KemonopartyExtractor(Extractor):
    def items(self):
        self._prepare_ddosguard_cookies()

-        find_inline = re.compile(
+        self._find_inline = re.compile(
            r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
            r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
-        skip_service = \
+        self._skip_service = \
            "patreon" if self.config("patreon-skip-file", True) else None
+        generators = self._build_file_generators(self.config("files"))
        comments = self.config("comments")

        if self.config("metadata"):
@@ -51,20 +52,6 @@ class KemonopartyExtractor(Extractor):

        for post in posts:

-            files = []
-            append = files.append
-            file = post["file"]
-
-            if file:
-                file["type"] = "file"
-                if post["service"] != skip_service or not post["attachments"]:
-                    append(file)
-            for attachment in post["attachments"]:
-                attachment["type"] = "attachment"
-                append(attachment)
-            for path in find_inline(post["content"] or ""):
-                append({"path": path, "name": path, "type": "inline"})
-
            post["date"] = text.parse_datetime(
                post["published"] or post["added"],
                "%a, %d %b %Y %H:%M:%S %Z")
@@ -74,8 +61,11 @@ class KemonopartyExtractor(Extractor):
                post["comments"] = self._extract_comments(post)
            yield Message.Directory, post

-            for post["num"], file in enumerate(files, 1):
+            post["num"] = 0
+            for file in itertools.chain.from_iterable(
+                    g(post) for g in generators):
                post["type"] = file["type"]
+                post["num"] += 1
                url = file["path"]
                if url[0] == "/":
                    url = self.root + "/data" + url
@@ -104,6 +94,36 @@ class KemonopartyExtractor(Extractor):

        return {c.name: c.value for c in response.history[0].cookies}

+    def _postfile(self, post):
+        file = post["file"]
+        if not file:
+            return ()
+        file["type"] = "file"
+        if post["service"] == self._skip_service and post["attachments"]:
+            return ()
+        return (file,)
+
+    def _attachments(self, post):
+        for attachment in post["attachments"]:
+            attachment["type"] = "attachment"
+        return post["attachments"]
+
+    def _inline(self, post):
+        for path in self._find_inline(post["content"] or ""):
+            yield {"path": path, "name": path, "type": "inline"}
+
+    def _build_file_generators(self, filetypes):
+        if filetypes is None:
+            return (self._postfile, self._attachments, self._inline)
+        genmap = {
+            "postfile"   : self._postfile,
+            "attachments": self._attachments,
+            "inline"     : self._inline,
+        }
+        if isinstance(filetypes, str):
+            filetypes = filetypes.split(",")
+        return [genmap[ft] for ft in filetypes]
+
    def _extract_comments(self, post):
        url = "{}/{}/user/{}/post/{}".format(
            self.root, post["service"], post["user"], post["id"])
@@ -189,13 +209,14 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
        }),
        # inline image (#1286)
        ("https://kemono.party/fanbox/user/7356311/post/802343", {
-            "pattern": r"https://kemono\.party/data/inline/fanbox"
-                       r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
+            "pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
+                       r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
        }),
        # kemono.party -> data.kemono.party
        ("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
-            "pattern": r"https://kemono\.party/data/(file|attachment)s"
-                       r"/gumroad/trylsc/IURjT/",
+            "pattern": r"https://kemono\.party/data/("
+                       r"files/gumroad/trylsc/IURjT/reward8\.jpg|"
+                       r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
        }),
        # username (#1548, #1652)
        ("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {