[kemonoparty] add 'comments' option (#1980)

3 years ago · f0fc3b0ba1
parent 1fac74b14d
commit f0fc3b0ba1
2 changed files with 31 additions and 0 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -1328,6 +1328,16 @@ Description
    Download video files.


+extractor.kemonoparty.comments
+------------------------------
+Type
+    ``bool``
+Default
+    ``false``
+Description
+    Extract ``comments`` metadata.
+
+
 extractor.kemonoparty.max-posts
 -------------------------------
 Type
--- a/gallery_dl/extractor/kemonoparty.py
+++ b/gallery_dl/extractor/kemonoparty.py
@ -35,6 +35,7 @@ class KemonopartyExtractor(Extractor):
            r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
        skip_service = \
            "patreon" if self.config("patreon-skip-file", True) else None
+        comments = self.config("comments")

        if self.config("metadata"):
            username = text.unescape(text.extract(
@ -68,6 +69,8 @@ class KemonopartyExtractor(Extractor):
                post["published"], "%a, %d %b %Y %H:%M:%S %Z")
            if username:
                post["username"] = username
+            if comments:
+                post["comments"] = self._extract_comments(post)
            yield Message.Directory, post

            for post["num"], file in enumerate(files, 1):
@ -100,6 +103,24 @@ class KemonopartyExtractor(Extractor):

        return {c.name: c.value for c in response.history[0].cookies}

+    def _extract_comments(self, post):
+        """Return the comments found on a post's web page
+
+        Builds the page URL from 'self.root' and the "service", "user",
+        and "id" values of 'post', fetches it with one additional HTTP
+        request, and collects one dict per "<article" ... "</article>"
+        span of the returned HTML.
+
+        Each dict has the keys "id", "user", "body", and "date";
+        "date" is the unparsed value of the 'datetime' attribute.
+        The list is empty when the loop finds no such span.
+        """
+        url = "{}/{}/user/{}/post/{}".format(
+            self.root, post["service"], post["user"], post["id"])
+        page = self.request(url).text
+
+        comments = []
+        for comment in text.extract_iter(page, "<article", "</article>"):
+            # NOTE(review): 'extr' presumably resumes searching after its
+            # previous match, so the calls below would have to stay in
+            # the order the markers occur in the HTML -- confirm
+            extr = text.extract_from(comment)
+            cid = extr('id="', '"')
+            comments.append({
+                "id"  : cid,
+                # text between the link to this comment's own anchor and
+                # the next closing tag; strip() also drops the '>' left
+                # over from the opening tag
+                "user": extr('href="#' + cid + '"', '</').strip(" \n\r>"),
+                # section content with surrounding whitespace removed
+                "body": extr(
+                    '<section class="comment__body">', '</section>').strip(),
+                "date": extr('datetime="', '"'),
+            })
+        return comments
+

 class KemonopartyUserExtractor(KemonopartyExtractor):
    """Extractor for all posts from a kemono.party user listing"""