From f0fc3b0ba1fd6fc7103b68fb965ed2df1bd72109 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 3 Nov 2021 22:52:15 +0100 Subject: [PATCH] [kemonoparty] add 'comments' option (#1980) --- docs/configuration.rst | 10 ++++++++++ gallery_dl/extractor/kemonoparty.py | 21 +++++++++++++++++++++ 2 files changed, 31 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index 58fb6285..2487993a 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1328,6 +1328,16 @@ Description Download video files. +extractor.kemonoparty.comments +----------------------------- +Type + ``bool`` +Default + ``false`` +Description + Extract ``comments`` metadata. + + extractor.kemonoparty.max-posts ------------------------------- Type diff --git a/gallery_dl/extractor/kemonoparty.py b/gallery_dl/extractor/kemonoparty.py index d5aad672..2e1d0b26 100644 --- a/gallery_dl/extractor/kemonoparty.py +++ b/gallery_dl/extractor/kemonoparty.py @@ -35,6 +35,7 @@ class KemonopartyExtractor(Extractor): r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall skip_service = \ "patreon" if self.config("patreon-skip-file", True) else None + comments = self.config("comments") if self.config("metadata"): username = text.unescape(text.extract( @@ -68,6 +69,8 @@ class KemonopartyExtractor(Extractor): post["published"], "%a, %d %b %Y %H:%M:%S %Z") if username: post["username"] = username + if comments: + post["comments"] = self._extract_comments(post) yield Message.Directory, post for post["num"], file in enumerate(files, 1): @@ -100,6 +103,24 @@ class KemonopartyExtractor(Extractor): return {c.name: c.value for c in response.history[0].cookies} + def _extract_comments(self, post): + url = "{}/{}/user/{}/post/{}".format( + self.root, post["service"], post["user"], post["id"]) + page = self.request(url).text + + comments = [] + for comment in text.extract_iter(page, ""): + extr = text.extract_from(comment) + cid = extr('id="', '"') + comments.append({ + "id" : cid, + "user": extr('href="#' + cid + '"', '"), + "body": extr( + '
', '
').strip(), + "date": extr('datetime="', '"'), + }) + return comments + class KemonopartyUserExtractor(KemonopartyExtractor): """Extractor for all posts from a kemono.party user listing"""