From 7abf6e445c7ab9f95299ca6eaaaf5c8f36a5de23 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Fri, 13 Sep 2024 11:11:53 +0200
Subject: [PATCH] [cohost] add 'tag' extractor (#4483)

---
 docs/supportedsites.md         |  2 +-
 gallery_dl/extractor/cohost.py | 33 +++++++++++++++++++++++++++++++++
 2 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index f4eaf06d..868c3b2f 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -154,7 +154,7 @@ Consider all listed sites to potentially be NSFW.
 <tr>
     <td>cohost!</td>
     <td>https://cohost.org/</td>
-    <td>Posts, User Profiles</td>
+    <td>Posts, Tag Searches, User Profiles</td>
     <td></td>
 </tr>
 <tr>
diff --git a/gallery_dl/extractor/cohost.py b/gallery_dl/extractor/cohost.py
index fc7c48bd..c8ee7f45 100644
--- a/gallery_dl/extractor/cohost.py
+++ b/gallery_dl/extractor/cohost.py
@@ -158,3 +158,36 @@ class CohostPostExtractor(CohostExtractor):
             post["comments"] = ()
 
         return (post,)
+
+
+class CohostTagExtractor(CohostExtractor):
+    """Extractor for tagged posts"""
+    subcategory = "tag"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
+    example = "https://cohost.org/USER/tagged/TAG"
+
+    def posts(self):
+        user, tag, query = self.groups
+        url = "{}/{}/tagged/{}".format(self.root, user, tag)
+        params = text.parse_query(query)
+        post_feed_key = ("tagged-post-feed" if user == "rc" else
+                         "project-tagged-post-feed")
+
+        while True:
+            page = self.request(url, params=params).text
+            data = util.json_loads(text.extr(
+                page, 'id="__COHOST_LOADER_STATE__">', '</script>'))
+
+            try:
+                feed = data[post_feed_key]
+            except KeyError:
+                feed = data.popitem()[1]
+
+            yield from feed["posts"]
+
+            pagination = feed["paginationMode"]
+            if not pagination.get("morePagesForward"):
+                return
+            params["refTimestamp"] = pagination["refTimestamp"]
+            params["skipPosts"] = \
+                pagination["currentSkip"] + pagination["idealPageStride"]