[cohost] add 'tag' extractor (#4483)

pull/4791/merge
Mike Fährmann 7 days ago
parent 0d67d54162
commit 7abf6e445c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -154,7 +154,7 @@ Consider all listed sites to potentially be NSFW.
<tr>
<td>cohost!</td>
<td>https://cohost.org/</td>
<td>Posts, User Profiles</td>
<td>Posts, Tag Searches, User Profiles</td>
<td></td>
</tr>
<tr>

@ -158,3 +158,36 @@ class CohostPostExtractor(CohostExtractor):
post["comments"] = ()
return (post,)
class CohostTagExtractor(CohostExtractor):
    """Extractor for the posts listed on a 'tagged' page"""
    subcategory = "tag"
    pattern = BASE_PATTERN + r"/([^/?#]+)/tagged/([^/?#]+)(?:\?([^#]+))?"
    example = "https://cohost.org/USER/tagged/TAG"

    def posts(self):
        """Yield every post of the tagged feed, one result page at a time"""
        project, tag_name, query_string = self.groups
        feed_url = "{}/{}/tagged/{}".format(self.root, project, tag_name)
        query = text.parse_query(query_string)

        # The feed is looked up under a different key when the first
        # path segment is "rc".
        # NOTE(review): presumably "rc" is a site-wide feed rather than
        # a regular project - confirm.
        if project == "rc":
            feed_key = "tagged-post-feed"
        else:
            feed_key = "project-tagged-post-feed"

        while True:
            html = self.request(feed_url, params=query).text

            # the page embeds its data as JSON inside a <script> element
            raw_state = text.extr(
                html, 'id="__COHOST_LOADER_STATE__">', '</script>')
            state = util.json_loads(raw_state)

            try:
                feed = state[feed_key]
            except KeyError:
                # expected key is absent: fall back to whatever entry
                # popitem() hands out
                feed = state.popitem()[1]

            yield from feed["posts"]

            paging = feed["paginationMode"]
            if not paging.get("morePagesForward"):
                break

            # carry the pagination state over into the next request
            query["refTimestamp"] = paging["refTimestamp"]
            query["skipPosts"] = (
                paging["currentSkip"] + paging["idealPageStride"])

Loading…
Cancel
Save