[kemonoparty] split 'discord' extractor (#1940)

in 'server' and 'channel'
pull/1971/head
Mike Fährmann 3 years ago
parent db857b40d8
commit bcbf9bcf36
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -14,7 +14,8 @@ from ..cache import cache
import itertools import itertools
import re import re
BASE_PATTERN = r"(?:https?://)?kemono\.party/([^/?#]+)/user/([^/?#]+)" BASE_PATTERN = r"(?:https?://)?kemono\.party"
USER_PATTERN = BASE_PATTERN + r"/([^/?#]+)/user/([^/?#]+)"
class KemonopartyExtractor(Extractor): class KemonopartyExtractor(Extractor):
@ -103,7 +104,7 @@ class KemonopartyExtractor(Extractor):
class KemonopartyUserExtractor(KemonopartyExtractor): class KemonopartyUserExtractor(KemonopartyExtractor):
"""Extractor for all posts from a kemono.party user listing""" """Extractor for all posts from a kemono.party user listing"""
subcategory = "user" subcategory = "user"
pattern = BASE_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])" pattern = USER_PATTERN + r"/?(?:\?o=(\d+))?(?:$|[?#])"
test = ( test = (
("https://kemono.party/fanbox/user/6993449", { ("https://kemono.party/fanbox/user/6993449", {
"range": "1-25", "range": "1-25",
@ -140,7 +141,7 @@ class KemonopartyUserExtractor(KemonopartyExtractor):
class KemonopartyPostExtractor(KemonopartyExtractor): class KemonopartyPostExtractor(KemonopartyExtractor):
"""Extractor for a single kemono.party post""" """Extractor for a single kemono.party post"""
subcategory = "post" subcategory = "post"
pattern = BASE_PATTERN + r"/post/([^/?#]+)" pattern = USER_PATTERN + r"/post/([^/?#]+)"
test = ( test = (
("https://kemono.party/fanbox/user/6993449/post/506575", { ("https://kemono.party/fanbox/user/6993449/post/506575", {
"pattern": r"https://kemono.party/data/21/0f" "pattern": r"https://kemono.party/data/21/0f"
@ -206,28 +207,30 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
"{channel_name|channel}") "{channel_name|channel}")
filename_fmt = "{id}_{num:>02}_{filename}.{extension}" filename_fmt = "{id}_{num:>02}_{filename}.{extension}"
archive_fmt = "discord_{server}_{id}_{num}" archive_fmt = "discord_{server}_{id}_{num}"
pattern = r"(?:https?://)?kemono\.party/discord/server/(\d+)(?:/?#(.*))?" pattern = BASE_PATTERN + r"/discord/server/(\d+)(?:/channel/(\d+))?#(.*)"
test = ( test = (
("https://kemono.party/discord/server/256559665620451329", {
"pattern": r"https://kemono\.party/data/attachments/discord"
r"/256559665620451329/\d+/\d+/.+",
"count": ">= 2",
}),
(("https://kemono.party/discord" (("https://kemono.party/discord"
"/server/488668827274444803#finish-work"), { "/server/488668827274444803#finish-work"), {
"count": 4, "count": 4,
"keyword": {"channel_name": "finish-work"}, "keyword": {"channel_name": "finish-work"},
}), }),
(("https://kemono.party/discord"
"/server/256559665620451329/channel/462437519519383555#"), {
"pattern": r"https://kemono\.party/data/attachments/discord"
r"/256559665620451329/\d+/\d+/.+",
"count": ">= 2",
}),
) )
def __init__(self, match): def __init__(self, match):
KemonopartyExtractor.__init__(self, match) KemonopartyExtractor.__init__(self, match)
self.server, self.channel = match.groups() self.server, self.channel, self.channel_name = match.groups()
def items(self): def items(self):
self._prepare_ddosguard_cookies() self._prepare_ddosguard_cookies()
for post in self.posts(): for post in self.posts():
post["channel_name"] = self.channel_name
post["date"] = text.parse_datetime( post["date"] = text.parse_datetime(
post["published"], "%a, %d %b %Y %H:%M:%S %Z") post["published"], "%a, %d %b %Y %H:%M:%S %Z")
yield Message.Directory, post yield Message.Directory, post
@ -244,39 +247,56 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
yield Message.Url, url, post yield Message.Url, url, post
def posts(self): def posts(self):
if self.channel is None:
url = "{}/api/discord/channels/lookup?q={}".format( url = "{}/api/discord/channels/lookup?q={}".format(
self.root, self.server) self.root, self.server)
for channel in self.request(url).json():
channels = self.request(url).json() if channel["name"] == self.channel_name:
if self.channel is not None: self.channel = channel["id"]
for channel in channels:
if channel["name"] == self.channel:
channels = (channel,)
break break
else: else:
raise exception.NotFoundError("channel") raise exception.NotFoundError("channel")
for channel in channels: url = "{}/api/discord/channel/{}".format(self.root, self.channel)
url = "{}/api/discord/channel/{}".format(self.root, channel["id"])
params = {"skip": 0} params = {"skip": 0}
channel_name = channel["name"]
while True: while True:
posts = self.request(url, params=params).json() posts = self.request(url, params=params).json()
yield from posts
for post in posts:
post["channel_name"] = channel_name
yield post
if len(posts) < 25: if len(posts) < 25:
break break
params["skip"] += 25 params["skip"] += 25
class KemonopartyDiscordServerExtractor(KemonopartyExtractor):
subcategory = "discord-server"
pattern = BASE_PATTERN + r"/discord/server/(\d+)$"
test = ("https://kemono.party/discord/server/488668827274444803", {
"pattern": KemonopartyDiscordExtractor.pattern,
"count": 13,
})
def __init__(self, match):
KemonopartyExtractor.__init__(self, match)
self.server = match.group(1)
def items(self):
url = "{}/api/discord/channels/lookup?q={}".format(
self.root, self.server)
channels = self.request(url).json()
for channel in channels:
url = "{}/discord/server/{}/channel/{}#{}".format(
self.root, self.server, channel["id"], channel["name"])
channel["_extractor"] = KemonopartyDiscordExtractor
yield Message.Queue, url, channel
class KemonopartyFavoriteExtractor(KemonopartyExtractor): class KemonopartyFavoriteExtractor(KemonopartyExtractor):
"""Extractor for kemono.party favorites""" """Extractor for kemono.party favorites"""
subcategory = "favorite" subcategory = "favorite"
pattern = r"(?:https?://)?kemono\.party/favorites" pattern = BASE_PATTERN + r"/favorites"
test = ("https://kemono.party/favorites", { test = ("https://kemono.party/favorites", {
"pattern": KemonopartyUserExtractor.pattern, "pattern": KemonopartyUserExtractor.pattern,
"url": "f4b5b796979bcba824af84206578c79101c7f0e1", "url": "f4b5b796979bcba824af84206578c79101c7f0e1",

@ -151,6 +151,7 @@ SUBCATEGORY_MAP = {
}, },
"kemonoparty": { "kemonoparty": {
"discord": "Discord Servers", "discord": "Discord Servers",
"discord-server": "",
}, },
"mangadex": { "mangadex": {
"feed" : "Followed Feed", "feed" : "Followed Feed",

Loading…
Cancel
Save