|
|
|
@ -30,11 +30,12 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
|
def items(self):
|
|
|
|
|
self._prepare_ddosguard_cookies()
|
|
|
|
|
|
|
|
|
|
find_inline = re.compile(
|
|
|
|
|
self._find_inline = re.compile(
|
|
|
|
|
r'src="(?:https?://kemono\.party)?(/inline/[^"]+'
|
|
|
|
|
r'|/[0-9a-f]{2}/[0-9a-f]{2}/[0-9a-f]{64}\.[^"]+)').findall
|
|
|
|
|
skip_service = \
|
|
|
|
|
self._skip_service = \
|
|
|
|
|
"patreon" if self.config("patreon-skip-file", True) else None
|
|
|
|
|
generators = self._build_file_generators(self.config("files"))
|
|
|
|
|
comments = self.config("comments")
|
|
|
|
|
|
|
|
|
|
if self.config("metadata"):
|
|
|
|
@ -51,20 +52,6 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
|
|
|
|
|
|
for post in posts:
|
|
|
|
|
|
|
|
|
|
files = []
|
|
|
|
|
append = files.append
|
|
|
|
|
file = post["file"]
|
|
|
|
|
|
|
|
|
|
if file:
|
|
|
|
|
file["type"] = "file"
|
|
|
|
|
if post["service"] != skip_service or not post["attachments"]:
|
|
|
|
|
append(file)
|
|
|
|
|
for attachment in post["attachments"]:
|
|
|
|
|
attachment["type"] = "attachment"
|
|
|
|
|
append(attachment)
|
|
|
|
|
for path in find_inline(post["content"] or ""):
|
|
|
|
|
append({"path": path, "name": path, "type": "inline"})
|
|
|
|
|
|
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
|
post["published"] or post["added"],
|
|
|
|
|
"%a, %d %b %Y %H:%M:%S %Z")
|
|
|
|
@ -74,8 +61,11 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
|
post["comments"] = self._extract_comments(post)
|
|
|
|
|
yield Message.Directory, post
|
|
|
|
|
|
|
|
|
|
for post["num"], file in enumerate(files, 1):
|
|
|
|
|
post["num"] = 0
|
|
|
|
|
for file in itertools.chain.from_iterable(
|
|
|
|
|
g(post) for g in generators):
|
|
|
|
|
post["type"] = file["type"]
|
|
|
|
|
post["num"] += 1
|
|
|
|
|
url = file["path"]
|
|
|
|
|
if url[0] == "/":
|
|
|
|
|
url = self.root + "/data" + url
|
|
|
|
@ -104,6 +94,36 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
|
|
|
|
|
|
return {c.name: c.value for c in response.history[0].cookies}
|
|
|
|
|
|
|
|
|
|
def _postfile(self, post):
    """Return the post's main file as a 0- or 1-element tuple.

    The file dict is tagged with type "file" whenever it exists.
    An empty tuple is returned when the post has no file, or when
    the post's service equals 'self._skip_service' and the post
    also carries attachments.
    """
    postfile = post["file"]
    if postfile:
        # tag first: the dict is marked even if it ends up skipped
        postfile["type"] = "file"
        skipped = (post["service"] == self._skip_service and
                   post["attachments"])
        if not skipped:
            return (postfile,)
    return ()
|
|
|
|
|
|
|
|
|
|
def _attachments(self, post):
    """Tag every attachment of 'post' and return the attachment list.

    Each attachment dict is modified in place (type "attachment");
    the returned object is the post's own "attachments" list.
    """
    attachments = post["attachments"]
    for entry in attachments:
        entry["type"] = "attachment"
    return post["attachments"]
|
|
|
|
|
|
|
|
|
|
def _inline(self, post):
    """Yield a file dict for every inline path found in the post content.

    Paths are whatever 'self._find_inline' returns for the content
    string (an empty string is searched when the content is falsy);
    each one is used as both "path" and "name" of a type "inline" entry.
    """
    yield from (
        {"path": match, "name": match, "type": "inline"}
        for match in self._find_inline(post["content"] or "")
    )
|
|
|
|
|
|
|
|
|
|
def _build_file_generators(self, filetypes):
    """Select the per-post file generators to run, in order.

    'filetypes' is the value of the "files" option:
      - None: use the default order (post file, attachments, inline)
      - str:  comma-separated generator names; whitespace around
              names and empty entries (e.g. a trailing comma) are
              ignored
      - any other iterable: names are used as given

    Returns a sequence of bound methods, each taking a post dict.
    Raises KeyError for a name that is not "postfile", "attachments"
    or "inline".
    """
    if filetypes is None:
        return (self._postfile, self._attachments, self._inline)

    genmap = {
        "postfile"   : self._postfile,
        "attachments": self._attachments,
        "inline"     : self._inline,
    }

    if isinstance(filetypes, str):
        # tolerate "postfile, inline" and "postfile," style values
        # instead of failing on " inline" or "" lookups
        stripped = (part.strip() for part in filetypes.split(","))
        filetypes = [name for name in stripped if name]

    return [genmap[ft] for ft in filetypes]
|
|
|
|
|
|
|
|
|
|
def _extract_comments(self, post):
|
|
|
|
|
url = "{}/{}/user/{}/post/{}".format(
|
|
|
|
|
self.root, post["service"], post["user"], post["id"])
|
|
|
|
@ -189,13 +209,14 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
|
|
|
|
|
}),
|
|
|
|
|
# inline image (#1286)
|
|
|
|
|
("https://kemono.party/fanbox/user/7356311/post/802343", {
|
|
|
|
|
"pattern": r"https://kemono\.party/data/inline/fanbox"
|
|
|
|
|
r"/uaozO4Yga6ydkGIJFAQDixfE\.jpeg",
|
|
|
|
|
"pattern": r"https://kemono\.party/data/47/b5/47b5c014ecdcfabdf2c8"
|
|
|
|
|
r"5eec53f1133a76336997ae8596f332e97d956a460ad2\.jpg",
|
|
|
|
|
}),
|
|
|
|
|
# kemono.party -> data.kemono.party
|
|
|
|
|
("https://kemono.party/gumroad/user/trylsc/post/IURjT", {
|
|
|
|
|
"pattern": r"https://kemono\.party/data/(file|attachment)s"
|
|
|
|
|
r"/gumroad/trylsc/IURjT/",
|
|
|
|
|
"pattern": r"https://kemono\.party/data/("
|
|
|
|
|
r"files/gumroad/trylsc/IURjT/reward8\.jpg|"
|
|
|
|
|
r"c6/04/c6048f5067fd9dbfa7a8be565ac194efdfb6e4.+\.zip)",
|
|
|
|
|
}),
|
|
|
|
|
# username (#1548, #1652)
|
|
|
|
|
("https://kemono.party/gumroad/user/3252870377455/post/aJnAH", {
|
|
|
|
|