|
|
@ -46,7 +46,7 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
comments = self.config("comments")
|
|
|
|
comments = self.config("comments")
|
|
|
|
username = dms = None
|
|
|
|
username = dms = None
|
|
|
|
|
|
|
|
|
|
|
|
# prevent files to be sent with gzip compression
|
|
|
|
# prevent files from being sent with gzip compression
|
|
|
|
headers = {"Accept-Encoding": "identity"}
|
|
|
|
headers = {"Accept-Encoding": "identity"}
|
|
|
|
|
|
|
|
|
|
|
|
if self.config("metadata"):
|
|
|
|
if self.config("metadata"):
|
|
|
@ -63,6 +63,7 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
|
|
|
|
|
|
|
|
for post in posts:
|
|
|
|
for post in posts:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
post["_http_headers"] = headers
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
post["published"] or post["added"],
|
|
|
|
post["published"] or post["added"],
|
|
|
|
"%a, %d %b %Y %H:%M:%S %Z")
|
|
|
|
"%a, %d %b %Y %H:%M:%S %Z")
|
|
|
@ -74,27 +75,32 @@ class KemonopartyExtractor(Extractor):
|
|
|
|
if dms is True:
|
|
|
|
if dms is True:
|
|
|
|
dms = self._extract_dms(post)
|
|
|
|
dms = self._extract_dms(post)
|
|
|
|
post["dms"] = dms
|
|
|
|
post["dms"] = dms
|
|
|
|
yield Message.Directory, post
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
files = []
|
|
|
|
hashes = set()
|
|
|
|
hashes = set()
|
|
|
|
post["num"] = 0
|
|
|
|
|
|
|
|
for file in itertools.chain.from_iterable(
|
|
|
|
for file in itertools.chain.from_iterable(
|
|
|
|
g(post) for g in generators):
|
|
|
|
g(post) for g in generators):
|
|
|
|
url = file["path"]
|
|
|
|
url = file["path"]
|
|
|
|
|
|
|
|
|
|
|
|
match = find_hash(url)
|
|
|
|
match = find_hash(url)
|
|
|
|
if match:
|
|
|
|
if match:
|
|
|
|
post["hash"] = hash = match.group(1)
|
|
|
|
file["hash"] = hash = match.group(1)
|
|
|
|
if hash in hashes and not duplicates:
|
|
|
|
if hash in hashes and not duplicates:
|
|
|
|
self.log.debug("Skipping %s (duplicate)", url)
|
|
|
|
self.log.debug("Skipping %s (duplicate)", url)
|
|
|
|
continue
|
|
|
|
continue
|
|
|
|
hashes.add(hash)
|
|
|
|
hashes.add(hash)
|
|
|
|
else:
|
|
|
|
else:
|
|
|
|
post["hash"] = ""
|
|
|
|
file["hash"] = ""
|
|
|
|
|
|
|
|
|
|
|
|
post["type"] = file["type"]
|
|
|
|
files.append(file)
|
|
|
|
post["num"] += 1
|
|
|
|
|
|
|
|
post["_http_headers"] = headers
|
|
|
|
post["count"] = len(files)
|
|
|
|
|
|
|
|
yield Message.Directory, post
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
for post["num"], file in enumerate(files, 1):
|
|
|
|
|
|
|
|
post.update(file)
|
|
|
|
|
|
|
|
url = file["path"]
|
|
|
|
|
|
|
|
|
|
|
|
text.nameext_from_url(file.get("name", url), post)
|
|
|
|
text.nameext_from_url(file.get("name", url), post)
|
|
|
|
if not post["extension"]:
|
|
|
|
if not post["extension"]:
|
|
|
@ -236,6 +242,7 @@ class KemonopartyPostExtractor(KemonopartyExtractor):
|
|
|
|
"keyword": {
|
|
|
|
"keyword": {
|
|
|
|
"added": "Wed, 06 May 2020 20:28:02 GMT",
|
|
|
|
"added": "Wed, 06 May 2020 20:28:02 GMT",
|
|
|
|
"content": str,
|
|
|
|
"content": str,
|
|
|
|
|
|
|
|
"count": 1,
|
|
|
|
"date": "dt:2019-08-11 02:09:04",
|
|
|
|
"date": "dt:2019-08-11 02:09:04",
|
|
|
|
"edited": None,
|
|
|
|
"edited": None,
|
|
|
|
"embed": dict,
|
|
|
|
"embed": dict,
|
|
|
@ -374,6 +381,7 @@ class KemonopartyDiscordExtractor(KemonopartyExtractor):
|
|
|
|
post["channel_name"] = self.channel_name
|
|
|
|
post["channel_name"] = self.channel_name
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
post["date"] = text.parse_datetime(
|
|
|
|
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
|
|
|
post["published"], "%a, %d %b %Y %H:%M:%S %Z")
|
|
|
|
|
|
|
|
post["count"] = len(files)
|
|
|
|
yield Message.Directory, post
|
|
|
|
yield Message.Directory, post
|
|
|
|
|
|
|
|
|
|
|
|
for post["num"], file in enumerate(files, 1):
|
|
|
|
for post["num"], file in enumerate(files, 1):
|
|
|
|