[itaku] metadata cleanup (#1842)

- parse 'date_added' as 'date'
- simplify 'tags', 'categorized_tags', and 'sections'
pull/2719/head
Mike Fährmann 2 years ago
parent 27e8078fb7
commit e0c60a1206
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -31,6 +31,14 @@ class ItakuExtractor(Extractor):
def items(self):
    """Yield a Directory and a Url message for every post.

    Each post dict obtained from self.posts() is modified in place
    before being yielded:

    - 'date' is added, parsed from 'date_added'
    - 'categorized_tags' is removed and replaced by one
      'tags_<category>' list of tag names per category
    - 'tags' is reduced to a list of tag names
    - 'sections' is reduced to a list of section titles
    """
    for post in self.posts():
        # 'date_added' values carry a trailing literal "Z"
        # (e.g. "2022-05-05T19:21:17.674148Z"); a strptime-style format
        # without it would leave that character unmatched.
        post["date"] = text.parse_datetime(
            post["date_added"], "%Y-%m-%dT%H:%M:%S.%fZ")

        # flatten {"Category": [{"name": ...}, ...]} into
        # "tags_category": ["name", ...]
        for category, tags in post.pop("categorized_tags").items():
            post["tags_" + category.lower()] = [t["name"] for t in tags]
        post["tags"] = [t["name"] for t in post["tags"]]
        post["sections"] = [s["title"] for s in post["sections"]]

        url = post["image"]
        yield Message.Directory, post
        yield Message.Url, url, text.nameext_from_url(url, post)
@ -65,7 +73,6 @@ class ItakuImageExtractor(ItakuExtractor):
"is_blacklisted": False
},
"can_reshare": True,
"categorized_tags": dict,
"date_added": "2022-05-05T19:21:17.674148Z",
"date_edited": "2022-05-25T14:37:46.220612Z",
"description": "sketch from drawpile",
@ -89,8 +96,12 @@ class ItakuImageExtractor(ItakuExtractor):
"owner_displayname": "Piku",
"owner_username": "piku",
"reshared_by_you": False,
"sections": list,
"sections": ["Miku"],
"tags": list,
"tags_character": ["hatsune_miku"],
"tags_copyright": ["vocaloid"],
"tags_general" : ["twintails", "green_hair", "flag", "gloves",
"green_eyes", "female", "racing_miku"],
"title": "Racing Miku 2022 Ver.",
"too_mature": False,
"uncompressed_filesize": "0.62",

Loading…
Cancel
Save