From e0c60a12069a9e9a6195e12a3d92371aed0131d4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 20 Jun 2022 19:28:15 +0200 Subject: [PATCH] [itaku] metadata cleanup (#1842) - parse 'date_added' as 'date' - simplify 'tags', 'categorized_tags', and 'sections' --- gallery_dl/extractor/itaku.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/gallery_dl/extractor/itaku.py b/gallery_dl/extractor/itaku.py index c89a85f2..a71360f4 100644 --- a/gallery_dl/extractor/itaku.py +++ b/gallery_dl/extractor/itaku.py @@ -31,6 +31,14 @@ class ItakuExtractor(Extractor): def items(self): for post in self.posts(): + + post["date"] = text.parse_datetime( + post["date_added"], "%Y-%m-%dT%H:%M:%S.%f") + for category, tags in post.pop("categorized_tags").items(): + post["tags_" + category.lower()] = [t["name"] for t in tags] + post["tags"] = [t["name"] for t in post["tags"]] + post["sections"] = [s["title"] for s in post["sections"]] + url = post["image"] yield Message.Directory, post yield Message.Url, url, text.nameext_from_url(url, post) @@ -65,7 +73,6 @@ class ItakuImageExtractor(ItakuExtractor): "is_blacklisted": False }, "can_reshare": True, - "categorized_tags": dict, "date_added": "2022-05-05T19:21:17.674148Z", "date_edited": "2022-05-25T14:37:46.220612Z", "description": "sketch from drawpile", @@ -89,8 +96,12 @@ class ItakuImageExtractor(ItakuExtractor): "owner_displayname": "Piku", "owner_username": "piku", "reshared_by_you": False, - "sections": list, + "sections": ["Miku"], "tags": list, + "tags_character": ["hatsune_miku"], + "tags_copyright": ["vocaloid"], + "tags_general" : ["twintails", "green_hair", "flag", "gloves", + "green_eyes", "female", "racing_miku"], "title": "Racing Miku 2022 Ver.", "too_mature": False, "uncompressed_filesize": "0.62",