From 7625912b317b548dc97dc08940ff9eb4d84ff038 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Thu, 27 Feb 2020 02:13:33 +0100
Subject: [PATCH] [piczel] improve and update

- fix tag names
- fix a bug in _pagination()
- parse datetime in 'created_at' as 'date'
- rewrite main loop
- replace user profile test
---
 gallery_dl/extractor/piczel.py | 60 ++++++++++++++++++----------------
 1 file changed, 32 insertions(+), 28 deletions(-)

diff --git a/gallery_dl/extractor/piczel.py b/gallery_dl/extractor/piczel.py
index 528e6684..35f9f919 100644
--- a/gallery_dl/extractor/piczel.py
+++ b/gallery_dl/extractor/piczel.py
@@ -22,27 +22,30 @@ class PiczelExtractor(Extractor):
 
     def items(self):
         yield Message.Version, 1
-        for image in self.unpack(self.images()):
-            url = image["image"]["url"]
-            yield Message.Directory, image
-            yield Message.Url, url, text.nameext_from_url(url, image)
-
-    @staticmethod
-    def unpack(images):
-        """Unpack 'images' into individual image objects"""
-        for image in images:
-            if image["multi"]:
-                multi = image["images"]
-                del image["images"]
-                for image["num"], img in enumerate(multi):
-                    image.update(img)
-                    yield image
+        for post in self.posts():
+            post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
+            post["date"] = text.parse_datetime(
+                post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
+
+            if post["multi"]:
+                images = post["images"]
+                del post["images"]
+                yield Message.Directory, post
+                for post["num"], image in enumerate(images):
+                    if "id" in image:
+                        del image["id"]
+                    post.update(image)
+                    url = post["image"]["url"]
+                    yield Message.Url, url, text.nameext_from_url(url, post)
+
             else:
-                image["num"] = 0
-                yield image
+                yield Message.Directory, post
+                post["num"] = 0
+                url = post["image"]["url"]
+                yield Message.Url, url, text.nameext_from_url(url, post)
 
-    def images(self):
-        """Return an iterable with all relevant image objects"""
+    def posts(self):
+        """Return an iterable with all relevant post objects"""
 
     def _pagination(self, url, folder_id=None):
         params = {
@@ -53,26 +56,26 @@ class PiczelExtractor(Extractor):
 
         while True:
             data = self.request(url, params=params).json()
-            yield from data
-
-            if len(data) < 32:
+            if not data:
                 return
             params["from_id"] = data[-1]["id"]
+            yield from data
 
 
 class PiczelUserExtractor(PiczelExtractor):
     """Extractor for all images from a user's gallery"""
     subcategory = "user"
     pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
-    test = ("https://piczel.tv/gallery/Maximumwarp", {
-        "count": ">= 45",
+    test = ("https://piczel.tv/gallery/Bikupan", {
+        "range": "1-100",
+        "count": ">= 100",
     })
 
     def __init__(self, match):
         PiczelExtractor.__init__(self, match)
         self.user = match.group(1)
 
-    def images(self):
+    def posts(self):
         url = "{}/api/users/{}/gallery".format(self.root, self.user)
         return self._pagination(url)
 
@@ -92,7 +95,7 @@ class PiczelFolderExtractor(PiczelExtractor):
         PiczelExtractor.__init__(self, match)
         self.user, self.folder_id = match.groups()
 
-    def images(self):
+    def posts(self):
         url = "{}/api/users/{}/gallery".format(self.root, self.user)
         return self._pagination(url, self.folder_id)
 
@@ -106,6 +109,7 @@ class PiczelImageExtractor(PiczelExtractor):
         "content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
         "keyword": {
             "created_at": "2018-07-22T05:13:58.000Z",
+            "date": "dt:2018-07-22 05:13:58",
             "description": None,
             "extension": "png",
             "favorites_count": int,
@@ -118,7 +122,7 @@ class PiczelImageExtractor(PiczelExtractor):
             "nsfw": False,
             "num": 0,
             "password_protected": False,
-            "tags": "fanart, commission, altair, recreators, ",
+            "tags": ["fanart", "commission", "altair", "recreators"],
             "title": "Altair",
             "user": dict,
             "views": int,
@@ -129,6 +133,6 @@ class PiczelImageExtractor(PiczelExtractor):
         PiczelExtractor.__init__(self, match)
         self.image_id = match.group(1)
 
-    def images(self):
+    def posts(self):
         url = "{}/api/gallery/image/{}".format(self.root, self.image_id)
         return (self.request(url).json(),)