[piczel] improve and update

- fix tag names
- fix a bug in _pagination()
- parse datetime in 'created_at' as 'date'
- rewrite main loop
- replace user profile test
pull/644/head
Mike Fährmann 5 years ago
parent ec85bf90de
commit 7625912b31
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -22,27 +22,30 @@ class PiczelExtractor(Extractor):
def items(self):
yield Message.Version, 1
for image in self.unpack(self.images()):
url = image["image"]["url"]
yield Message.Directory, image
yield Message.Url, url, text.nameext_from_url(url, image)
@staticmethod
def unpack(images):
"""Unpack 'images' into individual image objects"""
for image in images:
if image["multi"]:
multi = image["images"]
del image["images"]
for image["num"], img in enumerate(multi):
image.update(img)
yield image
for post in self.posts():
post["tags"] = [t["title"] for t in post["tags"] if t["title"]]
post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%dT%H:%M:%S.%f%z")
if post["multi"]:
images = post["images"]
del post["images"]
yield Message.Directory, post
for post["num"], image in enumerate(images):
if "id" in image:
del image["id"]
post.update(image)
url = post["image"]["url"]
yield Message.Url, url, text.nameext_from_url(url, post)
else:
image["num"] = 0
yield image
yield Message.Directory, post
post["num"] = 0
url = post["image"]["url"]
yield Message.Url, url, text.nameext_from_url(url, post)
def images(self):
"""Return an iterable with all relevant image objects"""
def posts(self):
"""Return an iterable with all relevant post objects"""
def _pagination(self, url, folder_id=None):
params = {
@ -53,26 +56,26 @@ class PiczelExtractor(Extractor):
while True:
data = self.request(url, params=params).json()
yield from data
if len(data) < 32:
if not data:
return
params["from_id"] = data[-1]["id"]
yield from data
class PiczelUserExtractor(PiczelExtractor):
"""Extractor for all images from a user's gallery"""
subcategory = "user"
pattern = r"(?:https?://)?(?:www\.)?piczel\.tv/gallery/([^/?&#]+)/?$"
test = ("https://piczel.tv/gallery/Maximumwarp", {
"count": ">= 45",
test = ("https://piczel.tv/gallery/Bikupan", {
"range": "1-100",
"count": ">= 100",
})
def __init__(self, match):
PiczelExtractor.__init__(self, match)
self.user = match.group(1)
def images(self):
def posts(self):
url = "{}/api/users/{}/gallery".format(self.root, self.user)
return self._pagination(url)
@ -92,7 +95,7 @@ class PiczelFolderExtractor(PiczelExtractor):
PiczelExtractor.__init__(self, match)
self.user, self.folder_id = match.groups()
def images(self):
def posts(self):
url = "{}/api/users/{}/gallery".format(self.root, self.user)
return self._pagination(url, self.folder_id)
@ -106,6 +109,7 @@ class PiczelImageExtractor(PiczelExtractor):
"content": "df9a053a24234474a19bce2b7e27e0dec23bff87",
"keyword": {
"created_at": "2018-07-22T05:13:58.000Z",
"date": "dt:2018-07-22 05:13:58",
"description": None,
"extension": "png",
"favorites_count": int,
@ -118,7 +122,7 @@ class PiczelImageExtractor(PiczelExtractor):
"nsfw": False,
"num": 0,
"password_protected": False,
"tags": "fanart, commission, altair, recreators, ",
"tags": ["fanart", "commission", "altair", "recreators"],
"title": "Altair",
"user": dict,
"views": int,
@ -129,6 +133,6 @@ class PiczelImageExtractor(PiczelExtractor):
PiczelExtractor.__init__(self, match)
self.image_id = match.group(1)
def images(self):
def posts(self):
url = "{}/api/gallery/image/{}".format(self.root, self.image_id)
return (self.request(url).json(),)

Loading…
Cancel
Save