[gelbooru_v01] replace 'extract_all()' with 'extract_from()'

It's even slightly faster, especially on Python versions before 3.11.
pull/4268/head
Mike Fährmann 1 year ago
parent 068aa26c3e
commit 8357acf359
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*- # -*- coding: utf-8 -*-
# Copyright 2021-2022 Mike Fährmann # Copyright 2021-2023 Mike Fährmann
# #
# This program is free software; you can redistribute it and/or modify # This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as # it under the terms of the GNU General Public License version 2 as
@@ -19,24 +19,23 @@ class GelbooruV01Extractor(booru.BooruExtractor):
def _parse_post(self, post_id): def _parse_post(self, post_id):
url = "{}/index.php?page=post&s=view&id={}".format( url = "{}/index.php?page=post&s=view&id={}".format(
self.root, post_id) self.root, post_id)
page = self.request(url).text extr = text.extract_from(self.request(url).text)
post = text.extract_all(page, ( post = {
("created_at", 'Posted: ', ' <'), "id" : post_id,
("uploader" , 'By: ', ' <'), "created_at": extr('Posted: ', ' <'),
("width" , 'Size: ', 'x'), "uploader" : extr('By: ', ' <'),
("height" , '', ' <'), "width" : extr('Size: ', 'x'),
("source" , 'Source: <a href="', '"'), "height" : extr('', ' <'),
("rating" , 'Rating: ', '<'), "source" : extr('Source: <a href="', '"'),
("score" , 'Score: ', ' <'), "rating" : (extr('Rating: ', '<') or "?")[0].lower(),
("file_url" , '<img alt="img" src="', '"'), "score" : extr('Score: ', ' <'),
("tags" , 'id="tags" name="tags" cols="40" rows="5">', '<'), "file_url" : extr('<img alt="img" src="', '"'),
))[0] "tags" : text.unescape(extr(
'id="tags" name="tags" cols="40" rows="5">', '<')),
post["id"] = post_id }
post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0] post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
post["rating"] = (post["rating"] or "?")[0].lower()
post["tags"] = text.unescape(post["tags"])
post["date"] = text.parse_datetime( post["date"] = text.parse_datetime(
post["created_at"], "%Y-%m-%d %H:%M:%S") post["created_at"], "%Y-%m-%d %H:%M:%S")
@@ -186,7 +185,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
"md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb", "md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
"rating": "s", "rating": "s",
"score": str, "score": str,
"source": None, "source": "",
"tags": "blush dress green_eyes green_hair hatsune_miku " "tags": "blush dress green_eyes green_hair hatsune_miku "
"long_hair twintails vocaloid", "long_hair twintails vocaloid",
"uploader": "Honochi31", "uploader": "Honochi31",

Loading…
Cancel
Save