[gelbooru_v01] replace 'extract_all()' with 'extract_from()'

`extract_from()` is even slightly faster, especially on Python versions before 3.11.
1 year ago · 8357acf359
parent 068aa26c3e
commit 8357acf359
1 changed file with 18 additions and 19 deletions
--- a/gallery_dl/extractor/gelbooru_v01.py
+++ b/gallery_dl/extractor/gelbooru_v01.py
@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
-# Copyright 2021-2022 Mike Fährmann
+# Copyright 2021-2023 Mike Fährmann
 #
 # This program is free software; you can redistribute it and/or modify
 # it under the terms of the GNU General Public License version 2 as
@ -19,24 +19,23 @@ class GelbooruV01Extractor(booru.BooruExtractor):
    def _parse_post(self, post_id):
        url = "{}/index.php?page=post&s=view&id={}".format(
            self.root, post_id)
-        page = self.request(url).text
+        extr = text.extract_from(self.request(url).text)
-
+
-        post = text.extract_all(page, (
+        post = {
-            ("created_at", 'Posted: ', ' <'),
+            "id"        : post_id,
-            ("uploader"  , 'By: ', ' <'),
+            "created_at": extr('Posted: ', ' <'),
-            ("width"     , 'Size: ', 'x'),
+            "uploader"  : extr('By: ', ' <'),
-            ("height"    , '', ' <'),
+            "width"     : extr('Size: ', 'x'),
-            ("source"    , 'Source: <a href="', '"'),
+            "height"    : extr('', ' <'),
-            ("rating"    , 'Rating: ', '<'),
+            "source"    : extr('Source: <a href="', '"'),
-            ("score"     , 'Score: ', ' <'),
+            "rating"    : (extr('Rating: ', '<') or "?")[0].lower(),
-            ("file_url"  , '<img alt="img" src="', '"'),
+            "score"     : extr('Score: ', ' <'),
-            ("tags"      , 'id="tags" name="tags" cols="40" rows="5">', '<'),
+            "file_url"  : extr('<img alt="img" src="', '"'),
-        ))[0]
+            "tags"      : text.unescape(extr(
-
+                'id="tags" name="tags" cols="40" rows="5">', '<')),
-        post["id"] = post_id
+        }
        post["md5"] = post["file_url"].rpartition("/")[2].partition(".")[0]
        post["rating"] = (post["rating"] or "?")[0].lower()
        post["tags"] = text.unescape(post["tags"])
        post["date"] = text.parse_datetime(
            post["created_at"], "%Y-%m-%d %H:%M:%S")
@ -186,7 +185,7 @@ class GelbooruV01PostExtractor(GelbooruV01Extractor):
                "md5": "2aaa0438d58fc7baa75a53b4a9621bb89a9d3fdb",
                "rating": "s",
                "score": str,
-                "source": None,
+                "source": "",
                "tags": "blush dress green_eyes green_hair hatsune_miku "
                        "long_hair twintails vocaloid",
                "uploader": "Honochi31",