[gelbooru_v02] add 'favorite' extractor (closes #1834)

3 years ago · 7bbb1f92d7
parent 4ec11af6a4
commit 7bbb1f92d7
2 changed files with 56 additions and 4 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -888,25 +888,25 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
    <td>Realbooru</td>
    <td>https://realbooru.com/</td>
-    <td>Pools, Posts, Tag Searches</td>
+    <td>Favorites, Pools, Posts, Tag Searches</td>
    <td></td>
 </tr>
 <tr>
    <td>Rule 34</td>
    <td>https://rule34.xxx/</td>
-    <td>Pools, Posts, Tag Searches</td>
+    <td>Favorites, Pools, Posts, Tag Searches</td>
    <td></td>
 </tr>
 <tr>
    <td>Safebooru</td>
    <td>https://safebooru.org/</td>
-    <td>Pools, Posts, Tag Searches</td>
+    <td>Favorites, Pools, Posts, Tag Searches</td>
    <td></td>
 </tr>
 <tr>
    <td>The Big ImageBoard</td>
    <td>https://tbib.org/</td>
-    <td>Pools, Posts, Tag Searches</td>
+    <td>Favorites, Pools, Posts, Tag Searches</td>
    <td></td>
 </tr>

--- a/gallery_dl/extractor/gelbooru_v02.py
+++ b/gallery_dl/extractor/gelbooru_v02.py
@@ -176,6 +176,58 @@ class GelbooruV02PoolExtractor(GelbooruV02Extractor):
                yield post.attrib


+class GelbooruV02FavoriteExtractor(GelbooruV02Extractor):  # extractor for favorites listing URLs (page=favorites&s=view&id=N)
+    subcategory = "favorite"
+    directory_fmt = ("{category}", "favorites", "{favorite_id}")
+    archive_fmt = "f_{favorite_id}_{id}"
+    per_page = 50  # page size used for the "pid" offset and the last-page check in posts()
+    pattern = BASE_PATTERN + r"/index\.php\?page=favorites&s=view&id=(\d+)"  # final group captures the numeric id
+    test = (
+        ("https://rule34.xxx/index.php?page=favorites&s=view&id=1030218", {
+            "count": 3,
+        }),
+        ("https://safebooru.org/index.php?page=favorites&s=view&id=17567", {
+            "count": 2,
+        }),
+        ("https://realbooru.com/index.php?page=favorites&s=view&id=274", {
+            "count": 4,
+        }),
+        ("https://tbib.org/index.php?page=favorites&s=view&id=7881", {
+            "count": 3,
+        }),
+    )
+
+    def __init__(self, match):
+        GelbooruV02Extractor.__init__(self, match)
+        self.favorite_id = match.group(match.lastindex)  # last matched group: the id from the URL (kept as str)
+
+    def metadata(self):
+        return {"favorite_id": text.parse_int(self.favorite_id)}  # key matches the {favorite_id} field in the format strings above
+
+    def posts(self):  # generator: yields the attrib of each post returned for every id found on the favorites pages
+        url = self.root + "/index.php"
+        params = {
+            "page": "favorites",
+            "s"   : "view",
+            "id"  : self.favorite_id,
+            "pid" : self.page_start * self.per_page,  # starting offset; advanced by per_page after each page
+        }
+
+        data = {}  # reused across iterations; its "id" entry is rebound for every post id found
+        while True:
+            num_ids = 0  # number of post ids found on the current page
+            page = self.request(url, params=params).text
+
+            for data["id"] in text.extract_iter(page, '" id="p', '"'):  # loop target stores each extracted id in data
+                num_ids += 1
+                for post in self._api_request(data):  # presumably an API lookup for that single post id; confirm
+                    yield post.attrib
+
+            if num_ids < self.per_page:  # fewer ids than a full page: treat as the last page
+                return
+            params["pid"] += self.per_page  # advance to the next page
+
+
 class GelbooruV02PostExtractor(GelbooruV02Extractor):
    subcategory = "post"
    archive_fmt = "{id}"