add album extractor for lolisafe/chibisafe instances

- support bunkr.is (closes #2038)
- support zz.ht (closes #2105)
3 years ago · 882c614281
parent 7bf1d3fd32
commit 882c614281
5 changed files with 105 additions and 18 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1141,5 +1141,21 @@ Consider all sites to be NSFW unless otherwise known.
    <td>Collections, Products</td>
    <td></td>
 </tr>
+
+<tr>
+    <td colspan="4"><strong>lolisafe and chibisafe</strong></td>
+</tr>
+<tr>
+    <td>Bunkr</td>
+    <td>https://bunkr.is/</td>
+    <td>Albums</td>
+    <td></td>
+</tr>
+<tr>
+    <td>ZzZz</td>
+    <td>https://zz.ht/</td>
+    <td>Albums</td>
+    <td></td>
+</tr>
 </tbody>
 </table>
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -145,6 +145,7 @@ modules = [
    "foolslide",
    "mastodon",
    "shopify",
+    "lolisafe",
    "imagehosts",
    "directlink",
    "recursive",
--- a/gallery_dl/extractor/cyberdrop.py
+++ b/gallery_dl/extractor/cyberdrop.py
@@ -6,16 +6,13 @@

 """Extractors for https://cyberdrop.me/"""

-from .common import Extractor, Message
+from . import lolisafe
 from .. import text


-class CyberdropAlbumExtractor(Extractor):
+class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor):
    category = "cyberdrop"
-    subcategory = "album"
    root = "https://cyberdrop.me"
-    directory_fmt = ("{category}", "{album_name} ({album_id})")
-    archive_fmt = "{album_id}_{id}"
    pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
    test = (
        # images
@@ -44,11 +41,7 @@ class CyberdropAlbumExtractor(Extractor):
        }),
    )

-    def __init__(self, match):
-        Extractor.__init__(self, match)
-        self.album_id = match.group(1)
-
-    def items(self):
+    def fetch_album(self, album_id):
        url = self.root + "/a/" + self.album_id
        extr = text.extract_from(self.request(url).text)

@@ -58,9 +51,9 @@ class CyberdropAlbumExtractor(Extractor):
            url = extr('id="file" href="', '"')
            if not url:
                break
-            append(text.unescape(url))
+            append({"file": text.unescape(url)})

-        data = {
+        return files, {
            "album_id"   : self.album_id,
            "album_name" : extr("name: '", "'"),
            "date"       : text.parse_timestamp(extr("timestamp: ", ",")),
@@ -68,9 +61,3 @@ class CyberdropAlbumExtractor(Extractor):
            "description": extr("description: `", "`"),
            "count"      : len(files),
        }
-
-        yield Message.Directory, data
-        for url in files:
-            text.nameext_from_url(url, data)
-            data["filename"], _, data["id"] = data["filename"].rpartition("-")
-            yield Message.Url, url, data
--- a/gallery_dl/extractor/lolisafe.py
+++ b/gallery_dl/extractor/lolisafe.py
@@ -0,0 +1,81 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2021 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for lolisafe/chibisafe instances"""
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class LolisafeExtractor(BaseExtractor):
+    """Base class for lolisafe extractors"""
+    basecategory = "lolisafe"
+    directory_fmt = ("{category}", "{album_name} ({album_id})")
+    archive_fmt = "{album_id}_{id}"
+
+
+BASE_PATTERN = LolisafeExtractor.update({
+    "bunkr": {"root": "https://bunkr.is", "pattern": r"bunkr\.(?:is|to)"},
+    "zzzz" : {"root": "https://zz.ht"   , "pattern": r"zz\.(?:ht|fo)"},
+})
+
+
+class LolisafelbumExtractor(LolisafeExtractor):
+    subcategory = "album"
+    pattern = BASE_PATTERN + "/a/([^/?#]+)"
+    test = (
+        ("https://bunkr.is/a/Lktg9Keq", {
+            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
+            "content": "0c8768055e4e20e7c7259608b67799171b691140",
+            "keyword": {
+                "album_id": "Lktg9Keq",
+                "album_name": 'test テスト "&>',
+                "count": 1,
+                "filename": 'test-テスト-"&>',
+                "id": "QjgneIQv",
+                "num": int,
+            },
+        }),
+        ("https://bunkr.to/a/Lktg9Keq"),
+        ("https://zz.ht/a/lop7W6EZ", {
+            "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
+            "count": 2,
+            "keyword": {
+                "album_id": "lop7W6EZ",
+                "album_name": "ferris",
+            },
+        }),
+        ("https://zz.fo/a/lop7W6EZ"),
+    )
+
+    def __init__(self, match):
+        LolisafeExtractor.__init__(self, match)
+        self.album_id = match.group(match.lastindex)
+
+    def items(self):
+        files, data = self.fetch_album(self.album_id)
+
+        yield Message.Directory, data
+        for data["num"], file in enumerate(files, 1):
+            url = file["file"]
+            text.nameext_from_url(url, data)
+            data["filename"], sep, data["id"] = \
+                data["filename"].rpartition("-")
+            if not sep:
+                data["filename"] = data["id"]
+            yield Message.Url, url, data
+
+    def fetch_album(self, album_id):
+        url = "{}/api/album/get/{}".format(self.root, album_id)
+        data = self.request(url).json()
+
+        return data["files"], {
+            "album_id"  : self.album_id,
+            "album_name": text.unescape(data["title"]),
+            "count"     : data["count"],
+        }
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -115,6 +115,7 @@ CATEGORY_MAP = {
    "xhamster"       : "xHamster",
    "xvideos"        : "XVideos",
    "yandere"        : "yande.re",
+    "zzzz"           : "ZzZz",
 }

 SUBCATEGORY_MAP = {
@@ -203,6 +204,7 @@ BASE_MAP = {
    "foolslide"   : "FoOlSlide Instances",
    "gelbooru_v01": "Gelbooru Beta 0.1.11",
    "gelbooru_v02": "Gelbooru Beta 0.2",
+    "lolisafe"    : "lolisafe and chibisafe",
    "moebooru"    : "Moebooru and MyImouto",
 }