add album extractor for lolisafe/chibisafe instances

- support bunkr.is (closes #2038)
- support zz.ht    (closes #2105)
pull/2089/head
Mike Fährmann 3 years ago
parent 7bf1d3fd32
commit 882c614281
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1141,5 +1141,21 @@ Consider all sites to be NSFW unless otherwise known.
<td>Collections, Products</td>
<td></td>
</tr>
<tr>
<td colspan="4"><strong>lolisafe and chibisafe</strong></td>
</tr>
<tr>
<td>Bunkr</td>
<td>https://bunkr.is/</td>
<td>Albums</td>
<td></td>
</tr>
<tr>
<td>ZzZz</td>
<td>https://zz.ht/</td>
<td>Albums</td>
<td></td>
</tr>
</tbody>
</table>

@ -145,6 +145,7 @@ modules = [
"foolslide",
"mastodon",
"shopify",
"lolisafe",
"imagehosts",
"directlink",
"recursive",

@ -6,16 +6,13 @@
"""Extractors for https://cyberdrop.me/"""
from .common import Extractor, Message
from . import lolisafe
from .. import text
class CyberdropAlbumExtractor(Extractor):
class CyberdropAlbumExtractor(lolisafe.LolisafelbumExtractor):
category = "cyberdrop"
subcategory = "album"
root = "https://cyberdrop.me"
directory_fmt = ("{category}", "{album_name} ({album_id})")
archive_fmt = "{album_id}_{id}"
pattern = r"(?:https?://)?(?:www\.)?cyberdrop\.me/a/([^/?#]+)"
test = (
# images
@ -44,11 +41,7 @@ class CyberdropAlbumExtractor(Extractor):
}),
)
def __init__(self, match):
Extractor.__init__(self, match)
self.album_id = match.group(1)
def items(self):
def fetch_album(self, album_id):
url = self.root + "/a/" + self.album_id
extr = text.extract_from(self.request(url).text)
@ -58,9 +51,9 @@ class CyberdropAlbumExtractor(Extractor):
url = extr('id="file" href="', '"')
if not url:
break
append(text.unescape(url))
append({"file": text.unescape(url)})
data = {
return files, {
"album_id" : self.album_id,
"album_name" : extr("name: '", "'"),
"date" : text.parse_timestamp(extr("timestamp: ", ",")),
@ -68,9 +61,3 @@ class CyberdropAlbumExtractor(Extractor):
"description": extr("description: `", "`"),
"count" : len(files),
}
yield Message.Directory, data
for url in files:
text.nameext_from_url(url, data)
data["filename"], _, data["id"] = data["filename"].rpartition("-")
yield Message.Url, url, data

@ -0,0 +1,81 @@
# -*- coding: utf-8 -*-
# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for lolisafe/chibisafe instances"""
from .common import BaseExtractor, Message
from .. import text
class LolisafeExtractor(BaseExtractor):
    """Base class for lolisafe extractors"""
    basecategory = "lolisafe"

    # Directory layout: "<category>/<album_name> (<album_id>)"
    directory_fmt = ("{category}", "{album_name} ({album_id})")

    # Archive key combines the album ID with the per-file ID
    archive_fmt = "{album_id}_{id}"
# Per-instance root URL and domain regex. The value returned by update()
# is used as the leading part of each extractor's URL pattern.
BASE_PATTERN = LolisafeExtractor.update({
    "bunkr": {
        "root": "https://bunkr.is",
        "pattern": r"bunkr\.(?:is|to)",
    },
    "zzzz": {
        "root": "https://zz.ht",
        "pattern": r"zz\.(?:ht|fo)",
    },
})
class LolisafelbumExtractor(LolisafeExtractor):
    """Extractor for albums on lolisafe/chibisafe instances"""
    subcategory = "album"
    pattern = BASE_PATTERN + "/a/([^/?#]+)"
    test = (
        ("https://bunkr.is/a/Lktg9Keq", {
            "pattern": r"https://cdn\.bunkr\.is/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
                "album_name": 'test テスト "&>',
                "count": 1,
                "filename": 'test-テスト-"&>',
                "id": "QjgneIQv",
                "num": int,
            },
        }),
        ("https://bunkr.to/a/Lktg9Keq"),
        ("https://zz.ht/a/lop7W6EZ", {
            "pattern": r"https://z\.zz\.fo/(4anuY|ih560)\.png",
            "count": 2,
            "keyword": {
                "album_id": "lop7W6EZ",
                "album_name": "ferris",
            },
        }),
        ("https://zz.fo/a/lop7W6EZ"),
    )

    def __init__(self, match):
        """Store the album ID captured from the matched URL."""
        LolisafeExtractor.__init__(self, match)
        # Take the last matched group instead of a fixed index;
        # BASE_PATTERN presumably contributes capture groups of its own.
        self.album_id = match.group(match.lastindex)

    def items(self):
        """Yield one Directory message, then one Url message per file."""
        files, data = self.fetch_album(self.album_id)

        yield Message.Directory, data
        # The same metadata dict is updated in place and re-yielded
        # for every file of the album.
        for num, file in enumerate(files, 1):
            url = file["file"]
            data["num"] = num
            text.nameext_from_url(url, data)

            # Names of the form "<name>-<id>" are split at the last "-";
            # without a "-" the whole name serves as both filename and id.
            name, sep, file_id = data["filename"].rpartition("-")
            data["id"] = file_id
            data["filename"] = name if sep else file_id

            yield Message.Url, url, data

    def fetch_album(self, album_id):
        """Request album 'album_id' from the instance's JSON API.

        Returns a 2-tuple of the album's file list (each entry is expected
        to carry its URL under the "file" key) and a dict with the album
        metadata: "album_id", "album_name" and "count".
        """
        url = "{}/api/album/get/{}".format(self.root, album_id)
        data = self.request(url).json()

        return data["files"], {
            # Report the ID that was actually requested
            "album_id"  : album_id,
            "album_name": text.unescape(data["title"]),
            "count"     : data["count"],
        }

@ -115,6 +115,7 @@ CATEGORY_MAP = {
"xhamster" : "xHamster",
"xvideos" : "XVideos",
"yandere" : "yande.re",
"zzzz" : "ZzZz",
}
SUBCATEGORY_MAP = {
@ -203,6 +204,7 @@ BASE_MAP = {
"foolslide" : "FoOlSlide Instances",
"gelbooru_v01": "Gelbooru Beta 0.1.11",
"gelbooru_v02": "Gelbooru Beta 0.2",
"lolisafe" : "lolisafe and chibisafe",
"moebooru" : "Moebooru and MyImouto",
}

Loading…
Cancel
Save