merge #3717: [misskey] add misskey extractors

pull/3738/head
Mike Fährmann 2 years ago
commit 6762d99515
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -2029,6 +2029,26 @@ Description
Also emit metadata for text-only posts without media content.
extractor.[misskey].renotes
----------------------------
Type
``bool``
Default
``false``
Description
Fetch media from renoted notes.
extractor.[misskey].replies
----------------------------
Type
``bool``
Default
``true``
Description
Fetch media from replies to other notes.
extractor.nana.favkey
---------------------
Type

@ -1126,6 +1126,28 @@ Consider all sites to be NSFW unless otherwise known.
<td></td> <td></td>
</tr> </tr>
<tr>
<td colspan="4"><strong>Misskey Instances</strong></td>
</tr>
<tr>
<td>Misskey.io</td>
<td>https://misskey.io/</td>
<td>Images from Notes, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Lesbian.energy</td>
<td>https://lesbian.energy/</td>
<td>Images from Notes, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Sushi.ski</td>
<td>https://sushi.ski/</td>
<td>Images from Notes, User Profiles</td>
<td></td>
</tr>
<tr> <tr>
<td colspan="4"><strong>Nijie Instances</strong></td> <td colspan="4"><strong>Nijie Instances</strong></td>
</tr> </tr>

@ -94,6 +94,7 @@ modules = [
"mangasee", "mangasee",
"mangoxo", "mangoxo",
"mememuseum", "mememuseum",
"misskey",
"myhentaigallery", "myhentaigallery",
"myportfolio", "myportfolio",
"nana", "nana",

@ -0,0 +1,200 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from .common import BaseExtractor, Message
from .. import text
class MisskeyExtractor(BaseExtractor):
    """Common base for all Misskey extractors

    Subclasses supply the notes to process by overriding notes();
    items() converts every note into one Directory message followed
    by one Url message for each file attached to it.
    """
    basecategory = "misskey"
    directory_fmt = ("misskey", "{instance}", "{user[username]}")
    filename_fmt = "{category}_{id}_{file[id]}.{extension}"
    archive_fmt = "{id}_{file[id]}"

    def __init__(self, match):
        BaseExtractor.__init__(self, match)
        self.api = MisskeyAPI(self)
        # everything after the scheme of the root URL
        self.instance = self.root.rpartition("://")[2]
        # last matched capture group: the user name or note ID taken
        # from the subclass pattern
        self.item = match.group(match.lastindex)
        self.renotes = self.config("renotes", False)
        self.replies = self.config("replies", True)

    def items(self):
        """Yield Directory and Url messages for the notes from notes()"""
        date_fmt = "%Y-%m-%dT%H:%M:%S.%f%z"

        for note in self.notes():
            attachments = note.pop("files") or []

            # renotes: either merge the renoted note's files or skip
            # the whole note, depending on the 'renotes' option
            renoted = note.get("renote")
            if renoted:
                if self.renotes:
                    attachments.extend(renoted.get("files") or ())
                else:
                    self.log.debug("Skipping %s (renote)", note["id"])
                    continue

            # replies: same treatment, controlled by the 'replies' option
            parent = note.get("reply")
            if parent:
                if self.replies:
                    attachments.extend(parent.get("files") or ())
                else:
                    self.log.debug("Skipping %s (reply)", note["id"])
                    continue

            note["instance"] = self.instance
            note["instance_remote"] = note["user"]["host"]
            note["count"] = len(attachments)
            note["date"] = text.parse_datetime(note["createdAt"], date_fmt)

            yield Message.Directory, note

            # the same 'note' dict is reused as metadata for every file;
            # 'num' and 'file' are overwritten on each iteration
            for index, attachment in enumerate(attachments, 1):
                note["num"] = index
                attachment["date"] = text.parse_datetime(
                    attachment["createdAt"], date_fmt)
                note["file"] = attachment
                url = attachment["url"]
                yield Message.Url, url, text.nameext_from_url(url, note)

    def notes(self):
        """Return an iterable containing all relevant Note objects"""
        return ()
# Supported Misskey instances: each entry gives the instance's root URL
# and a regex fragment matching its host name. The value returned by
# update() is used below as the common prefix of every extractor pattern.
BASE_PATTERN = MisskeyExtractor.update({
    "misskey.io": {
        "root": "https://misskey.io",
        "pattern": r"misskey\.io",
    },
    "lesbian.energy": {
        "root": "https://lesbian.energy",
        "pattern": r"lesbian\.energy",
    },
    "sushi.ski": {
        "root": "https://sushi.ski",
        "pattern": r"sushi\.ski",
    },
})
class MisskeyUserExtractor(MisskeyExtractor):
    """Extractor for all images of a Misskey user"""
    subcategory = "user"
    # the captured group is the user name, optionally with an '@host'
    # suffix for accounts on other instances (e.g. 'blooddj@pawoo.net')
    pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
    test = (
        ("https://misskey.io/@lithla", {
            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
            "range": "1-50",
            "count": 50,
        }),
        ("https://misskey.io/@blooddj@pawoo.net", {
            "range": "1-50",
            "count": 50,
        }),
        ("https://lesbian.energy/@rerorero", {
            # dots escaped so they only match a literal '.'
            "pattern": r"https://lesbian\.energy/files/\w+",
            "range": "1-50",
            "count": 50,
        }),
        ("https://lesbian.energy/@nano@mk.yopo.work"),
        ("https://sushi.ski/@ui@misskey.04.si"),
    )

    def notes(self):
        """Return the notes of the user named in the URL"""
        # the notes endpoint wants a user ID, so resolve the name first
        user_id = self.api.user_id_by_username(self.item)
        return self.api.users_notes(user_id)
class MisskeyFollowingExtractor(MisskeyExtractor):
    """Extractor for the accounts followed by a Misskey user"""
    subcategory = "following"
    pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
    test = (
        ("https://misskey.io/@blooddj@pawoo.net/following", {
            "extractor": False,
            "count": ">= 6",
        }),
        ("https://sushi.ski/@hatusimo_sigure/following"),
    )

    def items(self):
        """Yield a Queue message with the profile URL of each followee"""
        profile_prefix = self.root + "/@"
        follower_id = self.api.user_id_by_username(self.item)

        for relation in self.api.users_following(follower_id):
            followee = relation["followee"]
            profile_url = profile_prefix + followee["username"]

            # a non-None host is appended as an '@host' suffix
            remote_host = followee["host"]
            if remote_host is not None:
                profile_url += "@" + remote_host

            # hand the queued URL to the user extractor
            followee["_extractor"] = MisskeyUserExtractor
            yield Message.Queue, profile_url, followee
class MisskeyNoteExtractor(MisskeyExtractor):
    """Extractor for the images attached to a single Note"""
    subcategory = "note"
    pattern = BASE_PATTERN + r"/notes/(\w+)"
    test = (
        ("https://misskey.io/notes/9bhqfo835v", {
            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
            "count": 4,
        }),
        ("https://misskey.io/notes/9brq7z1re6"),
        ("https://sushi.ski/notes/9bm3x4ksqw", {
            "pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
            "count": 1,
        }),
        ("https://lesbian.energy/notes/995ig09wqy", {
            "count": 1,
        }),
        ("https://lesbian.energy/notes/96ynd9w5kc"),
    )

    def notes(self):
        """Return a 1-tuple holding the note whose ID is in the URL"""
        note = self.api.notes_show(self.item)
        return (note,)
class MisskeyAPI:
    """Interface for Misskey API

    https://github.com/misskey-dev/misskey
    https://misskey-hub.net/en/docs/api/
    https://misskey-hub.net/docs/api/endpoints.html
    """

    def __init__(self, extractor):
        self.root = extractor.root
        self.extractor = extractor
        self.headers = {"Content-Type": "application/json"}

    def user_id_by_username(self, username):
        """Return the 'id' of the user called 'username'

        A name of the form 'user@host' is split at the first '@' and
        sent as separate 'username' and 'host' fields.
        """
        endpoint = "/users/show"
        data = {"username": username}
        if "@" in username:
            data["username"], _, data["host"] = username.partition("@")
        return self._call(endpoint, data)["id"]

    def users_following(self, user_id):
        """Return a generator over the '/users/following' results"""
        endpoint = "/users/following"
        data = {"userId": user_id}
        return self._pagination(endpoint, data)

    def users_notes(self, user_id):
        """Return a generator over the '/users/notes' results"""
        endpoint = "/users/notes"
        data = {"userId": user_id}
        return self._pagination(endpoint, data)

    def notes_show(self, note_id):
        """Return the decoded '/notes/show' response for 'note_id'"""
        endpoint = "/notes/show"
        data = {"noteId": note_id}
        return self._call(endpoint, data)

    def _call(self, endpoint, data):
        """POST 'data' as JSON to an API endpoint and decode the reply"""
        url = self.root + "/api" + endpoint
        return self.extractor.request(
            url, method="POST", headers=self.headers, json=data).json()

    def _pagination(self, endpoint, data):
        """Yield every item of a paginated endpoint

        Pages of up to 100 items are requested until an empty page
        comes back; each follow-up request sets 'untilId' to the 'id'
        of the last item received so far.
        """
        # work on a private copy so the caller's dict is never modified
        # and can safely be reused for another request
        params = dict(data)
        params["limit"] = 100

        while True:
            page = self._call(endpoint, params)
            if not page:
                return
            yield from page
            params["untilId"] = page[-1]["id"]

@ -140,6 +140,7 @@ SUBCATEGORY_MAP = {
"issue" : "Comic Issues", "issue" : "Comic Issues",
"manga" : "Manga", "manga" : "Manga",
"media" : "Media Files", "media" : "Media Files",
"note" : "Images from Notes",
"popular": "Popular Images", "popular": "Popular Images",
"recent" : "Recent Images", "recent" : "Recent Images",
"search" : "Search Results", "search" : "Search Results",

Loading…
Cancel
Save