merge #3717: [misskey] add misskey extractors
commit
6762d99515
@ -0,0 +1,200 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
from .common import BaseExtractor, Message
|
||||
from .. import text
|
||||
|
||||
|
||||
class MisskeyExtractor(BaseExtractor):
|
||||
"""Base class for Misskey extractors"""
|
||||
basecategory = "misskey"
|
||||
directory_fmt = ("misskey", "{instance}", "{user[username]}")
|
||||
filename_fmt = "{category}_{id}_{file[id]}.{extension}"
|
||||
archive_fmt = "{id}_{file[id]}"
|
||||
|
||||
def __init__(self, match):
|
||||
BaseExtractor.__init__(self, match)
|
||||
self.api = MisskeyAPI(self)
|
||||
self.instance = self.root.rpartition("://")[2]
|
||||
self.item = match.group(match.lastindex)
|
||||
self.renotes = self.config("renotes", False)
|
||||
self.replies = self.config("replies", True)
|
||||
|
||||
def items(self):
|
||||
for note in self.notes():
|
||||
files = note.pop("files") or []
|
||||
renote = note.get("renote")
|
||||
if renote:
|
||||
if not self.renotes:
|
||||
self.log.debug("Skipping %s (renote)", note["id"])
|
||||
continue
|
||||
files.extend(renote.get("files") or ())
|
||||
|
||||
reply = note.get("reply")
|
||||
if reply:
|
||||
if not self.replies:
|
||||
self.log.debug("Skipping %s (reply)", note["id"])
|
||||
continue
|
||||
files.extend(reply.get("files") or ())
|
||||
|
||||
note["instance"] = self.instance
|
||||
note["instance_remote"] = note["user"]["host"]
|
||||
note["count"] = len(files)
|
||||
note["date"] = text.parse_datetime(
|
||||
note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
|
||||
yield Message.Directory, note
|
||||
for note["num"], file in enumerate(files, 1):
|
||||
file["date"] = text.parse_datetime(
|
||||
file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
|
||||
note["file"] = file
|
||||
url = file["url"]
|
||||
yield Message.Url, url, text.nameext_from_url(url, note)
|
||||
|
||||
def notes(self):
|
||||
"""Return an iterable containing all relevant Note objects"""
|
||||
return ()
|
||||
|
||||
|
||||
BASE_PATTERN = MisskeyExtractor.update({
|
||||
"misskey.io": {
|
||||
"root": "https://misskey.io",
|
||||
"pattern": r"misskey\.io",
|
||||
},
|
||||
"lesbian.energy": {
|
||||
"root": "https://lesbian.energy",
|
||||
"pattern": r"lesbian\.energy"
|
||||
},
|
||||
"sushi.ski": {
|
||||
"root": "https://sushi.ski",
|
||||
"pattern": r"sushi\.ski",
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
class MisskeyUserExtractor(MisskeyExtractor):
|
||||
"""Extractor for all images of a Misskey user"""
|
||||
subcategory = "user"
|
||||
pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
|
||||
test = (
|
||||
("https://misskey.io/@lithla", {
|
||||
"pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
}),
|
||||
("https://misskey.io/@blooddj@pawoo.net", {
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
}),
|
||||
("https://lesbian.energy/@rerorero", {
|
||||
"pattern": r"https://lesbian.energy/files/\w+",
|
||||
"range": "1-50",
|
||||
"count": 50,
|
||||
}),
|
||||
("https://lesbian.energy/@nano@mk.yopo.work"),
|
||||
("https://sushi.ski/@ui@misskey.04.si"),
|
||||
)
|
||||
|
||||
def notes(self):
|
||||
return self.api.users_notes(self.api.user_id_by_username(self.item))
|
||||
|
||||
|
||||
class MisskeyFollowingExtractor(MisskeyExtractor):
|
||||
"""Extractor for followed Misskey users"""
|
||||
subcategory = "following"
|
||||
pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
|
||||
test = (
|
||||
("https://misskey.io/@blooddj@pawoo.net/following", {
|
||||
"extractor": False,
|
||||
"count": ">= 6",
|
||||
}),
|
||||
("https://sushi.ski/@hatusimo_sigure/following"),
|
||||
)
|
||||
|
||||
def items(self):
|
||||
user_id = self.api.user_id_by_username(self.item)
|
||||
for user in self.api.users_following(user_id):
|
||||
user = user["followee"]
|
||||
url = self.root + "/@" + user["username"]
|
||||
host = user["host"]
|
||||
if host is not None:
|
||||
url += "@" + host
|
||||
user["_extractor"] = MisskeyUserExtractor
|
||||
yield Message.Queue, url, user
|
||||
|
||||
|
||||
class MisskeyNoteExtractor(MisskeyExtractor):
|
||||
"""Extractor for images from a Note"""
|
||||
subcategory = "note"
|
||||
pattern = BASE_PATTERN + r"/notes/(\w+)"
|
||||
test = (
|
||||
("https://misskey.io/notes/9bhqfo835v", {
|
||||
"pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
|
||||
"count": 4,
|
||||
}),
|
||||
("https://misskey.io/notes/9brq7z1re6"),
|
||||
("https://sushi.ski/notes/9bm3x4ksqw", {
|
||||
"pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
|
||||
"count": 1,
|
||||
}),
|
||||
("https://lesbian.energy/notes/995ig09wqy", {
|
||||
"count": 1,
|
||||
}),
|
||||
("https://lesbian.energy/notes/96ynd9w5kc"),
|
||||
)
|
||||
|
||||
def notes(self):
|
||||
return (self.api.notes_show(self.item),)
|
||||
|
||||
|
||||
class MisskeyAPI():
|
||||
"""Interface for Misskey API
|
||||
|
||||
https://github.com/misskey-dev/misskey
|
||||
https://misskey-hub.net/en/docs/api/
|
||||
https://misskey-hub.net/docs/api/endpoints.html
|
||||
"""
|
||||
|
||||
def __init__(self, extractor):
|
||||
self.root = extractor.root
|
||||
self.extractor = extractor
|
||||
self.headers = {"Content-Type": "application/json"}
|
||||
|
||||
def user_id_by_username(self, username):
|
||||
endpoint = "/users/show"
|
||||
data = {"username": username}
|
||||
if "@" in username:
|
||||
data["username"], _, data["host"] = username.partition("@")
|
||||
return self._call(endpoint, data)["id"]
|
||||
|
||||
def users_following(self, user_id):
|
||||
endpoint = "/users/following"
|
||||
data = {"userId": user_id}
|
||||
return self._pagination(endpoint, data)
|
||||
|
||||
def users_notes(self, user_id):
|
||||
endpoint = "/users/notes"
|
||||
data = {"userId": user_id}
|
||||
return self._pagination(endpoint, data)
|
||||
|
||||
def notes_show(self, note_id):
|
||||
endpoint = "/notes/show"
|
||||
data = {"noteId": note_id}
|
||||
return self._call(endpoint, data)
|
||||
|
||||
def _call(self, endpoint, data):
|
||||
url = self.root + "/api" + endpoint
|
||||
return self.extractor.request(
|
||||
url, method="POST", headers=self.headers, json=data).json()
|
||||
|
||||
def _pagination(self, endpoint, data):
|
||||
data["limit"] = 100
|
||||
while True:
|
||||
notes = self._call(endpoint, data)
|
||||
if not notes:
|
||||
return
|
||||
yield from notes
|
||||
data["untilId"] = notes[-1]["id"]
|
Loading…
Reference in new issue