diff --git a/docs/configuration.rst b/docs/configuration.rst
index a59bf3ad..d3ffb031 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -2029,6 +2029,26 @@ Description
     Also emit metadata for text-only posts without media content.
 
 
+extractor.[misskey].renotes
+----------------------------
+Type
+    ``bool``
+Default
+    ``false``
+Description
+    Fetch media from renoted notes.
+
+
+extractor.[misskey].replies
+----------------------------
+Type
+    ``bool``
+Default
+    ``true``
+Description
+    Fetch media from replies to other notes.
+
+
 extractor.nana.favkey
 ---------------------
 Type
diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index c991c32c..220ce0f2 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1126,6 +1126,28 @@ Consider all sites to be NSFW unless otherwise known.
     <td></td>
 </tr>
 
+<tr>
+    <td colspan="4"><strong>Misskey Instances</strong></td>
+</tr>
+<tr>
+    <td>Misskey.io</td>
+    <td>https://misskey.io/</td>
+    <td>Images from Notes, User Profiles</td>
+    <td></td>
+</tr>
+<tr>
+    <td>Lesbian.energy</td>
+    <td>https://lesbian.energy/</td>
+    <td>Images from Notes, User Profiles</td>
+    <td></td>
+</tr>
+<tr>
+    <td>Sushi.ski</td>
+    <td>https://sushi.ski/</td>
+    <td>Images from Notes, User Profiles</td>
+    <td></td>
+</tr>
+
 <tr>
     <td colspan="4"><strong>Nijie Instances</strong></td>
 </tr>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 6f660580..3968d727 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -94,6 +94,7 @@ modules = [
     "mangasee",
     "mangoxo",
     "mememuseum",
+    "misskey",
     "myhentaigallery",
     "myportfolio",
     "nana",
diff --git a/gallery_dl/extractor/misskey.py b/gallery_dl/extractor/misskey.py
new file mode 100644
index 00000000..06fccc7d
--- /dev/null
+++ b/gallery_dl/extractor/misskey.py
@@ -0,0 +1,214 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from .common import BaseExtractor, Message
+from .. import text
+
+
+class MisskeyExtractor(BaseExtractor):
+    """Base class for Misskey extractors"""
+    basecategory = "misskey"
+    directory_fmt = ("misskey", "{instance}", "{user[username]}")
+    filename_fmt = "{category}_{id}_{file[id]}.{extension}"
+    archive_fmt = "{id}_{file[id]}"
+
+    def __init__(self, match):
+        BaseExtractor.__init__(self, match)
+        self.api = MisskeyAPI(self)
+        self.instance = self.root.rpartition("://")[2]
+        self.item = match.group(match.lastindex)
+        self.renotes = self.config("renotes", False)
+        self.replies = self.config("replies", True)
+
+    def items(self):
+        """Yield a Directory message per note and a Url message per file"""
+        for note in self.notes():
+            files = note.pop("files") or []
+            renote = note.get("renote")
+            if renote:
+                if not self.renotes:
+                    self.log.debug("Skipping %s (renote)", note["id"])
+                    continue
+                # include the media attached to the renoted note
+                files.extend(renote.get("files") or ())
+
+            reply = note.get("reply")
+            if reply:
+                if not self.replies:
+                    self.log.debug("Skipping %s (reply)", note["id"])
+                    continue
+                # include the media attached to the note replied to
+                files.extend(reply.get("files") or ())
+
+            note["instance"] = self.instance
+            note["instance_remote"] = note["user"]["host"]
+            note["count"] = len(files)
+            note["date"] = text.parse_datetime(
+                note["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+
+            yield Message.Directory, note
+            for note["num"], file in enumerate(files, 1):
+                file["date"] = text.parse_datetime(
+                    file["createdAt"], "%Y-%m-%dT%H:%M:%S.%f%z")
+                note["file"] = file
+                url = file["url"]
+                yield Message.Url, url, text.nameext_from_url(url, note)
+
+    def notes(self):
+        """Return an iterable containing all relevant Note objects"""
+        return ()
+
+
+BASE_PATTERN = MisskeyExtractor.update({
+    "misskey.io": {
+        "root": "https://misskey.io",
+        "pattern": r"misskey\.io",
+    },
+    "lesbian.energy": {
+        "root": "https://lesbian.energy",
+        "pattern": r"lesbian\.energy"
+    },
+    "sushi.ski": {
+        "root": "https://sushi.ski",
+        "pattern": r"sushi\.ski",
+    },
+})
+
+
+class MisskeyUserExtractor(MisskeyExtractor):
+    """Extractor for all images of a Misskey user"""
+    subcategory = "user"
+    pattern = BASE_PATTERN + r"/@([^/?#]+)/?$"
+    test = (
+        ("https://misskey.io/@lithla", {
+            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
+            "range": "1-50",
+            "count": 50,
+        }),
+        ("https://misskey.io/@blooddj@pawoo.net", {
+            "range": "1-50",
+            "count": 50,
+        }),
+        ("https://lesbian.energy/@rerorero", {
+            "pattern": r"https://lesbian\.energy/files/\w+",
+            "range": "1-50",
+            "count": 50,
+        }),
+        ("https://lesbian.energy/@nano@mk.yopo.work"),
+        ("https://sushi.ski/@ui@misskey.04.si"),
+    )
+
+    def notes(self):
+        """Return the notes of the user named in the URL"""
+        return self.api.users_notes(self.api.user_id_by_username(self.item))
+
+
+class MisskeyFollowingExtractor(MisskeyExtractor):
+    """Extractor for followed Misskey users"""
+    subcategory = "following"
+    pattern = BASE_PATTERN + r"/@([^/?#]+)/following"
+    test = (
+        ("https://misskey.io/@blooddj@pawoo.net/following", {
+            "extractor": False,
+            "count": ">= 6",
+        }),
+        ("https://sushi.ski/@hatusimo_sigure/following"),
+    )
+
+    def items(self):
+        """Yield a Queue message with the profile URL of each followee"""
+        user_id = self.api.user_id_by_username(self.item)
+        for user in self.api.users_following(user_id):
+            user = user["followee"]
+            url = self.root + "/@" + user["username"]
+            host = user["host"]
+            # users with a 'host' value are addressed as @username@host
+            if host is not None:
+                url += "@" + host
+            user["_extractor"] = MisskeyUserExtractor
+            yield Message.Queue, url, user
+
+
+class MisskeyNoteExtractor(MisskeyExtractor):
+    """Extractor for images from a Note"""
+    subcategory = "note"
+    pattern = BASE_PATTERN + r"/notes/(\w+)"
+    test = (
+        ("https://misskey.io/notes/9bhqfo835v", {
+            "pattern": r"https://s\d+\.arkjp\.net/misskey/[\w-]+\.\w+",
+            "count": 4,
+        }),
+        ("https://misskey.io/notes/9brq7z1re6"),
+        ("https://sushi.ski/notes/9bm3x4ksqw", {
+            "pattern": r"https://media\.sushi\.ski/files/[\w-]+\.png",
+            "count": 1,
+        }),
+        ("https://lesbian.energy/notes/995ig09wqy", {
+            "count": 1,
+        }),
+        ("https://lesbian.energy/notes/96ynd9w5kc"),
+    )
+
+    def notes(self):
+        """Return a 1-tuple holding the note referenced by the URL"""
+        return (self.api.notes_show(self.item),)
+
+
+class MisskeyAPI():
+    """Interface for Misskey API
+
+    https://github.com/misskey-dev/misskey
+    https://misskey-hub.net/en/docs/api/
+    https://misskey-hub.net/docs/api/endpoints.html
+    """
+
+    def __init__(self, extractor):
+        self.root = extractor.root
+        self.extractor = extractor
+        self.headers = {"Content-Type": "application/json"}
+
+    def user_id_by_username(self, username):
+        """Return the ID of the user 'username' or 'username@host'"""
+        endpoint = "/users/show"
+        data = {"username": username}
+        if "@" in username:
+            data["username"], _, data["host"] = username.partition("@")
+        return self._call(endpoint, data)["id"]
+
+    def users_following(self, user_id):
+        """Return an iterator over the 'following' entries of a user"""
+        endpoint = "/users/following"
+        data = {"userId": user_id}
+        return self._pagination(endpoint, data)
+
+    def users_notes(self, user_id):
+        """Return an iterator over the notes of a user"""
+        endpoint = "/users/notes"
+        data = {"userId": user_id}
+        return self._pagination(endpoint, data)
+
+    def notes_show(self, note_id):
+        """Return the note with the given ID"""
+        endpoint = "/notes/show"
+        data = {"noteId": note_id}
+        return self._call(endpoint, data)
+
+    def _call(self, endpoint, data):
+        """POST 'data' as JSON to 'endpoint' and return the parsed response"""
+        url = self.root + "/api" + endpoint
+        return self.extractor.request(
+            url, method="POST", headers=self.headers, json=data).json()
+
+    def _pagination(self, endpoint, data):
+        """Yield the results of 'endpoint' page by page until one is empty"""
+        data["limit"] = 100
+        while True:
+            notes = self._call(endpoint, data)
+            if not notes:
+                return
+            yield from notes
+            # continue after the last item of this page
+            data["untilId"] = notes[-1]["id"]
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index fe647dc2..ff75c6c8 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -140,6 +140,7 @@ SUBCATEGORY_MAP = {
         "issue"  : "Comic Issues",
         "manga"  : "Manga",
         "media"  : "Media Files",
+        "note"   : "Images from Notes",
         "popular": "Popular Images",
         "recent" : "Recent Images",
         "search" : "Search Results",