From 7d6520e15dba1a50e7c3609293efe3ddd521aa75 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 15 Sep 2024 19:39:54 +0200 Subject: [PATCH] [bluesky] support video downloads (#6183) --- docs/configuration.rst | 10 ++++ gallery_dl/extractor/bluesky.py | 93 +++++++++++++++++++++++---------- test/results/bluesky.py | 19 ++++++- 3 files changed, 92 insertions(+), 30 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index ad6ad07f..3574e2c3 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1457,6 +1457,16 @@ Description Process reposts. +extractor.bluesky.videos +------------------------ +Type + ``bool`` +Default + ``true`` +Description + Download videos. + + extractor.bunkr.tlds -------------------- Type diff --git a/gallery_dl/extractor/bluesky.py b/gallery_dl/extractor/bluesky.py index c97bf65e..787d5b06 100644 --- a/gallery_dl/extractor/bluesky.py +++ b/gallery_dl/extractor/bluesky.py @@ -41,6 +41,7 @@ class BlueskyExtractor(Extractor): self.api = BlueskyAPI(self) self._user = self._user_did = None self.instance = self.root.partition("://")[2] + self.videos = self.config("videos", True) def items(self): for post in self.posts(): @@ -55,14 +56,6 @@ class BlueskyExtractor(Extractor): post.update(post["record"]) del post["record"] - images = () - if "embed" in post: - media = post["embed"] - if "media" in media: - media = media["media"] - if "images" in media: - images = media["images"] - if self._metadata_facets: if "facets" in post: post["hashtags"] = tags = [] @@ -82,44 +75,88 @@ class BlueskyExtractor(Extractor): if self._metadata_user: post["user"] = self._user or post["author"] + files = self._extract_files(post) post["instance"] = self.instance post["post_id"] = pid - post["count"] = len(images) + post["count"] = len(files) post["date"] = text.parse_datetime( post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S") yield Message.Directory, post - if not images: + if not files: continue + for post["num"], file in enumerate(files, 1): + post.update(file) + yield Message.Url, file["url"], post + + def posts(self): + return () + + def _extract_files(self, post): + if "embed" not in post: + return () + + files = [] + + media = post["embed"] + if "media" in media: + media = media["media"] + + if "images" in media: base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" "?did={}&cid=".format(post["author"]["did"])) - post["num"] = 0 - - for file in images: - post["num"] += 1 - post["description"] = file["alt"] + for image in media["images"]: try: - aspect = file["aspectRatio"] - post["width"] = aspect["width"] - post["height"] = aspect["height"] + aspect = image["aspectRatio"] + width = aspect["width"] + height = aspect["height"] except KeyError: - post["width"] = post["height"] = 0 + width = height = 0 - image = file["image"] + data = image["image"] try: - cid = image["ref"]["$link"] + cid = data["ref"]["$link"] except KeyError: - cid = image["cid"] - post["filename"] = cid - post["extension"] = image["mimeType"].rpartition("/")[2] - - yield Message.Url, base + cid, post + cid = data["cid"] + + files.append({ + "description": image.get("alt"), + "width" : width, + "height" : height, + "filename" : cid, + "extension" : data["mimeType"].rpartition("/")[2], + "url" : base + cid, + }) + + if "video" in media and self.videos: + base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob" + "?did={}&cid=".format(post["author"]["did"])) + try: + aspect = media["aspectRatio"] + width = aspect["width"] + height = aspect["height"] + except KeyError: + width = height = 0 - def posts(self): - return () + video = media["video"] + try: + cid = video["ref"]["$link"] + except KeyError: + cid = video["cid"] + + files.append({ + "description": media.get("alt") or "", + "width" : width, + "height" : height, + "filename" : cid, + "extension" : video["mimeType"].rpartition("/")[2], + "url" : base + cid, + }) + + return files def _make_post(self, actor, kind): did = self.api._did_from_actor(actor) diff --git a/test/results/bluesky.py b/test/results/bluesky.py index 68815614..a30e4018 100644 --- a/test/results/bluesky.py +++ b/test/results/bluesky.py @@ -149,13 +149,13 @@ __tests__ = ( "user" : { "avatar" : str, "banner" : str, - "description" : "Official Bluesky account (check domain👆)\n\nFollow for updates and announcements", + "description" : str, "did" : "did:plc:z72i7hdynmk6r22z27h6tvur", "displayName" : "Bluesky", "followersCount": int, "followsCount" : int, "handle" : "bsky.app", - "indexedAt" : "2024-01-20T05:04:41.904Z", + "indexedAt" : "2024-08-30T21:49:26.737Z", "labels" : [], "postsCount" : int, }, @@ -212,4 +212,19 @@ __tests__ = ( "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:owc2r2dsewj3hk73rtd746zh&cid=bafkreieuhplc7fpbvi3suvacaf2dqxzvuu4hgl5o6eifqb76tf3uopldmi", }, +{ + "#url" : "https://bsky.app/profile/mikf.bsky.social/post/3l46q5glfex27", + "#comment" : "video (#6183)", + "#category": ("", "bluesky", "post"), + "#class" : bluesky.BlueskyPostExtractor, + "#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4", + + "description": "kirby and reimu dance", + "text" : "video", + "width" : 1280, + "height" : 720, + "filename" : "bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4", + "extension" : "mp4", +}, + )