[bluesky] support video downloads (#6183)

pull/4791/merge
Mike Fährmann 4 days ago
parent af8cba089e
commit 7d6520e15d
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1457,6 +1457,16 @@ Description
Process reposts.
extractor.bluesky.videos
------------------------
Type
``bool``
Default
``true``
Description
Download videos.
extractor.bunkr.tlds
--------------------
Type

@@ -41,6 +41,7 @@ class BlueskyExtractor(Extractor):
self.api = BlueskyAPI(self)
self._user = self._user_did = None
self.instance = self.root.partition("://")[2]
self.videos = self.config("videos", True)
def items(self):
for post in self.posts():
@@ -55,14 +56,6 @@ class BlueskyExtractor(Extractor):
post.update(post["record"])
del post["record"]
images = ()
if "embed" in post:
media = post["embed"]
if "media" in media:
media = media["media"]
if "images" in media:
images = media["images"]
if self._metadata_facets:
if "facets" in post:
post["hashtags"] = tags = []
@@ -82,44 +75,88 @@ class BlueskyExtractor(Extractor):
if self._metadata_user:
post["user"] = self._user or post["author"]
files = self._extract_files(post)
post["instance"] = self.instance
post["post_id"] = pid
post["count"] = len(images)
post["count"] = len(files)
post["date"] = text.parse_datetime(
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, post
if not images:
if not files:
continue
for post["num"], file in enumerate(files, 1):
post.update(file)
yield Message.Url, file["url"], post
def posts(self):
return ()
def _extract_files(self, post):
if "embed" not in post:
return ()
files = []
media = post["embed"]
if "media" in media:
media = media["media"]
if "images" in media:
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
post["num"] = 0
for file in images:
post["num"] += 1
post["description"] = file["alt"]
for image in media["images"]:
try:
aspect = file["aspectRatio"]
post["width"] = aspect["width"]
post["height"] = aspect["height"]
aspect = image["aspectRatio"]
width = aspect["width"]
height = aspect["height"]
except KeyError:
post["width"] = post["height"] = 0
width = height = 0
image = file["image"]
data = image["image"]
try:
cid = image["ref"]["$link"]
cid = data["ref"]["$link"]
except KeyError:
cid = image["cid"]
post["filename"] = cid
post["extension"] = image["mimeType"].rpartition("/")[2]
yield Message.Url, base + cid, post
cid = data["cid"]
files.append({
"description": image.get("alt"),
"width" : width,
"height" : height,
"filename" : cid,
"extension" : data["mimeType"].rpartition("/")[2],
"url" : base + cid,
})
if "video" in media and self.videos:
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
try:
aspect = media["aspectRatio"]
width = aspect["width"]
height = aspect["height"]
except KeyError:
width = height = 0
def posts(self):
return ()
video = media["video"]
try:
cid = video["ref"]["$link"]
except KeyError:
cid = video["cid"]
files.append({
"description": media.get("alt") or "",
"width" : width,
"height" : height,
"filename" : cid,
"extension" : video["mimeType"].rpartition("/")[2],
"url" : base + cid,
})
return files
def _make_post(self, actor, kind):
did = self.api._did_from_actor(actor)

@@ -149,13 +149,13 @@ __tests__ = (
"user" : {
"avatar" : str,
"banner" : str,
"description" : "Official Bluesky account (check domain👆)\n\nFollow for updates and announcements",
"description" : str,
"did" : "did:plc:z72i7hdynmk6r22z27h6tvur",
"displayName" : "Bluesky",
"followersCount": int,
"followsCount" : int,
"handle" : "bsky.app",
"indexedAt" : "2024-01-20T05:04:41.904Z",
"indexedAt" : "2024-08-30T21:49:26.737Z",
"labels" : [],
"postsCount" : int,
},
@@ -212,4 +212,19 @@ __tests__ = (
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:owc2r2dsewj3hk73rtd746zh&cid=bafkreieuhplc7fpbvi3suvacaf2dqxzvuu4hgl5o6eifqb76tf3uopldmi",
},
{
"#url" : "https://bsky.app/profile/mikf.bsky.social/post/3l46q5glfex27",
"#comment" : "video (#6183)",
"#category": ("", "bluesky", "post"),
"#class" : bluesky.BlueskyPostExtractor,
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:cslxjqkeexku6elp5xowxkq7&cid=bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4",
"description": "kirby and reimu dance",
"text" : "video",
"width" : 1280,
"height" : 720,
"filename" : "bafkreihq2nsfocrnlpx4nykb4szouqszxwmy3ucnk4k46nx5t6hjnxlti4",
"extension" : "mp4",
},
)

Loading…
Cancel
Save