[bluesky] download images in original resolution (#4438)

at least up to 2000 px
pull/5195/head
Mike Fährmann 7 months ago
parent 6414dc6bca
commit 55bbd49a0e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -20,7 +20,7 @@ class BlueskyExtractor(Extractor):
"""Base class for bluesky extractors"""
category = "bluesky"
directory_fmt = ("{category}", "{author[handle]}")
filename_fmt = "{indexedAt[:19]}_{post_id}_{num}.{extension}"
filename_fmt = "{createdAt[:19]}_{post_id}_{num}.{extension}"
archive_fmt = "{filename}"
root = "https://bsky.app"
@ -34,20 +34,31 @@ class BlueskyExtractor(Extractor):
def items(self):
for post in self.posts():
post = post["post"]
post.update(post["record"])
del post["record"]
try:
images = post["embed"]["images"]
except KeyError:
images = ()
images = ()
if "embed" in post:
media = post["embed"]
if "media" in media:
media = media["media"]
if "images" in media:
images = media["images"]
post["post_id"] = post["uri"].rpartition("/")[2]
post["count"] = len(images)
post["date"] = text.parse_datetime(
post["indexedAt"][:19], "%Y-%m-%dT%H:%M:%S")
post["createdAt"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, post
if not images:
continue
base = ("https://bsky.social/xrpc/com.atproto.sync.getBlob"
"?did={}&cid=".format(post["author"]["did"]))
post["num"] = 0
for file in images:
post["num"] += 1
post["description"] = file["alt"]
@ -59,11 +70,11 @@ class BlueskyExtractor(Extractor):
except KeyError:
post["width"] = post["height"] = 0
url = file["fullsize"]
name = url.rpartition("/")[2]
post["filename"], _, post["extension"] = name.rpartition("@")
image = file["image"]
post["filename"] = link = image["ref"]["$link"]
post["extension"] = image["mimeType"].rpartition("/")[2]
yield Message.Url, url, post
yield Message.Url, base + link, post
def posts(self):
return ()

@ -12,8 +12,8 @@ __tests__ = (
"#url" : "https://bsky.app/profile/bsky.app/post/3kh5rarr3gn2n",
"#category": ("", "bluesky", "post"),
"#class" : bluesky.BlueskyPostExtractor,
"#urls" : "https://cdn.bsky.app/img/feed_fullsize/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri@jpeg",
"#sha1_content": "c36a27d135277dc08b7bfd289e4078af7b32c720",
"#urls" : "https://bsky.social/xrpc/com.atproto.sync.getBlob?did=did:plc:z72i7hdynmk6r22z27h6tvur&cid=bafkreidypzoaybmfj5h7pnpiyct6ng5yae6ydp4czrm72ocg7ev6vbirri",
"#sha1_content": "ffcf25e7c511173a12de5276b85903309fcd8d14",
"author": {
"avatar" : "https://cdn.bsky.app/img/avatar/plain/did:plc:z72i7hdynmk6r22z27h6tvur/bafkreihagr2cmvl2jt4mgx3sppwe2it3fwolkrbtjrhcnwjk4jdijhsoze@jpeg",
@ -24,6 +24,7 @@ __tests__ = (
},
"cid" : "bafyreihh7m6bfrwlcjfklwturmja7qfse5gte7lskpmgw76flivimbnoqm",
"count" : 1,
"createdAt" : "2023-12-22T18:58:32.715Z",
"date" : "dt:2023-12-22 18:58:32",
"description": "The bluesky logo with the blue butterfly",
"extension" : "jpeg",

Loading…
Cancel
Save