From df1c643ddab57c64e735be3b9c4d4a547de8be45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 10 Aug 2022 20:01:46 +0200 Subject: [PATCH] [tumblr] attempt to extract full-resolution photos - for photos with apparent width == 2048 or height == 3072 - can be disabled with 'original' option --- docs/configuration.rst | 14 ++++++++++++++ docs/gallery-dl.conf | 1 + gallery_dl/extractor/tumblr.py | 18 ++++++++++++++++++ 3 files changed, 33 insertions(+) diff --git a/docs/configuration.rst b/docs/configuration.rst index a6318378..76a4537a 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -2237,6 +2237,20 @@ Description Search posts for inline images and videos. +extractor.tumblr.original +------------------------- +Type + ``bool`` +Default + ``true`` +Description + Download full-resolution ``photo`` images. + + For each photo with "maximum" resolution + (width equal to 2048 or height equal to 3072), + use an extra HTTP request to find the URL to its full-resolution version. + + extractor.tumblr.reblogs ------------------------ Type diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index be881be5..bbd4dc09 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -284,6 +284,7 @@ "external": false, "inline": true, "posts": "all", + "original": true, "reblogs": true }, "twitter": diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index cf999987..6d7c9405 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -64,6 +64,7 @@ class TumblrExtractor(Extractor): self.inline = self.config("inline", True) self.reblogs = self.config("reblogs", True) self.external = self.config("external", False) + self.original = self.config("original", True) if len(self.types) == 1: self.api.posts_type = next(iter(self.types)) @@ -110,12 +111,17 @@ class TumblrExtractor(Extractor): for photo in photos: post["photo"] = photo + best_photo = photo["original_size"] for alt_photo in photo["alt_sizes"]: if (alt_photo["height"] > best_photo["height"] or alt_photo["width"] > best_photo["width"]): best_photo = alt_photo photo.update(best_photo) + + if "/s2048x3072/" in photo["url"] and self.original: + photo["url"] = self._original_image(photo["url"]) + del photo["original_size"] del photo["alt_sizes"] yield self._prepare_image(photo["url"], post) @@ -205,6 +211,12 @@ class TumblrExtractor(Extractor): def _skip_reblog_same_blog(self, post): return self.blog != post.get("reblogged_root_uuid") + def _original_image(self, url): + url = url.replace("/s2048x3072/", "/s99999x99999/", 1) + headers = {"Accept": "text/html,*/*;q=0.8"} + response = self.request(url, headers=headers) + return text.extract(response.text, '" src="', '"')[0] + class TumblrUserExtractor(TumblrExtractor): """Extractor for all images from a tumblr-user""" @@ -284,6 +296,12 @@ class TumblrPostExtractor(TumblrExtractor): ("https://mikf123.tumblr.com/post/181022380064/chat-post", { "count": 0, }), + ("https://mikf123.tumblr.com/image/689860196535762944", { + "pattern": r"^https://\d+\.media\.tumblr\.com" + r"/134791621559a79793563b636b5fe2c6" + r"/8f1131551cef6e74-bc/s99999x99999" + r"/188cf9b8915b0d0911c6c743d152fc62e8f38491\.png$", + }), ("http://ziemniax.tumblr.com/post/109697912859/", { "exception": exception.NotFoundError, # HTML response (#297) }),