diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index aeb06c56..96056911 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -15,12 +15,20 @@ import re def _original_image(url): - if url.endswith(".gif") and "_inline_" in url: - return url - return re.sub( - (r"https?://\d+\.media\.tumblr\.com" - r"/([0-9a-f]+/tumblr_[^/?&#.]+)_\d+\.([0-9a-z]+)"), - r"https://s3.amazonaws.com/data.tumblr.com/\1_raw.\2", url + match = re.match( + r"https?://\d+\.media\.tumblr\.com" + r"((/[0-9a-f]+)?/tumblr_[^/?&#.]+)_\d+\.([0-9a-z]+)", + url) + + if not match: + return (url,) + root = "https://s3.amazonaws.com/data.tumblr.com" + path, key, ext = match.groups() + + return ( + "".join((root, path, "_raw." if key else "_1280.", ext)), + "".join((root, path, "_500.", ext)), + url, ) @@ -90,7 +98,7 @@ class TumblrExtractor(Extractor): photo.update(photo["original_size"]) del photo["original_size"] del photo["alt_sizes"] - yield self._prepare(_original_image(photo["url"]), post) + yield self._prepare_image(photo["url"], post) if "audio_url" in post: # type: "audio" yield self._prepare(post["audio_url"], post) @@ -102,7 +110,7 @@ class TumblrExtractor(Extractor): for key in ("body", "description"): if key in post: for url in re.findall('