[tumblr] add fallback for failed higher-resolution images (#2957)

pull/3003/head
Mike Fährmann 2 years ago
parent 6992d01e19
commit f728b5ca06
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -116,15 +116,17 @@ class TumblrExtractor(Extractor):
if self.original and "/s2048x3072/" in photo["url"] and ( if self.original and "/s2048x3072/" in photo["url"] and (
photo["width"] == 2048 or photo["height"] == 3072): photo["width"] == 2048 or photo["height"] == 3072):
try: photo["url"], fb = self._original_photo(photo["url"])
photo["url"] = self._original_photo(photo["url"]) if fb:
except Exception: post["_fallback"] = self._original_image_fallback(
self._warn_original(photo["url"], post) photo["url"], post["id"])
del photo["original_size"] del photo["original_size"]
del photo["alt_sizes"] del photo["alt_sizes"]
posts.append( posts.append(
self._prepare_image(photo["url"], post.copy())) self._prepare_image(photo["url"], post.copy()))
del post["photo"] del post["photo"]
post.pop("_fallback", None)
url = post.get("audio_url") # type "audio" url = post.get("audio_url") # type "audio"
if url and url.startswith("https://a.tumblr.com/"): if url and url.startswith("https://a.tumblr.com/"):
@ -140,11 +142,12 @@ class TumblrExtractor(Extractor):
# API response, but they can't contain images/videos anyway # API response, but they can't contain images/videos anyway
body = post["reblog"]["comment"] + post["reblog"]["tree_html"] body = post["reblog"]["comment"] + post["reblog"]["tree_html"]
for url in _findall_image(body): for url in _findall_image(body):
try: url, fb = self._original_inline_image(url)
url = self._original_inline_image(url) if fb:
except Exception: post["_fallback"] = self._original_image_fallback(
self._warn_original(url, post) url, post["id"])
posts.append(self._prepare_image(url, post.copy())) posts.append(self._prepare_image(url, post.copy()))
post.pop("_fallback", None)
for url in _findall_video(body): for url in _findall_video(body):
url = self._original_video(url) url = self._original_video(url)
posts.append(self._prepare(url, post.copy())) posts.append(self._prepare(url, post.copy()))
@ -231,25 +234,23 @@ class TumblrExtractor(Extractor):
resized, n = self._subn_orig_image("/s99999x99999/", url, 1) resized, n = self._subn_orig_image("/s99999x99999/", url, 1)
if n: if n:
return self._update_image_token(resized) return self._update_image_token(resized)
return self._sub_image(r"https://\1_1280.\2", url) return self._sub_image(r"https://\1_1280.\2", url), False
def _original_video(self, url): def _original_video(self, url):
return self._sub_video(r"https://\1.\2", url) return self._sub_video(r"https://\1.\2", url)
def _update_image_token(self, resized): def _update_image_token(self, resized):
headers = {"Accept": "text/html,*/*;q=0.8"} headers = {"Accept": "text/html,*/*;q=0.8"}
response = self.request(resized, headers=headers)
for _ in range(3): updated = text.extract(response.text, '" src="', '"')[0]
response = self.request(resized, headers=headers) return updated, (resized == updated)
updated = text.extract(response.text, '" src="', '"')[0]
if updated != resized: def _original_image_fallback(self, url, post_id):
return updated yield self._update_image_token(url)[0]
yield self._update_image_token(url)[0]
raise RuntimeError("invalid token") yield self._update_image_token(url)[0]
def _warn_original(self, url, post):
self.log.warning("Unable to fetch higher-resolution " self.log.warning("Unable to fetch higher-resolution "
"version of %s (%s)", url, post["id"]) "version of %s (%s)", url, post_id)
class TumblrUserExtractor(TumblrExtractor): class TumblrUserExtractor(TumblrExtractor):

Loading…
Cancel
Save