[tumblr] improve inline extraction (#137)

server
Mike Fährmann 6 years ago
parent 010da8372a
commit b7a9f6cc49
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -104,16 +104,16 @@ class TumblrExtractor(Extractor):
if "video_url" in post: # type: "video"
yield self._prepare(_original_video(post["video_url"]), post)
if self.inline: # inline media
for key in ("body", "description", "source"):
if key not in post:
continue
for url in re.findall('<img src="([^"]+)"', post[key]):
url = _original_inline_image(url)
yield self._prepare_image(url, post)
for url in re.findall('<source src="([^"]+)"', post[key]):
url = _original_video(url)
yield self._prepare(url, post)
if self.inline and "reblog" in post: # inline media
# only "chat" posts are missing a "reblog" key in their
# API response, but they can't contain images/videos anyway
body = post["reblog"]["comment"]
for url in re.findall('<img src="([^"]+)"', body):
url = _original_inline_image(url)
yield self._prepare_image(url, post)
for url in re.findall('<source src="([^"]+)"', body):
url = _original_video(url)
yield self._prepare(url, post)
if self.external: # external links
post["extension"] = None

Loading…
Cancel
Save