|
|
|
@ -104,16 +104,16 @@ class TumblrExtractor(Extractor):
|
|
|
|
|
if "video_url" in post: # type: "video"
|
|
|
|
|
yield self._prepare(_original_video(post["video_url"]), post)
|
|
|
|
|
|
|
|
|
|
if self.inline: # inline media
|
|
|
|
|
for key in ("body", "description", "source"):
|
|
|
|
|
if key not in post:
|
|
|
|
|
continue
|
|
|
|
|
for url in re.findall('<img src="([^"]+)"', post[key]):
|
|
|
|
|
url = _original_inline_image(url)
|
|
|
|
|
yield self._prepare_image(url, post)
|
|
|
|
|
for url in re.findall('<source src="([^"]+)"', post[key]):
|
|
|
|
|
url = _original_video(url)
|
|
|
|
|
yield self._prepare(url, post)
|
|
|
|
|
if self.inline and "reblog" in post: # inline media
|
|
|
|
|
# only "chat" posts are missing a "reblog" key in their
|
|
|
|
|
# API response, but they can't contain images/videos anyway
|
|
|
|
|
body = post["reblog"]["comment"]
|
|
|
|
|
for url in re.findall('<img src="([^"]+)"', body):
|
|
|
|
|
url = _original_inline_image(url)
|
|
|
|
|
yield self._prepare_image(url, post)
|
|
|
|
|
for url in re.findall('<source src="([^"]+)"', body):
|
|
|
|
|
url = _original_video(url)
|
|
|
|
|
yield self._prepare(url, post)
|
|
|
|
|
|
|
|
|
|
if self.external: # external links
|
|
|
|
|
post["extension"] = None
|
|
|
|
|