[reddit] download preview for 404ed imgur links (#4322)

This is a pretty ugly hack, as the internal infrastructure doesn't
really support switching from an external URL to a regular download
in case the former fails, but it kind of works ...

Can be disabled by setting 'reddit.fallback' to 'false'.
pull/4489/head
Mike Fährmann 1 year ago
parent d12a5e440a
commit 14af15bd18
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -108,7 +108,11 @@ class RedditExtractor(Extractor):
if match:
extra.append(match.group(1))
elif not match_user(url) and not match_subreddit(url):
if "preview" in data:
data["_fallback"] = self._previews(data)
yield Message.Queue, text.unescape(url), data
if "_fallback" in data:
del data["_fallback"]
if not extra or depth == max_depth:
return
@ -165,6 +169,13 @@ class RedditExtractor(Extractor):
submission["_ytdl_extra"] = {"title": submission["title"]}
return submission["url"]
def _previews(self, post):
    """Yield the source URL of every preview image listed in 'post'"""
    try:
        # Both lookups stay inside the 'try' so that a missing or
        # malformed "preview" entry is logged instead of propagating.
        previews = post["preview"]["images"]
        for entry in previews:
            source = entry["source"]
            yield source["url"]
    except Exception as exc:
        # Best-effort: report the problem at debug level and stop
        # yielding; URLs produced before the error remain valid.
        name = exc.__class__.__name__
        self.log.debug("%s: %s", name, exc)
class RedditSubredditExtractor(RedditExtractor):
"""Extractor for URLs from subreddits on reddit.com"""

@ -388,10 +388,23 @@ class DownloadJob(Job):
try:
if pextr.config("parent-skip"):
job._skipcnt = self._skipcnt
self.status |= job.run()
status = job.run()
self._skipcnt = job._skipcnt
else:
self.status |= job.run()
status = job.run()
if status:
self.status |= status
if "_fallback" in kwdict and self.fallback:
fallback = kwdict["_fallback"] = \
iter(kwdict["_fallback"])
try:
url = next(fallback)
except StopIteration:
pass
else:
text.nameext_from_url(url, kwdict)
self.handle_url(url, kwdict)
break
except exception.RestartExtraction:
pass

Loading…
Cancel
Save