From a666ddd16b8b3bd38a94a15d27060ba0d4dcf8b7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= <mike_faehrmann@web.de>
Date: Fri, 7 Sep 2018 18:32:45 +0200
Subject: [PATCH] [tumblr] extend 'reblogs' functionality (#103)

Setting 'reblogs' to "deleted" will check whether the parent post of a
reblog has been deleted. If it has, the reblog's media content is
downloaded; otherwise the reblog is skipped.

This is a rather costly operation (1 API request per reblogged post)
and should therefore be used with care.
---
 docs/configuration.rst         | 10 +++++++---
 gallery_dl/extractor/tumblr.py | 15 ++++++++++++++-
 2 files changed, 21 insertions(+), 4 deletions(-)

diff --git a/docs/configuration.rst b/docs/configuration.rst
index d693b336..9cd86d8f 100644
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@@ -614,16 +614,20 @@ extractor.tumblr.inline
 =========== =====
 Type        ``bool``
 Default     ``false``
-Description Search posts for inline images.
+Description Search posts for inline images and videos.
 =========== =====
 
 
 extractor.tumblr.reblogs
 ------------------------
 =========== =====
-Type        ``bool``
+Type        ``bool`` or ``string``
 Default     ``true``
-Description Extract images from reblogged posts.
+Description * ``true``: Extract media from reblogged posts
+            * ``false``: Skip reblogged posts
+            * ``"deleted"``: Skip reblogged posts, but download from them
+              anyway if the parent post has been deleted
+              (requires 1 additional API request per reblogged post)
 =========== =====
 
 
diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py
index e6eb79d1..68aa14a2 100644
--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@@ -73,7 +73,7 @@ class TumblrExtractor(Extractor):
                 yield Message.Directory, blog.copy()
 
             reblog = "reblogged_from_id" in post
-            if reblog and not self.reblogs:
+            if reblog and self._skip_reblog(post):
                 continue
             post["reblogged"] = reblog
 
@@ -158,6 +158,31 @@ class TumblrExtractor(Extractor):
 
         return Message.Url, url, post
 
+    def _skip_reblog(self, post):
+        """Return True if the reblogged 'post' should be skipped
+
+        For any 'reblogs' value other than "deleted", the result is the
+        negated option value. For "deleted", the post referenced by
+        'reblogged_root_url' is requested from the API (1 request per
+        reblog); the reblog is kept only if that raises NotFoundError.
+        """
+        if self.reblogs != "deleted":
+            return not self.reblogs
+        match = re.match(
+            TumblrPostExtractor.pattern[0], post["reblogged_root_url"])
+        if match:
+            blog = match.group(1) or match.group(2)
+            try:
+                # Pass a default to next(): a bare StopIteration from an
+                # empty result would leak into the calling generator
+                # (RuntimeError on Python 3.7+, silent stop before that).
+                next(self.api.posts(blog, {"id": match.group(3)}), None)
+            except exception.NotFoundError:
+                # referenced post is gone -> keep this reblog
+                return False
+        # post still exists, lookup was empty, or URL did not match
+        return True
+
 
 class TumblrUserExtractor(TumblrExtractor):
     """Extractor for all images from a tumblr-user"""