[tumblr] extend 'reblogs' functionality (#103)

Setting 'reblogs' to "deleted" checks, for each reblog, whether its parent post has been deleted. If it has, the reblog's media content is downloaded; otherwise the reblog is skipped. This is a rather costly operation (1 API request per reblogged post) and should therefore be used with care.
6 years ago · a666ddd16b
parent c9b8e6aefc
commit a666ddd16b
2 changed files with 21 additions and 4 deletions
--- a/docs/configuration.rst
+++ b/docs/configuration.rst
@ -614,16 +614,20 @@ extractor.tumblr.inline
 =========== =====
 Type        ``bool``
 Default     ``false``
-Description Search posts for inline images.
+Description Search posts for inline images and videos.
 =========== =====


 extractor.tumblr.reblogs
 ------------------------
 =========== =====
-Type        ``bool``
+Type        ``bool`` or ``string``
 Default     ``true``
-Description Extract images from reblogged posts.
+Description * ``true``: Extract media from reblogged posts
+            * ``false``: Skip reblogged posts
+            * ``"deleted"``: Skip reblogged posts, but download from them
+              anyway if the parent post has been deleted
+              (requires 1 additional API request per reblogged post)
 =========== =====


--- a/gallery_dl/extractor/tumblr.py
+++ b/gallery_dl/extractor/tumblr.py
@ -73,7 +73,7 @@ class TumblrExtractor(Extractor):
                yield Message.Directory, blog.copy()

            reblog = "reblogged_from_id" in post
-            if reblog and not self.reblogs:
+            if reblog and self._skip_reblog(post):
                continue
            post["reblogged"] = reblog

@ -158,6 +158,19 @@ class TumblrExtractor(Extractor):

        return Message.Url, url, post

+    def _skip_reblog(self, post):
+        """Return True if the reblogged 'post' should be skipped.
+
+        If 'self.reblogs' is anything other than the string "deleted",
+        the result is simply 'not self.reblogs'.
+
+        If it is "deleted", the parent post referenced by
+        post["reblogged_root_url"] is requested through the API; the reblog
+        is kept (False) only if that request raises NotFoundError, which is
+        taken to mean the parent post has been deleted. In every other case,
+        including a root URL that does not match the post URL pattern, the
+        reblog is skipped (True).
+        """
+        if self.reblogs != "deleted":
+            return not self.reblogs
+        # parse the parent post's URL with the post extractor's own pattern
+        match = re.match(
+            TumblrPostExtractor.pattern[0], post["reblogged_root_url"])
+        if match:
+            # groups 1 and 2 are alternative captures for the blog,
+            # group 3 is the post ID
+            blog = match.group(1) or match.group(2)
+            try:
+                # only the first result is requested; its value is discarded,
+                # since all that matters is whether the lookup fails
+                # NOTE(review): if api.posts() can yield nothing without
+                # raising, next() raises StopIteration here -- confirm
+                next(self.api.posts(blog, {"id": match.group(3)}))
+            except exception.NotFoundError:
+                return False
+        return True
+

 class TumblrUserExtractor(TumblrExtractor):
    """Extractor for all images from a tumblr-user"""