From a666ddd16b8b3bd38a94a15d27060ba0d4dcf8b7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 7 Sep 2018 18:32:45 +0200 Subject: [PATCH] [tumblr] extend 'reblogs' functionality (#103) Setting 'reblogs' to "deleted" makes the extractor check whether the parent post of a reblog has been deleted; if it has, the reblog's media content is downloaded, otherwise the reblog is skipped. This is a rather costly operation (1 API request per reblogged post) and should therefore be used with care. --- docs/configuration.rst | 10 +++++++--- gallery_dl/extractor/tumblr.py | 15 ++++++++++++++- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index d693b336..9cd86d8f 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -614,16 +614,20 @@ extractor.tumblr.inline =========== ===== Type ``bool`` Default ``false`` -Description Search posts for inline images. +Description Search posts for inline images and videos. =========== ===== extractor.tumblr.reblogs ------------------------ =========== ===== -Type ``bool`` +Type ``bool`` or ``string`` Default ``true`` -Description Extract images from reblogged posts.
+Description * ``true``: Extract media from reblogged posts + * ``false``: Skip reblogged posts + * ``"deleted"``: Skip reblogged posts, but download from them + anyway if the parent post has been deleted + (requires 1 additional API request per reblogged post) =========== ===== diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index e6eb79d1..68aa14a2 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -73,7 +73,7 @@ class TumblrExtractor(Extractor): yield Message.Directory, blog.copy() reblog = "reblogged_from_id" in post - if reblog and not self.reblogs: + if reblog and self._skip_reblog(post): continue post["reblogged"] = reblog @@ -158,6 +158,19 @@ class TumblrExtractor(Extractor): return Message.Url, url, post + def _skip_reblog(self, post): + if self.reblogs != "deleted": + return not self.reblogs + match = re.match( + TumblrPostExtractor.pattern[0], post["reblogged_root_url"]) + if match: + blog = match.group(1) or match.group(2) + try: + next(self.api.posts(blog, {"id": match.group(3)})) + except exception.NotFoundError: + return False + return True + class TumblrUserExtractor(TumblrExtractor): """Extractor for all images from a tumblr-user"""