[tumblr] change 'reblogs' option (#103)

- rename "deleted" to "same-blog"
- change test for deleted original post to test if
  original post owner has the same UUID (full blog name) as the one
  being downloaded from
- add 'blog[uuid]' metadata to allow comparison with
  'reblogged_from_uuid'
pull/133/head
Mike Fährmann 6 years ago
parent f1695567e8
commit 7742cf8601
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -625,9 +625,8 @@ Type ``bool`` or ``string``
Default ``true``
Description * ``true``: Extract media from reblogged posts
* ``false``: Skip reblogged posts
* ``"deleted"``: Skip reblogged posts, but download from them
anyway if the parent post has been deleted
(requires 1 additional API request per reblogged post)
* ``"same-blog"``: Skip reblogged posts unless the original post
is from the same blog
=========== =====

@@ -61,6 +61,9 @@ class TumblrExtractor(Extractor):
elif not self.types:
self.log.warning("no valid post types selected")
if self.reblogs == "same-blog":
self._skip_reblog = self._skip_reblog_same_blog
def items(self):
blog = None
yield Message.Version, 1
@@ -70,6 +73,7 @@ class TumblrExtractor(Extractor):
continue
if not blog:
blog = self.api.info(self.blog)
blog["uuid"] = self.blog
yield Message.Directory, blog.copy()
reblog = "reblogged_from_id" in post
@@ -158,18 +162,11 @@ class TumblrExtractor(Extractor):
return Message.Url, url, post
# NOTE(review): this looks like the pre-change version of _skip_reblog
# (it handles the old "deleted" value of the 'reblogs' option and the same
# name is defined again right after it) -- presumably the removed side of
# this hunk; confirm against the commit.
#
# Decide whether a reblogged post should be skipped.
# Returns True to skip the post, False to process it.
def _skip_reblog(self, post):
# Any value other than "deleted" is used as a plain on/off switch:
# a truthy 'reblogs' means "do not skip", a falsy one means "skip".
if self.reblogs != "deleted":
return not self.reblogs
# "deleted" mode: match the root post's URL against the first pattern of
# TumblrPostExtractor to obtain the blog (group 1 or 2) and post ID (group 3).
match = re.match(
TumblrPostExtractor.pattern[0], post["reblogged_root_url"])
if match:
blog = match.group(1) or match.group(2)
try:
# Request the root post; only whether the lookup succeeds matters,
# the returned post itself is discarded.
next(self.api.posts(blog, {"id": match.group(3)}))
except exception.NotFoundError:
# Root post could not be found -> do not skip this reblog.
return False
# Root post was found, or its URL did not match the pattern: skip.
return True
def _skip_reblog(self, _):
    """Return True if reblogged posts should be skipped.

    Default reblog filter: the decision depends only on the 'reblogs'
    setting, so the post argument is ignored.  A truthy 'self.reblogs'
    keeps reblogs (returns False); a falsy one skips them (returns True).

    When 'self.reblogs' is "same-blog", the constructor rebinds
    'self._skip_reblog' to '_skip_reblog_same_blog', so this
    implementation is not the one called in that mode.
    """
    return not self.reblogs
def _skip_reblog_same_blog(self, post):
    """Return True if the reblogged 'post' should be skipped.

    Reblog filter for the "same-blog" setting of the 'reblogs' option:
    a reblog is kept (returns False) only when the UUID of its root post's
    blog equals 'self.blog', the blog being downloaded from.
    """
    # Use .get() so that a post without a 'reblogged_root_uuid' entry is
    # treated as "not from the same blog" and skipped, instead of aborting
    # the whole extraction with a KeyError.
    return self.blog != post.get("reblogged_root_uuid")
class TumblrUserExtractor(TumblrExtractor):

Loading…
Cancel
Save