diff --git a/docs/configuration.rst b/docs/configuration.rst index c3ed5626..eb4cfd91 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -3735,6 +3735,23 @@ Description use an extra HTTP request to find the URL to its full-resolution version. +extractor.tumblr.pagination +--------------------------- +Type + ``string`` +Default + ``"offset"`` +Description + Controls how to paginate over blog posts. + + * ``"api"``: ``next`` parameter provided by the API + (potentially misses posts due to a + `bug <https://github.com/tumblr/docs/issues/76>`__ + in Tumblr's API) + * ``"before"``: timestamp of last post + * ``"offset"``: post offset number + + extractor.tumblr.ratelimit -------------------------- Type diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index c34910f8..ff29c046 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -386,7 +386,7 @@ class TumblrAPI(oauth.OAuth1API): def posts(self, blog, params): """Retrieve published posts""" params["offset"] = self.extractor.config("offset") - params["limit"] = "50" + params["limit"] = 50 params["reblog_info"] = "true" params["type"] = self.posts_type params["before"] = self.before @@ -398,8 +398,14 @@ class TumblrAPI(oauth.OAuth1API): def likes(self, blog): """Retrieve liked posts""" + endpoint = "/v2/blog/{}/likes".format(blog) params = {"limit": "50", "before": self.before} - return self._pagination(blog, "/likes", params, key="liked_posts") + while True: + posts = self._call(endpoint, params)["liked_posts"] + if not posts: + return + yield from posts + params["before"] = posts[-1]["liked_timestamp"] def _call(self, endpoint, params, **kwargs): url = self.ROOT + endpoint @@ -474,6 +480,7 @@ class TumblrAPI(oauth.OAuth1API): if self.api_key: params["api_key"] = self.api_key + strategy = self.extractor.config("pagination") while True: data = self._call(endpoint, params) @@ -481,13 +488,31 @@ class TumblrAPI(oauth.OAuth1API): self.BLOG_CACHE[blog] = data["blog"] cache = False - yield from 
data[key] - - try: - endpoint = data["_links"]["next"]["href"] - except KeyError: - return + posts = data[key] + yield from posts - params = None - if self.api_key: - endpoint += "&api_key=" + self.api_key + if strategy == "api": + try: + endpoint = data["_links"]["next"]["href"] + except KeyError: + return + + params = None + if self.api_key: + endpoint += "&api_key=" + self.api_key + + elif strategy == "before": + if not posts: + return + timestamp = posts[-1]["timestamp"] + 1 + if params["before"] and timestamp >= params["before"]: + return + params["before"] = timestamp + params["offset"] = None + + else: # offset + params["offset"] = \ + text.parse_int(params["offset"]) + params["limit"] + params["before"] = None + if params["offset"] >= data["total_posts"]: + return