diff --git a/docs/configuration.rst b/docs/configuration.rst index 107a8fa6..b3e4c2da 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -577,6 +577,40 @@ Description Minimum and maximum wait time in seconds between each image =========== ===== +extractor.tumblr.external +------------------------- +=========== ===== +Type ``bool`` +Default ``false`` +Description Follow external URLs (e.g. from "Link" posts) and try to extract + images from them. +=========== ===== + + +extractor.tumblr.inline +----------------------- +=========== ===== +Type ``bool`` +Default ``false`` +Description Search posts for inline images. +=========== ===== + + +extractor.tumblr.posts +---------------------- +=========== ===== +Type ``string`` +Default ``"photo"`` +Description A comma-separated list of post types from which to extract images, etc. + For example: ``"text,link,photo"``. + + Possible types are ``text``, ``quote``, ``link``, ``answer``, + ``video``, ``audio``, ``photo``, ``chat``. + + You can use ``"all"`` instead of listing all types separately. 
+=========== ===== + + API Tokens & IDs ================ @@ -590,7 +624,7 @@ extractor.deviantart.client-id & .client-secret ----------------------------------------------- =========== ===== Type ``string`` -How To - login and visit DeviantArt's `Applications & Keys`_ section +How To - login and visit DeviantArt's `Applications & Keys`_ section - click "Register your Application" - click "Save" (top right; default settings are fine) - copy ``client_id`` and ``client_secret`` of your new "Untitled" @@ -602,7 +636,7 @@ extractor.flickr.api-key & .api-secret -------------------------------------- =========== ===== Type ``string`` -How To - login and `Create an App`_ in Flickr's `App Garden`_ +How To - login and `Create an App`_ in Flickr's `App Garden`_ - click "APPLY FOR A NON-COMMERCIAL KEY" - fill out the form with a random name and description and click "SUBMIT" @@ -615,7 +649,7 @@ extractor.pawoo.access-token ---------------------------- =========== ===== Type ``string`` -How To +How To =========== ===== @@ -623,7 +657,7 @@ extractor.pinterest.access-token -------------------------------- =========== ===== Type ``string`` -How To +How To =========== ===== @@ -631,7 +665,7 @@ extractor.reddit.client-id & .user-agent ---------------------------------------- =========== ===== Type ``string`` -How To - login and visit the apps_ section of your account's preferences +How To - login and visit the apps_ section of your account's preferences - click the "are you a developer? create an app..." 
button - fill out the form, choose "installed app", preferably set "http://localhost:6414/" as "redirect uri" and finally click @@ -644,6 +678,21 @@ How To - login and visit the apps_ section of your account's preferences =========== ===== +extractor.tumblr.api-key +------------------------ +=========== ===== +Type ``string`` +How To - login and visit Tumblr's Applications_ section + - click "Register application" + - fill out the form: use a random name and description, set + https://example.org/ as "Application Website" and "Default + callback URL" + - solve Google's "I'm not a robot" challenge and click "Register" + - copy your ``OAuth Consumer Key`` and put it in your configuration + file +=========== ===== + + .. |.netrc| replace:: ``.netrc`` .. |tempfile.gettempdir()| replace:: ``tempfile.gettempdir()`` .. |requests.request()| replace:: ``requests.request()`` @@ -675,3 +724,4 @@ How To - login and visit the apps_ section of your account's preferences .. _`App Garden`: https://www.flickr.com/services/ .. _apps: https://www.reddit.com/prefs/apps/ .. _`API access rules`: https://github.com/reddit/reddit/wiki/API +.. 
_Applications: https://www.tumblr.com/oauth/apps diff --git a/docs/gallery-dl.conf b/docs/gallery-dl.conf index 59fdfe19..bf67d670 100644 --- a/docs/gallery-dl.conf +++ b/docs/gallery-dl.conf @@ -100,6 +100,12 @@ { "mp4": true }, + "tumblr": + { + "posts": "photo", + "inline": false, + "external": false + }, "recursive": { "blacklist": ["directlink", "oauth", "recursive", "test"] diff --git a/gallery_dl/extractor/tumblr.py b/gallery_dl/extractor/tumblr.py index 8f61f3df..a994f1eb 100644 --- a/gallery_dl/extractor/tumblr.py +++ b/gallery_dl/extractor/tumblr.py @@ -14,6 +14,26 @@ from ..cache import memcache import re +def _original_image(url): + return re.sub( + (r"https?://\d+\.media\.tumblr\.com" + r"/([0-9a-f]+)/tumblr_([^/?&#.]+)_\d+\.([0-9a-z]+)"), + r"http://data.tumblr.com/\1/tumblr_\2_raw.\3", url + ) + + +def _original_video(url): + return re.sub( + (r"https?://vt\.media\.tumblr\.com" + r"/tumblr_([^_]+)_\d+\.([0-9a-z]+)"), + r"https://vt.media.tumblr.com/tumblr_\1.\2", url + ) + + +POST_TYPES = frozenset(( + "text", "quote", "link", "answer", "video", "audio", "photo", "chat")) + + class TumblrExtractor(Extractor): """Base class for tumblr extractors""" category = "tumblr" @@ -25,85 +45,94 @@ class TumblrExtractor(Extractor): self.user = match.group(1) self.api = TumblrAPI(self) + self.inline = self.config("inline", False) + self.external = self.config("external", False) + + types = self.config("posts", ("photo",)) + if types == "all": + self.types = POST_TYPES + elif types: + if isinstance(types, str): + types = types.split(",") + self.types = frozenset(types) + else: + self.types = frozenset() + def items(self): blog = self.api.info(self.user) yield Message.Version, 1 yield Message.Directory, blog for post in self.posts(): + if post["type"] not in self.types: + continue + post["blog"] = blog post["offset"] = 0 if "trail" in post: del post["trail"] - if "photos" in post: + if "photos" in post: # type "photo" or "link" photos = post["photos"] del 
post["photos"] for photo in photos: + post["photo"] = photo photo.update(photo["original_size"]) - photo["url"] = self._original_image(photo["url"]) del photo["original_size"] del photo["alt_sizes"] - post["extension"] = photo["url"].rpartition(".")[2] - post["offset"] += 1 - post["photo"] = photo - yield Message.Url, photo["url"], post + yield self._prepare(photo["url"], post) if "audio_url" in post: # type: "audio" - post["extension"] = None - post["offset"] += 1 - yield Message.Url, post["audio_url"], post + yield self._prepare( + post["audio_url"], post, None) if "video_url" in post: # type: "video" - url = post["video_url"] - post["extension"] = url.rpartition(".")[2] - post["offset"] += 1 - yield Message.Url, self._original_video(url), post - - if "description" in post: # inline images - for url in re.findall(r' src="([^"]+)"', post["description"]): - post["extension"] = url.rpartition(".")[2] - post["offset"] += 1 - yield Message.Url, self._original_image(url), post + yield self._prepare( + post["video_url"], post, _original_video) - if "permalink_url" in post: # external video/audio - yield Message.Queue, post["permalink_url"], post + if self.inline: # inline images + for key in ("body", "description"): + if key in post: + for url in re.findall('