diff --git a/docs/configuration.rst b/docs/configuration.rst index 7572f9b2..42c45485 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -1269,6 +1269,16 @@ Description Provide ``artist``, ``author``, and ``group`` metadata fields. +extractor.mastodon.text-posts +----------------------------- +Type + ``bool`` +Default + ``false`` +Description + Also emit metadata for text-only posts without media content. + + extractor.newgrounds.flash -------------------------- Type diff --git a/gallery_dl/extractor/mastodon.py b/gallery_dl/extractor/mastodon.py index daa3d658..9785fc6e 100644 --- a/gallery_dl/extractor/mastodon.py +++ b/gallery_dl/extractor/mastodon.py @@ -6,7 +6,7 @@ # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. -"""Extractors for mastodon instances""" +"""Extractors for Mastodon instances""" from .common import BaseExtractor, Message from .. import text, exception @@ -29,26 +29,23 @@ class MastodonExtractor(BaseExtractor): def items(self): for status in self.statuses(): attachments = status["media_attachments"] - if attachments: - self.prepare(status) - yield Message.Directory, status - for media in attachments: - status["media"] = media - url = media["url"] - yield Message.Url, url, text.nameext_from_url(url, status) + del status["media_attachments"] + + status["instance"] = self.instance + status["tags"] = [tag["name"] for tag in status["tags"]] + status["date"] = text.parse_datetime( + status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") + + yield Message.Directory, status + for media in attachments: + status["media"] = media + url = media["url"] + yield Message.Url, url, text.nameext_from_url(url, status) def statuses(self): - """Return an iterable containing all relevant Status-objects""" + """Return an iterable containing all relevant Status objects""" return () - def prepare(self, status): - """Prepare a status object""" - del status["media_attachments"] - status["instance"] = self.instance - status["tags"] = [tag["name"] for tag in status["tags"]] - status["date"] = text.parse_datetime( - status["created_at"][:19], "%Y-%m-%dT%H:%M:%S") - INSTANCES = { "mastodon.social": { @@ -97,6 +94,7 @@ class MastodonUserExtractor(MastodonExtractor): def statuses(self): api = MastodonAPI(self) + username = self.item handle = "@{}@{}".format(username, self.instance) for account in api.account_search(handle, 1): @@ -104,7 +102,9 @@ class MastodonUserExtractor(MastodonExtractor): break else: raise exception.NotFoundError("account") - return api.account_statuses(account["id"]) + + return api.account_statuses( + account["id"], not self.config("text-posts", False)) class MastodonStatusExtractor(MastodonExtractor): @@ -130,8 +130,8 @@ class MastodonStatusExtractor(MastodonExtractor): class MastodonAPI(): """Minimal interface for the Mastodon API + https://docs.joinmastodon.org/ https://github.com/tootsuite/mastodon - https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md """ def __init__(self, extractor): @@ -153,15 +153,15 @@ class MastodonAPI(): self.headers = {"Authorization": "Bearer " + access_token} def account_search(self, query, limit=40): - """Search for content""" + """Search for accounts""" endpoint = "/v1/accounts/search" params = {"q": query, "limit": limit} return self._call(endpoint, params).json() - def account_statuses(self, account_id): - """Get an account's statuses""" + def account_statuses(self, account_id, only_media=True): + """Fetch an account's statuses""" endpoint = "/v1/accounts/{}/statuses".format(account_id) - params = {"only_media": "1"} + params = {"only_media": "1"} if only_media else None return self._pagination(endpoint, params) def status(self, status_id):