[mastodon] implement 'text-posts' option (#1569)

similar to Twitter's 'text-tweets'
pull/1677/head
Mike Fährmann 3 years ago
parent 414bdc95a3
commit 93d356712c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1269,6 +1269,16 @@ Description
Provide ``artist``, ``author``, and ``group`` metadata fields.
extractor.mastodon.text-posts
-----------------------------
Type
``bool``
Default
``false``
Description
Also emit metadata for text-only posts without media content.
extractor.newgrounds.flash
--------------------------
Type

@ -6,7 +6,7 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for mastodon instances"""
"""Extractors for Mastodon instances"""
from .common import BaseExtractor, Message
from .. import text, exception
@ -29,26 +29,23 @@ class MastodonExtractor(BaseExtractor):
def items(self):
for status in self.statuses():
attachments = status["media_attachments"]
if attachments:
self.prepare(status)
yield Message.Directory, status
for media in attachments:
status["media"] = media
url = media["url"]
yield Message.Url, url, text.nameext_from_url(url, status)
del status["media_attachments"]
status["instance"] = self.instance
status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
yield Message.Directory, status
for media in attachments:
status["media"] = media
url = media["url"]
yield Message.Url, url, text.nameext_from_url(url, status)
def statuses(self):
"""Return an iterable containing all relevant Status-objects"""
"""Return an iterable containing all relevant Status objects"""
return ()
def prepare(self, status):
"""Prepare a status object"""
del status["media_attachments"]
status["instance"] = self.instance
status["tags"] = [tag["name"] for tag in status["tags"]]
status["date"] = text.parse_datetime(
status["created_at"][:19], "%Y-%m-%dT%H:%M:%S")
INSTANCES = {
"mastodon.social": {
@ -97,6 +94,7 @@ class MastodonUserExtractor(MastodonExtractor):
def statuses(self):
api = MastodonAPI(self)
username = self.item
handle = "@{}@{}".format(username, self.instance)
for account in api.account_search(handle, 1):
@ -104,7 +102,9 @@ class MastodonUserExtractor(MastodonExtractor):
break
else:
raise exception.NotFoundError("account")
return api.account_statuses(account["id"])
return api.account_statuses(
account["id"], not self.config("text-posts", False))
class MastodonStatusExtractor(MastodonExtractor):
@ -130,8 +130,8 @@ class MastodonStatusExtractor(MastodonExtractor):
class MastodonAPI():
"""Minimal interface for the Mastodon API
https://docs.joinmastodon.org/
https://github.com/tootsuite/mastodon
https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
"""
def __init__(self, extractor):
@ -153,15 +153,15 @@ class MastodonAPI():
self.headers = {"Authorization": "Bearer " + access_token}
def account_search(self, query, limit=40):
"""Search for content"""
"""Search for accounts"""
endpoint = "/v1/accounts/search"
params = {"q": query, "limit": limit}
return self._call(endpoint, params).json()
def account_statuses(self, account_id):
"""Get an account's statuses"""
def account_statuses(self, account_id, only_media=True):
"""Fetch an account's statuses"""
endpoint = "/v1/accounts/{}/statuses".format(account_id)
params = {"only_media": "1"}
params = {"only_media": "1"} if only_media else None
return self._pagination(endpoint, params)
def status(self, status_id):

Loading…
Cancel
Save