From 342371086ba174837a3a6ae53a6ca6b907ae3c34 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mike=20F=C3=A4hrmann?=
Date: Wed, 19 Apr 2017 10:17:43 +0200
Subject: [PATCH] [pawoo] add extractors for accounts and statuses

https://pawoo.net is a Mastodon[1] instance hosted by Pixiv

[1] https://github.com/tootsuite/mastodon
---
 gallery_dl/extractor/__init__.py |   1 +
 gallery_dl/extractor/pawoo.py    | 169 +++++++++++++++++++++++++++++++
 supportedsites.rst               |   3 +-
 3 files changed, 172 insertions(+), 1 deletion(-)
 create mode 100644 gallery_dl/extractor/pawoo.py

diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index e12f6df2..c44af9ec 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -51,6 +51,7 @@ modules = [
     "mangastream",
     "nhentai",
     "nijie",
+    "pawoo",
     "pinterest",
     "powermanga",
     "readcomiconline",
diff --git a/gallery_dl/extractor/pawoo.py b/gallery_dl/extractor/pawoo.py
new file mode 100644
index 00000000..5f9a29b5
--- /dev/null
+++ b/gallery_dl/extractor/pawoo.py
@@ -0,0 +1,169 @@
+# -*- coding: utf-8 -*-
+
+# Copyright 2017 Mike Fährmann
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extract images from https://pawoo.net"""
+
+from .common import Extractor, Message
+from .. import text, exception
+
+
+class PawooExtractor(Extractor):
+    """Base class for pawoo extractors"""
+    category = "pawoo"
+    directory_fmt = ["{category}", "{account[username]}"]
+    filename_fmt = "{category}_{id}_{media[id]}.{extension}"
+
+    def __init__(self):
+        Extractor.__init__(self)
+        self.api = MastodonAPI(self.session, self.log)
+
+    def items(self):
+        """Yield one Directory message per status and one Url per file"""
+        yield Message.Version, 1
+        for status in self.statuses():
+            attachments = self.prepare(status)
+            yield Message.Directory, status
+            for media in attachments:
+                # the status dict doubles as metadata for each of its files
+                status["media"] = media
+                url = media["url"]
+                yield Message.Url, url, text.nameext_from_url(url, status)
+
+    def statuses(self):
+        """Return an iterable containing all relevant Status-objects"""
+        return []
+
+    @staticmethod
+    def prepare(status):
+        """Delete selected fields in-place and return the attachment list
+
+        Fields are removed with pop() so that a key missing from the
+        API response is skipped instead of raising KeyError.
+        """
+        for key in ("favourites_count", "reblogs_count", "reblog", "mentions"):
+            status.pop(key, None)
+        account = status["account"]
+        for key in ("followers_count", "following_count", "statuses_count"):
+            account.pop(key, None)
+        return status.pop("media_attachments", None) or []
+
+
+class PawooAccountExtractor(PawooExtractor):
+    """Extractor for all images of an account/user on pawoo.net"""
+    subcategory = "account"
+    pattern = [r"(?:https?://)?pawoo\.net/(@[^/]+)/?$"]
+    test = [
+        ("https://pawoo.net/@kuroda", {
+            "url": "1507de8fe69a35d9ceb837c7082e25425eeed593",
+            "keyword": "841b641adbcd18846474ef2e9eb2d104d4afbc7e",
+        }),
+        ("https://pawoo.net/@zZzZz/", {
+            "exception": exception.NotFoundError,
+        }),
+    ]
+
+    def __init__(self, match):
+        PawooExtractor.__init__(self)
+        self.account_name = match.group(1)
+
+    def statuses(self):
+        """Return the media statuses of the account named in the URL
+
+        Raises NotFoundError if the search returns no matching account.
+        """
+        name = self.account_name[1:]  # drop the leading "@"
+        results = self.api.search(self.account_name)
+        for account in results["accounts"]:
+            # "acct" is the plain username for local accounts and
+            # "username@domain" for remote ones; comparing it keeps a
+            # remote account with the same username from being picked
+            if account["acct"] == name:
+                return self.api.account_statuses(account["id"])
+        raise exception.NotFoundError("account")
+
+
+class PawooStatusExtractor(PawooExtractor):
+    """Extractor for images from a status on pawoo.net"""
+    subcategory = "status"
+    pattern = [r"(?:https?://)?pawoo\.net/@[^/]+/(\d+)"]
+    test = [
+        ("https://pawoo.net/@takehana_note/559043", {
+            "url": "bff6f435c0101a911eebd985d6b752c2b61721ef",
+            "keyword": "b922582f8eb8cee306ae05920fbad5a7992acbd5",
+            "content": "3b148cf90174173355fe34179741ce476921b2fc",
+        }),
+        ("https://pawoo.net/@zZzZz/12346", {
+            "exception": exception.NotFoundError,
+        }),
+    ]
+
+    def __init__(self, match):
+        PawooExtractor.__init__(self)
+        self.status_id = match.group(1)
+
+    def statuses(self):
+        """Return a 1-tuple holding the status referenced by the URL"""
+        return (self.api.status(self.status_id),)
+
+
+class MastodonAPI:
+    """Minimal interface for the Mastodon API on pawoo.net
+
+    https://github.com/tootsuite/mastodon
+    https://github.com/tootsuite/documentation/blob/master/Using-the-API/API.md
+    """
+
+    def __init__(self, session, log, root="https://pawoo.net",
+                 access_token=("0f04191976cf22a5319c1e91a73cbcb2"
+                               "510b589e2757efcca922f9b3173d119b")):
+        """Store 'session' and set its Authorization header
+
+        The header is written to the session object passed in, so every
+        later request made through that session carries the bearer token.
+        """
+        self.session = session
+        self.session.headers["Authorization"] = "Bearer " + access_token
+        self.log = log
+        self.root = root
+
+    def search(self, searchterm):
+        """Search for content"""
+        response = self.session.get(
+            self.root + "/api/v1/search",
+            params={"q": searchterm},
+        )
+        return self._parse(response)
+
+    def status(self, status_id):
+        """Fetch a Status"""
+        response = self.session.get(
+            self.root + "/api/v1/statuses/" + status_id
+        )
+        return self._parse(response)
+
+    def account_statuses(self, account_id):
+        """Yield an account's media statuses, following 'next' page links"""
+        url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
+            self.root, account_id)
+        while url:
+            response = self.session.get(url)
+            yield from self._parse(response)
+            url = response.links.get("next", {}).get("url")
+
+    @staticmethod
+    def _parse(response):
+        """Return the decoded JSON body of an API response
+
+        Raises NotFoundError for HTTP 404. Any other 4xx/5xx status
+        raises the session library's HTTP error instead of passing an
+        error payload on to callers that expect statuses or accounts.
+        """
+        if response.status_code == 404:
+            raise exception.NotFoundError()
+        response.raise_for_status()
+        return response.json()
diff --git a/supportedsites.rst b/supportedsites.rst
index 22d7a64a..6dad94e7 100644
--- a/supportedsites.rst
+++ b/supportedsites.rst
@@ -12,8 +12,8 @@ Supported Sites
 - dynasty-scans.com
 - e621.net
 - exhentai.org
-- famatg.com
 - fapat.me
+- fascans.com
 - gelbooru.com
 - gomanga.co
 - hbrowse.com
@@ -56,6 +56,7 @@ Supported Sites
 - mangastream.com
 - nhentai.net
 - nijie.info
+- pawoo.net
 - pic-maniac.com
 - pinterest.com
 - pixhost.org