# -*- coding: utf-8 -*-

# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://derpibooru.org/"""

from .booru import BooruExtractor
from .. import text, exception
import operator
import re

BASE_PATTERN = r"(?:https?://)?derpibooru\.org"


class DerpibooruExtractor(BooruExtractor):
    """Base class for derpibooru extractors"""
    category = "derpibooru"
    filename_fmt = "{filename}.{extension}"
    archive_fmt = "{id}"
    root = "https://derpibooru.org"
    request_interval = 1.0
    per_page = 50

    # The download URL of a post is stored under its 'view_url' key
    _file_url = operator.itemgetter("view_url")

    @staticmethod
    def _prepare(post):
        """Add a 'date' entry parsed from the post's 'created_at' value"""
        post["date"] = text.parse_datetime(post["created_at"])

    @staticmethod
    def _extended_tags(post):
        """Do nothing

        NOTE(review): presumably overrides a BooruExtractor hook to
        disable extended tag extraction -- confirm against the base class.
        """

    def _pagination(self, url, params):
        """Yield all images from a paginated API endpoint

        Arguments:
            url: API endpoint returning an object with an 'images' list
            params: query parameters for the request; this mapping is
                copied and never modified

        Requests are sent with 'page' starting at 1 and 'per_page' set to
        self.per_page. Iteration stops after the first page that contains
        fewer than self.per_page images.
        """
        # Work on a copy, so page counters, API keys, and filter IDs
        # do not leak into the caller's dict (e.g. 'self.params')
        params = dict(params)
        params["page"] = 1
        params["per_page"] = self.per_page

        api_key = self.config("api-key")
        if api_key:
            params["key"] = api_key

        filter_id = self.config("filter")
        if filter_id:
            params["filter_id"] = filter_id
        elif not api_key:
            # Neither a filter nor an API key has been configured
            params["filter_id"] = "56027"  # "Everything" filter

        while True:
            images = self.request(url, params=params).json()["images"]
            yield from images

            if len(images) < self.per_page:
                return
            params["page"] += 1


class DerpibooruPostExtractor(DerpibooruExtractor):
    """Extractor for single posts from derpibooru.org"""
    subcategory = "post"
    pattern = BASE_PATTERN + r"/images/(\d+)"
    test = ("https://derpibooru.org/images/1", {
        "content": "88449eeb0c4fa5d3583d0b794f6bc1d70bf7f889",
        "count": 1,
        "keyword": {
            "animated": False,
            "aspect_ratio": 1.0,
            "comment_count": int,
            "created_at": "2012-01-02T03:12:33Z",
            "date": "dt:2012-01-02 03:12:33",
            "deletion_reason": None,
            "description": "",
            "downvotes": int,
            "duplicate_of": None,
            "duration": 0.04,
            "extension": "png",
            "faves": int,
            "first_seen_at": "2012-01-02T03:12:33Z",
            "format": "png",
            "height": 900,
            "hidden_from_users": False,
            "id": 1,
            "mime_type": "image/png",
            "name": "1__safe_fluttershy_solo_cloud_happy_flying_upvotes+galore"
                    "_artist-colon-speccysy_get_sunshine",
            "orig_sha512_hash": None,
            "processed": True,
            "representations": dict,
            "score": int,
            "sha512_hash": "f16c98e2848c2f1bfff3985e8f1a54375cc49f78125391aeb8"
                           "0534ce011ead14e3e452a5c4bc98a66f56bdfcd07ef7800663"
                           "b994f3f343c572da5ecc22a9660f",
            "size": 860914,
            "source_url": "https://www.deviantart.com/speccysy/art"
                          "/Afternoon-Flight-215193985",
            "spoilered": False,
            "tag_count": 36,
            "tag_ids": list,
            "tags": list,
            "thumbnails_generated": True,
            "updated_at": "2020-05-28T13:14:07Z",
            "uploader": "Clover the Clever",
            "uploader_id": 211188,
            "upvotes": int,
            "view_url": str,
            "width": 900,
            "wilson_score": float,
        },
    })

    def __init__(self, match):
        """Store the numeric image ID captured by 'pattern'"""
        DerpibooruExtractor.__init__(self, match)
        self.image_id = match.group(1)

    def posts(self):
        """Return a 1-tuple with the 'image' object of the requested post"""
        url = self.root + "/api/v1/json/images/" + self.image_id
        return (self.request(url).json()["image"],)


class DerpibooruSearchExtractor(DerpibooruExtractor):
    """Extractor for search results on derpibooru.org"""
    subcategory = "search"
    directory_fmt = ("{category}", "{search_tags}")
    pattern = BASE_PATTERN + r"/(?:search/?\?([^#]+)|tags/([^/?#]+))"
    test = (
        ("https://derpibooru.org/search?q=cute", {
            "range": "40-60",
            "count": 21,
        }),
        ("https://derpibooru.org/tags/cute", {
            "range": "40-60",
            "count": 21,
        }),
    )

    # Escape sequences found in '/tags/<slug>' URLs and the characters
    # they stand for (e.g. 'artist-colon-speccysy' -> 'artist:speccysy').
    # NOTE(review): mapping follows the slug scheme of the Philomena
    # software running the site -- confirm against its Slug module.
    _slug_escapes = {
        "-dash-"   : "-",
        "-fwslash-": "/",
        "-bwslash-": "\\",
        "-colon-"  : ":",
        "-dot-"    : ".",
        "-plus-"   : "+",
    }

    def __init__(self, match):
        """Build the search parameters from a search or tag URL

        For '/search?...' URLs the query string is parsed as is.
        For '/tags/<slug>' URLs the slug is decoded into the tag name
        and used as search query 'q'.
        """
        DerpibooruExtractor.__init__(self, match)
        query, tag = match.groups()
        if query:
            self.params = text.parse_query(query)
        else:
            self.params = {"q": self._unslug(tag)}

    @classmethod
    def _unslug(cls, slug):
        """Convert a tag slug from a '/tags/' URL into its tag name"""
        escapes = cls._slug_escapes
        # A literal '+' is always escaped as '-plus-',
        # so any '+' left in a slug represents a space
        tag = text.unquote(slug.replace("+", " "))
        # Decode all escapes in one left-to-right pass; chained
        # str.replace() calls would misread sequences like
        # '-dash-colon-dash-' (the escaped form of '-colon-')
        return re.sub(
            r"-(?:dash|fwslash|bwslash|colon|dot|plus)-",
            lambda match: escapes[match.group(0)], tag)

    def metadata(self):
        """Return the search query as 'search_tags' ('' if there is none)"""
        return {"search_tags": self.params.get("q", "")}

    def posts(self):
        """Yield all images matching the search parameters"""
        url = self.root + "/api/v1/json/search/images"
        return self._pagination(url, self.params)


class DerpibooruGalleryExtractor(DerpibooruExtractor):
    """Extractor for galleries on derpibooru.org"""
    subcategory = "gallery"
    directory_fmt = ("{category}", "galleries",
                     "{gallery[id]} {gallery[title]}")
    pattern = BASE_PATTERN + r"/galleries/(\d+)"
    test = ("https://derpibooru.org/galleries/1", {
        "pattern": r"https://derpicdn\.net/img/view/\d+/\d+/\d+/\d+[^/]+$",
        "keyword": {
            "gallery": {
                "description": "Indexes start at 1 :P",
                "id": 1,
                "spoiler_warning": "",
                "thumbnail_id": 1,
                "title": "The Very First Gallery",
                "user": "DeliciousBlackInk",
                "user_id": 365446,
            },
        },
    })

    def __init__(self, match):
        """Store the numeric gallery ID captured by 'pattern'"""
        DerpibooruExtractor.__init__(self, match)
        self.gallery_id = match.group(1)

    def metadata(self):
        """Return the gallery's API object as 'gallery'

        Raises exception.NotFoundError if the gallery search
        yields no results.
        """
        url = self.root + "/api/v1/json/search/galleries"
        params = {"q": "id:" + self.gallery_id}
        galleries = self.request(url, params=params).json()["galleries"]
        if not galleries:
            raise exception.NotFoundError("gallery")
        return {"gallery": galleries[0]}

    def posts(self):
        """Yield all images of the gallery

        The 'gallery_id:<id>' term is used both as search query ('q')
        and as sort field ('sf'), with descending sort direction ('sd').
        """
        gallery_id = "gallery_id:" + self.gallery_id
        url = self.root + "/api/v1/json/search/images"
        params = {"sd": "desc", "sf": gallery_id, "q": gallery_id}
        return self._pagination(url, params)