# -*- coding: utf-8 -*- # Copyright 2021-2022 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for Gelbooru Beta 0.2 sites""" from . import booru from .. import text, util, exception from xml.etree import ElementTree import collections import re class GelbooruV02Extractor(booru.BooruExtractor): basecategory = "gelbooru_v02" def __init__(self, match): booru.BooruExtractor.__init__(self, match) self.api_key = self.config("api-key") self.user_id = self.config("user-id") try: self.api_root = INSTANCES[self.category]["api_root"] except KeyError: self.api_root = self.root if self.category == "realbooru": self._file_url = self._file_url_realbooru def _api_request(self, params): url = self.api_root + "/index.php?page=dapi&s=post&q=index" return ElementTree.fromstring(self.request(url, params=params).text) def _pagination(self, params): params["pid"] = self.page_start params["limit"] = self.per_page post = None while True: try: root = self._api_request(params) except ElementTree.ParseError: if "tags" not in params or post is None: raise taglist = [tag for tag in params["tags"].split() if not tag.startswith("id:<")] taglist.append("id:<" + str(post.attrib["id"])) params["tags"] = " ".join(taglist) params["pid"] = 0 continue post = None for post in root: yield post.attrib if len(root) < self.per_page: return params["pid"] += 1 @staticmethod def _prepare(post): post["date"] = text.parse_datetime( post["created_at"], "%a %b %d %H:%M:%S %z %Y") def _file_url_realbooru(self, post): url = post["file_url"] if url.count("/") == 5: md5 = post["md5"] url = "{}/images/{}/{}/{}.{}".format( self.root, md5[0:2], md5[2:4], md5, url.rpartition(".")[2]) return url def _extended_tags(self, post, page=None): if not page: url = "{}/index.php?page=post&s=view&id={}".format( self.root, post["id"]) page = self.request(url).text html = text.extract(page, '