# -*- coding: utf-8 -*-

# Copyright 2021 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for Gelbooru v0.2 sites"""

from . import booru
from .. import text, util, exception

from xml.etree import ElementTree
import collections
import re


class GelbooruV02Extractor(booru.BooruExtractor):
    """Shared API helpers for Gelbooru v0.2 extractors."""

    basecategory = "gelbooru_v02"

    def _api_request(self, params):
        """Query the post index of the site's 'dapi' endpoint.

        Sends 'params' as query parameters to the endpoint below
        'self.root' and returns the response text parsed into an
        ElementTree element.
        """
        url = self.root + "/index.php?page=dapi&s=post&q=index"
        return ElementTree.fromstring(self.request(url, params=params).text)

    def _pagination(self, params):
        """Yield the attributes of every element of every result page.

        'params' is modified in place: "pid" starts at 'self.page_start'
        and is incremented after each full page, "limit" is set to
        'self.per_page'.  Iteration ends after the first response that
        contains fewer than 'self.per_page' child elements.
        """
        params["pid"] = self.page_start
        params["limit"] = self.per_page

        while True:
            root = self._api_request(params)
            for post in root:
                yield post.attrib

            # a short page means there is nothing left to request
            if len(root) < self.per_page:
                return
            params["pid"] += 1

    @staticmethod
    def _prepare(post):
        """Store a "date" entry in 'post', parsed from its "created_at"."""
        post["date"] = text.parse_datetime(
            post["created_at"], "%a %b %d %H:%M:%S %z %Y")

    def _extended_tags(self, post, page=None):
        """Process the HTML view page of 'post'.

        When 'page' is empty or not given, the view page for post["id"]
        is requested from 'self.root'.
        """
        if not page:
            url = "{}/index.php?page=post&s=view&id={}".format(
                self.root, post["id"])
            page = self.request(url).text
        # NOTE(review): SOURCE is cut off in the middle of the following
        # statement.  The visible fragment is kept verbatim below; the rest
        # of this method has to be restored from the original file.
        #   html = text.extract(page, '