# -*- coding: utf-8 -*-

# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://toyhou.se/"""

from .common import Extractor, Message
from .. import text, util

BASE_PATTERN = r"(?:https?://)?(?:www\.)?toyhou\.se"


class ToyhouseExtractor(Extractor):
    """Base class for toyhouse extractors"""
    category = "toyhouse"
    root = "https://toyhou.se"
    # Format strings; NOTE(review): presumably evaluated against each
    # post's metadata by the Extractor machinery -- confirm.
    directory_fmt = ("{category}", "{user|artists!S}")
    archive_fmt = "{id}"

    def __init__(self, match):
        """Store the first group of 'match' as user and reset the offset"""
        Extractor.__init__(self, match)
        self.offset = 0
        self.user = match.group(1)

    def items(self):
        """Yield a Directory and a Url message for every post"""
        # metadata() is queried once, before posts() is called
        shared = self.metadata()
        posts = util.advance(self.posts(), self.offset)

        for post in posts:
            if shared:
                post.update(shared)

            # fills in filename info; "filename" is read right below
            text.nameext_from_url(post["url"], post)

            # the filename is split at its first underscore:
            # the part before it becomes "id", the part after it "hash"
            ident, _, digest = post["filename"].partition("_")
            post["id"] = ident
            post["hash"] = digest

            yield Message.Directory, post
            yield Message.Url, post["url"], post

    def posts(self):
        """Return the posts to process (none in this base class)"""
        return ()

    def metadata(self):
        """Return a mapping merged into every post (None in this base class)"""
        return None

    def skip(self, num):
        """Add 'num' to the offset passed to util.advance() and return 'num'"""
        self.offset += num
        return num

    # NOTE(review): this copy of the file is damaged from this point on.
    # The original text continues with the fragment
    #     def _parse_post(self, post, needle='\n
    # but parts of its string literals and the code between them are
    # missing, so the method cannot be restored from what is visible here.
    # Recover the remainder of the class from the upstream file.
', '<'), "%d %b %Y, %I:%M:%S %p"), "artists": [ text.remove_html(artist) for artist in extr( '
', '
\n
').split( '
') ], "characters": text.split_html(extr( '
', ''): cnt += 1 yield self._parse_post(post) if cnt == 0 and params["page"] == 1: token, pos = text.extract( page, '