# -*- coding: utf-8 -*-

# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://hentaifox.com/"""

from .common import GalleryExtractor, Extractor, Message
from .. import text, util


class HentaifoxBase():
    """Base class for hentaifox extractors"""
    # Shared by every extractor in this module.
    category = "hentaifox"
    root = "https://hentaifox.com"


class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor):
    """Extractor for image galleries on hentaifox.com"""
    # Group 1 captures the "/gallery/<digits>" path,
    # group 2 captures only the digits.
    pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))"
    example = "https://hentaifox.com/gallery/12345/"

    def __init__(self, match):
        """Initialize from a successful 'pattern' match.

        The digits captured by group 2 are stored unconverted
        (as a string) in 'self.gallery_id'.
        """
        GalleryExtractor.__init__(self, match)
        self.gallery_id = match.group(2)

    # NOTE(review): the reviewed text is truncated from this point on.
    # The statements below are the visible head of '_split'; its final
    # 'extract_iter' argument and everything that followed it in this
    # class are missing and must be restored from version control.
    # The fragment is preserved verbatim and deliberately left inert:
    #
    #   @staticmethod
    #   def _split(txt):
    #       return [
    #           text.remove_html(tag.partition(">")[2], "", "")
    #           for tag in text.extract_iter(
    #               txt, "class='tag_btn", "


# NOTE(review): the statements below are the visible tail of a generator
# whose header (and enclosing class, if any) was lost in the same
# truncation.  The leading "')" closes a call whose beginning is not
# visible, and the original loop nesting cannot be determined from the
# surviving text, so the fragment is preserved verbatim and left inert:
#
#   ')
#   yield {
#       "url"       : text.urljoin(self.root, url),
#       "gallery_id": text.parse_int(
#           url.strip("/").rpartition("/")[2]),
#       "title"     : text.unescape(title),
#       "_extractor": HentaifoxGalleryExtractor,
#   }
#   pos = page.find(">Next<")
#   url = text.rextract(page, "href=", ">", pos)[0]
#   if pos == -1 or "/pag" not in url:
#       return
#   num += 1