# -*- coding: utf-8 -*- # Copyright 2019-2024 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://hentainexus.com/""" from .common import GalleryExtractor, Extractor, Message from .. import text, util import binascii class HentainexusGalleryExtractor(GalleryExtractor): """Extractor for hentainexus galleries""" category = "hentainexus" root = "https://hentainexus.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" r"/(?:view|read)/(\d+)") example = "https://hentainexus.com/view/12345" def __init__(self, match): self.gallery_id = match.group(1) url = "{}/view/{}".format(self.root, self.gallery_id) GalleryExtractor.__init__(self, match, url) def metadata(self, page): rmve = text.remove_html extr = text.extract_from(page) data = { "gallery_id": text.parse_int(self.gallery_id), "cover" : extr('"og:image" content="', '"'), "title" : extr('

', '

'), } for key in ("Artist", "Book", "Circle", "Event", "Language", "Magazine", "Parody", "Publisher", "Description"): value = rmve(extr('viewcolumn">' + key + '', '')) value, sep, rest = value.rpartition(" (") data[key.lower()] = value if sep else rest data["tags"] = tags = [] for k in text.extract_iter(page, '> 1 ^ 0xc else: C = C >> 1 k = primes[C & 0x7] x = 0 S = list(range(256)) for i in range(256): x = (x + S[i] + key[i % len(key)]) % 256 S[i], S[x] = S[x], S[i] result = "" a = c = m = x = 0 for n in range(64, len(blob)): a = (a + k) % 256 x = (c + S[(x + S[a]) % 256]) % 256 c = (c + a + S[a]) % 256 S[a], S[x] = S[x], S[a] m = S[(x + S[(a + S[(m + c) % 256]) % 256]) % 256] result += chr(blob[n] ^ m) return result @staticmethod def _join_title(data): event = data['event'] artist = data['artist'] circle = data['circle'] title = data['title'] parody = data['parody'] book = data['book'] magazine = data['magazine'] # a few galleries have a large number of artists or parodies, # which get replaced with "Various" in the title string if artist.count(',') >= 3: artist = 'Various' if parody.count(',') >= 3: parody = 'Various' jt = '' if event: jt += '({}) '.format(event) if circle: jt += '[{} ({})] '.format(circle, artist) else: jt += '[{}] '.format(artist) jt += title if parody.lower() != 'original work': jt += ' ({})'.format(parody) if book: jt += ' ({})'.format(book) if magazine: jt += ' ({})'.format(magazine) return jt class HentainexusSearchExtractor(Extractor): """Extractor for hentainexus search results""" category = "hentainexus" subcategory = "search" root = "https://hentainexus.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentainexus\.com" r"(?:/page/\d+)?/?(?:\?(q=[^/?#]+))?$") example = "https://hentainexus.com/?q=QUERY" def items(self): params = text.parse_query(self.groups[0]) data = {"_extractor": HentainexusGalleryExtractor} path = "/" while path: page = self.request(self.root + path, params=params).text extr = text.extract_from(page) while True: gallery_id = extr('