") data = { "gallery_id" : text.parse_int(self.gallery_id), "title" : title_en or title_jp, "title_en" : title_en, "title_jp" : title_jp, # impossible to parse with strptime() "date" : extr('datetime="', '"'), } tdict = collections.defaultdict(list) for path in text.extract_iter(tags, 'href="/', '"'): kind, _, name = path.partition("/") tdict[kind].append(name.replace("+", " ")) data.update(tdict) if "languages" in data: data["language"] = data["languages"][-1].capitalize() data["lang"] = util.language_to_code(data["language"]) del data["languages"] return data def images(self, _): url = "{}/viewc/{}/1".format(self.root, self.gallery_id) page = self.request(url).text images = text.extract(page, "var images", ";")[0] return [(img, None) for img in text.extract_iter(images, "'", "'")] class HentaihandTagExtractor(Extractor): """Extractor for tag searches on hentaihand.com""" category = "hentaihand" subcategory = "tag" root = "https://hentaihand.com" pattern = (r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com" r"(/(?:parody|characters|tags|artists|groups|languages" r"|categories|relationships)/[^#]+)") test = ( ("https://hentaihand.com/artists/tony+taka", { "pattern": HentaihandGalleryExtractor.pattern, "count": ">= 50", }), ("https://hentaihand.com/artists/tony+taka/popular?page=2"), ("https://hentaihand.com/tags/full+color"), ("https://hentaihand.com/languages/japanese"), ("https://hentaihand.com/categories/manga"), ) def __init__(self, match): Extractor.__init__(self, match) self.path, _, query = match.group(1).partition("?") self.query = text.parse_query(query) self.query["page"] = text.parse_int(self.query.get("page"), 1) def items(self): yield Message.Version, 1 url = self.root + self.path params = self.query.copy() data = {"_extractor": HentaihandGalleryExtractor} while True: page = self.request(url, params=params).text for path in text.extract_iter(page, '(current)<") if pos < 0 or page.find('class="page-link" href="', pos) < 0: break params["page"] += 1 class 
HentaihandSearchExtractor(HentaihandTagExtractor):
    """Extractor for search results on hentaihand.com"""
    # This subclass only overrides the three class attributes below; all
    # request and iteration logic is inherited from HentaihandTagExtractor.
    subcategory = "search"

    # URL pattern, matched case-insensitively ("(?i)"), with the scheme and
    # a leading "www." both optional.
    # Group 1 captures "/search", an optional "/", and at least one further
    # character up to (but excluding) any "#", so a URL fragment is never
    # captured.
    # HentaihandTagExtractor.__init__ reads match.group(1) and splits it at
    # the first "?" into the request path and the query string.
    pattern = r"(?i)(?:https?://)?(?:www\.)?hentaihand\.com(/search/?[^#]+)"

    # Test case: an example search URL plus the expectations for it.
    # NOTE(review): presumably consumed by the project's test harness, with
    # "pattern" checked against each yielded URL, "range" limiting the run
    # to results 1-50 and "count" the expected number of results. Confirm
    # the exact key semantics against the harness.
    test = ("https://hentaihand.com/search?q=color", {
        "pattern": HentaihandGalleryExtractor.pattern,
        "range": "1-50",
        "count": 50,
    })

", "<")) title_jp = text.unescape(extr("