diff --git a/gallery_dl/extractor/hentaifox.py b/gallery_dl/extractor/hentaifox.py index 7e0b63ce..84ad3aff 100644 --- a/gallery_dl/extractor/hentaifox.py +++ b/gallery_dl/extractor/hentaifox.py @@ -23,8 +23,8 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): pattern = r"(?:https?://)?(?:www\.)?hentaifox\.com(/gallery/(\d+))" test = ("https://hentaifox.com/gallery/56622/", { "pattern": r"https://i\d*\.hentaifox\.com/\d+/\d+/\d+\.jpg", + "keyword": "b7ff141331d0c7fc711ab28d45dfbb013a83d8e9", "count": 24, - "keyword": "903ebe227d85e484460382fc6cbab42be7a244d5", }) def __init__(self, match): @@ -37,19 +37,43 @@ class HentaifoxGalleryExtractor(HentaifoxBase, GalleryExtractor): return { "gallery_id": text.parse_int(self.gallery_id), "title" : text.unescape(extr("

", "

")), - "parody" : split(extr(">Parodies:" , ""))[::2], - "characters": split(extr(">Characters:", ""))[::2], - "tags" : split(extr(">Tags:" , ""))[::2], - "artist" : split(extr(">Artists:" , ""))[::2], - "group" : split(extr(">Groups:" , ""))[::2], - "type" : text.remove_html(extr(">Category:", "")), + "parody" : split(extr(">Parodies:" , ""))[::2], + "characters": split(extr(">Characters:", ""))[::2], + "tags" : split(extr(">Tags:" , ""))[::2], + "artist" : split(extr(">Artists:" , ""))[::2], + "group" : split(extr(">Groups:" , ""))[::2], + "type" : text.remove_html(extr(">Category:", "= 0: + extr = text.extract + load_id = extr(page, 'id="load_id" value="', '"', pos)[0] + load_dir = extr(page, 'id="load_dir" value="', '"', pos)[0] + load_pages = extr(page, 'id="load_pages" value="', '"', pos)[0] + + url = self.root + "/includes/thumbs_loader.php" + data = { + "u_id" : self.gallery_id, + "g_id" : load_id, + "img_dir" : load_dir, + "visible_pages": "0", + "total_pages" : load_pages, + "type" : "2", + } + headers = { + "Origin": self.root, + "Referer": self.gallery_url, + "X-Requested-With": "XMLHttpRequest", + } + page = self.request( + url, method="POST", headers=headers, data=data).text + return [ - (text.urljoin(self.root, url.replace("t.", ".")), None) + (url.replace("t.", "."), None) for url in text.extract_iter(page, 'data-src="', '"') ] @@ -64,15 +88,13 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor): ("https://hentaifox.com/character/reimu-hakurei/"), ("https://hentaifox.com/artist/distance/"), ("https://hentaifox.com/search/touhou/"), - ("https://hentaifox.com/tag/full-colour/", { + ("https://hentaifox.com/tag/heterochromia/", { "pattern": HentaifoxGalleryExtractor.pattern, - "count": ">= 40", + "count": ">= 60", "keyword": { - "url": str, + "url" : str, "gallery_id": int, - "thumbnail": r"re:https://i\d*.hentaifox.com/\d+/\d+/thumb\.", - "title": str, - "tags": list, + "title" : str, }, }), ) @@ -87,31 +109,26 @@ class HentaifoxSearchExtractor(HentaifoxBase, Extractor): yield Message.Queue, gallery["url"], gallery def galleries(self): - url = "{}/{}/".format(self.root, self.path) + num = 1 while True: + url = "{}{}/pag/{}/".format(self.root, self.path, num) page = self.request(url).text - info, gpos = text.extract( - page, 'class="galleries_overview">', 'class="clear">') - for ginfo in text.extract_iter(info, '
') yield { - "url": text.urljoin(self.root, url), + "url" : text.urljoin(self.root, url), "gallery_id": text.parse_int( url.strip("/").rpartition("/")[2]), - "thumbnail": text.urljoin(self.root, thumb), - "title": text.unescape(title), - "tags": tags.split(), + "title" : text.unescape(title), "_extractor": HentaifoxGalleryExtractor, } - pos = page.find('class="current"', gpos) - url = text.extract(page, 'href="', '"', pos)[0] + pos = page.find(">Next<") + url = text.rextract(page, "href=", ">", pos)[0] if pos == -1 or "/pag" not in url: return - url = text.urljoin(self.root, url) + num += 1 diff --git a/test/test_results.py b/test/test_results.py index 354f6975..ce7b38a2 100644 --- a/test/test_results.py +++ b/test/test_results.py @@ -26,7 +26,6 @@ TRAVIS_SKIP = { # temporary issues, etc. BROKEN = { - "hentaifox", "mangapark", "pixhost", }