diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index 1e9b106c..368a1712 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -496,7 +496,7 @@ Consider all sites to be NSFW unless otherwise known.
 nhentai
 https://nhentai.net/
- Galleries, Search Results
+ Favorites, Galleries, Search Results
diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py
index bca5b4fb..20b716b2 100644
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -101,7 +101,6 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):
 
 class NhentaiSearchExtractor(NhentaiBase, Extractor):
     """Extractor for nhentai search results"""
-    category = "nhentai"
     subcategory = "search"
     pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
     test = ("https://nhentai.net/search/?q=touhou", {
@@ -130,3 +129,31 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
             if 'class="next"' not in page:
                 return
             params["page"] += 1
+
+
+class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+    """Extractor for nhentai favorites"""
+    subcategory = "favorite"
+    pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+    test = ("https://nhentai.net/favorites/",)
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.params = text.parse_query(match.group(1))
+
+    def items(self):
+        data = {"_extractor": NhentaiGalleryExtractor}
+        for gallery_id in self._pagination(self.params):
+            url = "{}/g/{}/".format(self.root, gallery_id)
+            yield Message.Queue, url, data
+
+    def _pagination(self, params):
+        url = "{}/favorites/".format(self.root)
+        params["page"] = text.parse_int(params.get("page"), 1)
+
+        while True:
+            page = self.request(url, params=params).text
+            yield from text.extract_iter(page, 'href="/g/', '/')
+            if 'class="next"' not in page:
+                return
+            params["page"] += 1