[nhentai] add 'favorite' extractor (#1814)

3 years ago · 3e36543c98
parent 656358ea92
commit 3e36543c98
2 changed files with 29 additions and 2 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -496,7 +496,7 @@ Consider all sites to be NSFW unless otherwise known.
 <tr>
    <td>nhentai</td>
    <td>https://nhentai.net/</td>
-    <td>Galleries, Search Results</td>
+    <td>Favorites, Galleries, Search Results</td>
    <td></td>
 </tr>
 <tr>
--- a/gallery_dl/extractor/nhentai.py
+++ b/gallery_dl/extractor/nhentai.py
@@ -101,7 +101,6 @@ class NhentaiGalleryExtractor(NhentaiBase, GalleryExtractor):

 class NhentaiSearchExtractor(NhentaiBase, Extractor):
    """Extractor for nhentai search results"""
-    category = "nhentai"
    subcategory = "search"
    pattern = r"(?:https?://)?nhentai\.net/search/?\?([^#]+)"
    test = ("https://nhentai.net/search/?q=touhou", {
@@ -130,3 +129,31 @@ class NhentaiSearchExtractor(NhentaiBase, Extractor):
            if 'class="next"' not in page:
                return
            params["page"] += 1
+
+
+class NhentaiFavoriteExtractor(NhentaiBase, Extractor):
+    """Extractor for nhentai favorites"""
+    subcategory = "favorite"
+    pattern = r"(?:https?://)?nhentai\.net/favorites/?(?:\?([^#]+))?"
+    test = ("https://nhentai.net/favorites/",)
+
+    def __init__(self, match):
+        Extractor.__init__(self, match)
+        self.params = text.parse_query(match.group(1))  # query part, or None
+
+    def items(self):  # yields one Message.Queue entry per gallery ID
+        data = {"_extractor": NhentaiGalleryExtractor}  # shared by all entries
+        for gallery_id in self._pagination(self.params):
+            url = "{}/g/{}/".format(self.root, gallery_id)  # gallery page URL
+            yield Message.Queue, url, data
+
+    def _pagination(self, params):  # generator over IDs found on each page
+        url = "{}/favorites/".format(self.root)
+        params["page"] = text.parse_int(params.get("page"), 1)
+
+        while True:
+            page = self.request(url, params=params).text  # one request per page
+            yield from text.extract_iter(page, 'href="/g/', '/')  # gallery IDs
+            if 'class="next"' not in page:  # no "next" marker: last page
+                return
+            params["page"] += 1  # advance; mutates the dict passed in