diff --git a/docs/configuration.rst b/docs/configuration.rst index 09173ba4..a070b472 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -906,6 +906,19 @@ Description instead of the value listed on ``https://e-hentai.org/home.php`` +extractor.exhentai.metadata +--------------------------- +Type + ``string`` +Default + ``"html"`` +Description + Select the gallery metadata source. + + * ``"api"``: Get data from the `API <https://ehwiki.org/wiki/API>`_. + * ``"html"``: Extract data from HTML. + + extractor.exhentai.original --------------------------- Type diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index f0071123..eabb8674 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -182,9 +182,6 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): gpage = self._gallery_page() data = self.get_metadata(gpage) - self.count = data["count"] - - yield Message.Version, 1 yield Message.Directory, data images = itertools.chain( @@ -200,6 +197,11 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def get_metadata(self, page): """Extract gallery metadata""" + if self.config("metadata") == "api": + return self.metadata_from_api() + return self.metadata_from_page(page) + + def metadata_from_page(self, page): extr = text.extract_from(page) data = { "gallery_id" : self.gallery_id, @@ -225,6 +227,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): "torrentcount" : text.parse_int(extr('>Torrent Download (', ')')), } + self.count = data["count"] data["lang"] = util.language_to_code(data["language"]) data["tags"] = [ text.unquote(tag.replace("+", " ")) @@ -233,6 +236,25 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): return data + def metadata_from_api(self): + url = self.root + "/api.php" + data = { + "method": "gdata", + "gidlist": ((self.gallery_id, self.gallery_token),), + "namespace": 1, + } + + data = self.request(url, method="POST", json=data).json() + if "error" in data: + raise 
exception.StopExtraction(data["error"]) + + data = data["gmetadata"][0] + data["eh_category"] = data["category"] + data["date"] = text.parse_timestamp(data["posted"]) + self.count = data["filecount"] + + return data + def image_from_page(self, page): """Get image url and data from webpage""" pos = page.index('