From 61fbbd2dae68a4ed10751c9eb783d4ddde479537 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Fri, 26 Feb 2021 17:53:27 +0100 Subject: [PATCH] [exhentai] rename metadata fields to match API results (#1325) - gallery_id -> gid - gallery_token -> token - title_jp -> title_jpn - visible -> expunged - gallery_size -> filesize - count -> filecount Also changes the function of the 'metadata' option. It is now boolean and causes extra data fields from the API to be added instead of completely replacing the data from HTML when activated. --- docs/configuration.rst | 11 +++-- gallery_dl/extractor/exhentai.py | 81 ++++++++++++++++++++++---------- 2 files changed, 61 insertions(+), 31 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 1ede2fe8..6cdf2878 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -927,14 +927,15 @@ Description extractor.exhentai.metadata --------------------------- Type - ``string`` + ``bool`` Default - ``"html"`` + ``false`` Description - Select the gallery metadata source. + Load extended gallery metadata from the + `API <https://ehwiki.org/wiki/API#Gallery_Metadata>`_. - * ``"api"``: Get data from the `API <https://ehwiki.org/wiki/API#Gallery_Metadata>`_. - * ``"html"``: Extract data from HTML. + Adds ``archiver_key``, ``posted``, and ``torrents``. + Makes ``date`` and ``filesize`` more precise. 
extractor.exhentai.original diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index eabb8674..d1f6ec25 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -22,10 +22,10 @@ BASE_PATTERN = r"(?:https?://)?(e[x-]|g\.e-)hentai\.org" class ExhentaiExtractor(Extractor): """Base class for exhentai extractors""" category = "exhentai" - directory_fmt = ("{category}", "{gallery_id} {title[:247]}") + directory_fmt = ("{category}", "{gid} {title[:247]}") filename_fmt = ( - "{gallery_id}_{num:>04}_{image_token}_{filename}.{extension}") - archive_fmt = "{gallery_id}_{num}" + "{gid}_{num:>04}_{image_token}_{filename}.{extension}") + archive_fmt = "{gid}_{num}" cookienames = ("ipb_member_id", "ipb_pass_hash") cookiedomain = ".exhentai.org" root = "https://exhentai.org" @@ -131,7 +131,39 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): r"|/s/([\da-f]{10})/(\d+)-(\d+))") test = ( ("https://exhentai.org/g/1200119/d55c44d3d0/", { - "keyword": "199db053b4ccab94463b459e1cfe079df8cdcdd1", + "keyword": { + "cost": int, + "date": "dt:2018-03-18 20:15:00", + "eh_category": "Non-H", + "expunged": False, + "favorites": "17", + "filecount": "4", + "filesize": 1488978, + "gid": 1200119, + "height": int, + "image_token": "re:[0-9a-f]{10}", + "lang": "jp", + "language": "Japanese", + "parent": "", + "rating": r"re:\d\.\d+", + "size": int, + "tags": [ + "parody:komi-san wa komyushou desu.", + "character:shouko komi", + "group:seventh lowlife", + "sample", + ], + "thumb": "https://exhentai.org/t/ce/0a/ce0a5bcb583229a9b07c0f8" + "3bcb1630ab1350640-624622-736-1036-jpg_250.jpg", + "title": "C93 [Seventh_Lowlife] Komi-san ha Tokidoki Daitan de" + "su (Komi-san wa Komyushou desu) [Sample]", + "title_jpn": "(C93) [Comiketjack (わ!)] 古見さんは、時々大胆" + "です。 (古見さんは、コミュ症です。) [見本]", + "token": "d55c44d3d0", + "torrentcount": "0", + "uploader": "klorpa", + "width": int, + }, "content": "e9891a4c017ed0bb734cd1efba5cd03f594d31ff", }), 
("https://exhentai.org/g/960461/4f0e369d82/", { @@ -182,6 +214,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): gpage = self._gallery_page() data = self.get_metadata(gpage) + self.count = text.parse_int(data["filecount"]) yield Message.Directory, data images = itertools.chain( @@ -197,37 +230,38 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def get_metadata(self, page): """Extract gallery metadata""" - if self.config("metadata") == "api": - return self.metadata_from_api() - return self.metadata_from_page(page) + data = self.metadata_from_page(page) + if self.config("metadata", False): + data.update(self.metadata_from_api()) + data["date"] = text.parse_timestamp(data["posted"]) + return data def metadata_from_page(self, page): extr = text.extract_from(page) data = { - "gallery_id" : self.gallery_id, - "gallery_token": self.gallery_token, + "gid" : self.gallery_id, + "token" : self.gallery_token, "thumb" : extr("background:transparent url(", ")"), "title" : text.unescape(extr('

', '

')), - "title_jp" : text.unescape(extr('

', '

')), + "title_jpn" : text.unescape(extr('

', '

')), + "_" : extr('