From ad96e705465b0d4e056978f40e5b327bd5e2b1a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Wed, 15 Feb 2023 15:42:32 +0100 Subject: [PATCH] [bunkr] fix extraction (#3636, #3655) --- gallery_dl/extractor/bunkr.py | 83 ++++++++++++++++++++--------------- 1 file changed, 48 insertions(+), 35 deletions(-) diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index a9596d2f..17d066d4 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -9,7 +9,7 @@ """Extractors for https://bunkr.su/""" from .lolisafe import LolisafeAlbumExtractor -from .. import text, util +from .. import text class BunkrAlbumExtractor(LolisafeAlbumExtractor): @@ -19,7 +19,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)" test = ( ("https://bunkr.su/a/Lktg9Keq", { - "pattern": r"https://cdn\.bunkr\.su/test-テスト-\"&>-QjgneIQv\.png", + "pattern": r"https://cdn\.bunkr\.ru/test-テスト-\"&>-QjgneIQv\.png", "content": "0c8768055e4e20e7c7259608b67799171b691140", "keyword": { "album_id": "Lktg9Keq", @@ -40,44 +40,57 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): ("https://bunkr.is/a/iXTTc1o2", { "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/", "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8", + "keyword": { + "album_id": "iXTTc1o2", + "album_name": "test2", + "album_size": "691.1 KB", + "count": 2, + "description": "072022", + "filename": "re:video-wFO9FtxG|image-sZrQUeOx", + "id": "re:wFO9FtxG|sZrQUeOx", + "name": "re:video|image", + "num": int, + }, }), ("https://bunkr.to/a/Lktg9Keq"), ) def fetch_album(self, album_id): - root = self.root + # album metadata + page = self.request(self.root + "/a/" + self.album_id).text + info = text.split_html(text.extr( + page, "").partition(">")[2]) + count, _, size = info[1].split(None, 2) + + # files + cdn = None + files = [] + append = files.append + headers = {"Referer": self.root.replace("://", "://stream.", 1) + "/"} - try: - data = util.json_loads(text.extr( - self.request(root + "/a/" + self.album_id).text, - 'id="__NEXT_DATA__" type="application/json">', '<')) - album = data["props"]["pageProps"]["album"] - files = album["files"] - except Exception as exc: - self.log.debug("%s: %s", exc.__class__.__name__, exc) - self.log.debug("Falling back to lolisafe API") - self.root = root.replace("://", "://app.", 1) - files, data = LolisafeAlbumExtractor.fetch_album(self, album_id) - # fix file URLs (bunkr..ru -> bunkr.ru) (#3481) - for file in files: - file["file"] = file["file"].replace("bunkr..", "bunkr.", 1) - else: - for file in files: - file["file"] = file["cdn"] + "/" + file["name"] - data = { - "album_id" : self.album_id, - "album_name" : text.unescape(album["name"]), - "description": text.unescape(album["description"]), - "count" : len(files), - } + pos = page.index('class="grid-images') + for url in text.extract_iter(page, ' 2 else "", + "count" : len(files), + }