gallery-dl/gallery_dl/extractor/bunkr.py

# -*- coding: utf-8 -*-

# Copyright 2022-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://bunkr.su/"""

from .lolisafe import LolisafeAlbumExtractor
from .. import text, util


class BunkrAlbumExtractor(LolisafeAlbumExtractor):
    """Extractor for bunkr.su albums

    Album data is read from the JSON embedded in the album's web page;
    if that fails for any reason, the lolisafe API on the 'app.'
    subdomain is used instead (see fetch_album()).
    """
    category = "bunkr"
    root = "https://bunkr.su"
    pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u|is|to)/a/([^/?#]+)"
    test = (
        ("https://bunkr.su/a/Lktg9Keq", {
            "pattern": r"https://cdn\.bunkr\.su/test-テスト-\"&>-QjgneIQv\.png",
            "content": "0c8768055e4e20e7c7259608b67799171b691140",
            "keyword": {
                "album_id": "Lktg9Keq",
                "album_name": 'test テスト "&>',
                "count": 1,
                "filename": 'test-テスト-"&>-QjgneIQv',
                "id": "QjgneIQv",
                "name": 'test-テスト-"&>',
                "num": int,
            },
        }),
        # mp4 (#2239)
        ("https://app.bunkr.ru/a/ptRHaCn2", {
            "pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",
            "content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",
        }),
        # cdn4
        ("https://bunkr.is/a/iXTTc1o2", {
            "pattern": r"https://(cdn|media-files)4\.bunkr\.ru/",
            "content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",
        }),
        ("https://bunkr.to/a/Lktg9Keq"),
    )

    def fetch_album(self, album_id):
        """Return a '(files, data)' tuple for an album

        The album page at '<root>/a/<self.album_id>' is requested and
        the JSON following its 'id="__NEXT_DATA__"' marker is parsed.
        On success, each file dict gets a "file" URL built from its
        "cdn" and "name" values, and 'data' is a dict with the keys
        "album_id", "album_name", "description" and "count".

        If anything in that step raises, 'self.root' is switched to the
        'app.' subdomain and the result of
        LolisafeAlbumExtractor.fetch_album(self, album_id) is used.

        In both cases, files whose URL ends in a video or archive
        extension (compared case-insensitively) get a "Referer" header
        and have '://cdn' replaced by '://media-files' in their URL.
        """
        # keep the original root; 'self.root' may get replaced below
        root = self.root

        try:
            data = util.json_loads(text.extr(
                self.request(root + "/a/" + self.album_id).text,
                'id="__NEXT_DATA__" type="application/json">', '<'))
            album = data["props"]["pageProps"]["album"]
            files = album["files"]
        except Exception as exc:
            # deliberate catch-all: any request, parsing, or lookup
            # failure triggers the API fallback
            self.log.debug("%s: %s", exc.__class__.__name__, exc)
            self.log.debug("Falling back to lolisafe API")
            self.root = root.replace("://", "://app.", 1)
            files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)
            # fix file URLs (bunkr..ru -> bunkr.ru) (#3481)
            for file in files:
                file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)
        else:
            # errors in here are not caught and propagate to the caller
            for file in files:
                file["file"] = file["cdn"] + "/" + file["name"]
            data = {
                "album_id"   : self.album_id,
                "album_name" : text.unescape(album["name"]),
                "description": text.unescape(album["description"]),
                "count"      : len(files),
            }

        # file types that use the 'media-files' servers
        media_exts = (".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",
                      ".zip", ".rar", ".7z")
        # built from the original root, not the 'app.' fallback root
        headers = {"Referer": root.replace("://", "://stream.", 1) + "/"}
        for file in files:
            # compare in lowercase so that extensions like '.MP4' or
            # '.Zip' are recognized; the URL itself keeps its case
            if file["file"].lower().endswith(media_exts):
                file["_http_headers"] = headers
                file["file"] = file["file"].replace(
                    "://cdn", "://media-files", 1)

        return files, data
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`# -- coding: utf-8 --`

[bunkr] fix URLs returned by API (#3481) 2 years ago			`# Copyright 2022-2023 Mike Fährmann`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`#`
			`# This program is free software; you can redistribute it and/or modify`
			`# it under the terms of the GNU General Public License version 2 as`
			`# published by the Free Software Foundation.`

[bunkr] update domain (#3636) 2 years ago			`"""Extractors for https://bunkr.su/"""`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago
			`from .lolisafe import LolisafeAlbumExtractor`
replace json.loads with direct calls to JSONDecoder.decode 2 years ago			`from .. import text, util`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago

			`class BunkrAlbumExtractor(LolisafeAlbumExtractor):`
[bunkr] update domain (#3636) 2 years ago			`"""Extractor for bunkr.su albums"""`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`category = "bunkr"`
[bunkr] update domain (#3636) 2 years ago			`root = "https://bunkr.su"`
			`pattern = r"(?:https?://)?(?:app\.)?bunkr\.(?:[sr]u\|is\|to)/a/([^/?#]+)"`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`test = (`
[bunkr] update domain (#3636) 2 years ago			`("https://bunkr.su/a/Lktg9Keq", {`
			`"pattern": r"https://cdn\.bunkr\.su/test-テスト-\"&>-QjgneIQv\.png",`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`"content": "0c8768055e4e20e7c7259608b67799171b691140",`
			`"keyword": {`
			`"album_id": "Lktg9Keq",`
			`"album_name": 'test テスト "&>',`
			`"count": 1,`
			`"filename": 'test-テスト-"&>-QjgneIQv',`
			`"id": "QjgneIQv",`
			`"name": 'test-テスト-"&>',`
			`"num": int,`
			`},`
			`}),`
			`# mp4 (#2239)`
[bunkr] update domain (#3636) 2 years ago			`("https://app.bunkr.ru/a/ptRHaCn2", {`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`"pattern": r"https://media-files\.bunkr\.ru/_-RnHoW69L\.mp4",`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`"content": "80e61d1dbc5896ae7ef9a28734c747b28b320471",`
			`}),`
[bunkr] fix extraction (#2903) 2 years ago			`# cdn4`
			`("https://bunkr.is/a/iXTTc1o2", {`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`"pattern": r"https://(cdn\|media-files)4\.bunkr\.ru/",`
[bunkr] fix extraction (#2903) 2 years ago			`"content": "da29aae371b7adc8c5ef8e6991b66b69823791e8",`
			`}),`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`("https://bunkr.to/a/Lktg9Keq"),`
			`)`

			`def fetch_album(self, album_id):`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`root = self.root`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago
			`try:`
replace json.loads with direct calls to JSONDecoder.decode 2 years ago			`data = util.json_loads(text.extr(`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`self.request(root + "/a/" + self.album_id).text,`
replace 'text.extract()' with 'text.extr()' where possible 2 years ago			`'id="__NEXT_DATA__" type="application/json">', '<'))`
[bunkr] fix extraction (#2903) 2 years ago			`album = data["props"]["pageProps"]["album"]`
			`files = album["files"]`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`except Exception as exc:`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`self.log.debug("%s: %s", exc.__class__.__name__, exc)`
[bunkr] fix URLs returned by API (#3481) 2 years ago			`self.log.debug("Falling back to lolisafe API")`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`self.root = root.replace("://", "://app.", 1)`
			`files, data = LolisafeAlbumExtractor.fetch_album(self, album_id)`
[bunkr] fix URLs returned by API (#3481) 2 years ago			`# fix file URLs (bunkr..ru -> bunkr.ru) (#3481)`
			`for file in files:`
			`file["file"] = file["file"].replace("bunkr..", "bunkr.", 1)`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`else:`
			`for file in files:`
			`file["file"] = file["cdn"] + "/" + file["name"]`
			`data = {`
			`"album_id" : self.album_id,`
			`"album_name" : text.unescape(album["name"]),`
			`"description": text.unescape(album["description"]),`
			`"count" : len(files),`
			`}`
[bunkr] use 'media-files' servers for more file types 2 years ago
misc fixes - fix typo (#3399) - remove double assignment - [bunkr] update things I forgot in 6b6f886d - [soundgasm] adjust 'archive_fmt' (#3388) 2 years ago			`headers = {"Referer": root.replace("://", "://stream.", 1) + "/"}`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago			`for file in files:`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`if file["file"].endswith(`
[bunkr] Fix extracting ts files 2 years ago			`(".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts",`
			`".zip", ".rar", ".7z")):`
[bunkr] use 'media-files' servers for more file types 2 years ago			`file["_http_headers"] = headers`
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`file["file"] = file["file"].replace(`
misc fixes - fix typo (#3399) - remove double assignment - [bunkr] update things I forgot in 6b6f886d - [soundgasm] adjust 'archive_fmt' (#3388) 2 years ago			`"://cdn", "://media-files", 1)`
[bunkr] fix extraction (#2732) move bunkr.is code to its own module 2 years ago
[bunkr] update domain (#3391) and improve bunkr/app.bunkr handling 2 years ago			`return files, data`