# -*- coding: utf-8 -*- # Copyright 2022-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://bunkrr.su/""" from .lolisafe import LolisafeAlbumExtractor from .. import text from urllib.parse import urlsplit, urlunsplit BASE_PATTERN = r"(?:https?://)?(?:app\.)?bunkr+\.(?:la|[sr]u|is|to)" MEDIA_DOMAIN_OVERRIDES = { "cdn9.bunkr.ru" : "c9.bunkr.ru", "cdn12.bunkr.ru": "media-files12.bunkr.la", "cdn-pizza.bunkr.ru": "pizza.bunkr.ru", } CDN_HOSTED_EXTENSIONS = ( ".mp4", ".m4v", ".mov", ".webm", ".mkv", ".ts", ".wmv", ".zip", ".rar", ".7z", ) class BunkrAlbumExtractor(LolisafeAlbumExtractor): """Extractor for bunkrr.su albums""" category = "bunkr" root = "https://bunkrr.su" pattern = BASE_PATTERN + r"/a/([^/?#]+)" example = "https://bunkrr.su/a/ID" def fetch_album(self, album_id): # album metadata page = self.request(self.root + "/a/" + self.album_id).text info = text.split_html(text.extr( page, "").partition(">")[2]) count, _, size = info[1].split(None, 2) pos = page.index('class="grid-images') urls = list(text.extract_iter(page, ' 2 else "", "count" : len(urls), } def _extract_files(self, urls): for url in urls: if url.startswith("/"): try: url = self._extract_file(text.unescape(url)) except Exception as exc: self.log.error("%s: %s", exc.__class__.__name__, exc) continue else: if url.lower().endswith(CDN_HOSTED_EXTENSIONS): scheme, domain, path, query, fragment = urlsplit(url) if domain in MEDIA_DOMAIN_OVERRIDES: domain = MEDIA_DOMAIN_OVERRIDES[domain] else: domain = domain.replace("cdn", "media-files", 1) url = urlunsplit((scheme, domain, path, query, fragment)) yield {"file": text.unescape(url)} def _extract_file(self, path): page = self.request(self.root + path).text if path[1] == "v": url = text.extr(page, '