# -*- coding: utf-8 -*- # Copyright 2019-2021 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for 4chan archives based on FoolFuuka""" from .common import Extractor, Message, generate_extractors from .. import text import itertools class FoolfuukaExtractor(Extractor): """Base extractor for FoolFuuka based boards/archives""" basecategory = "foolfuuka" archive_fmt = "{board[shortname]}_{num}_{timestamp}" external = "default" def __init__(self, match): Extractor.__init__(self, match) self.session.headers["Referer"] = self.root if self.external == "direct": self.remote = self._remote_direct def items(self): yield Message.Directory, self.metadata() for post in self.posts(): media = post["media"] if not media: continue url = media["media_link"] if not url and "remote_media_link" in media: url = self.remote(media) if url.startswith("/"): url = self.root + url post["filename"], _, post["extension"] = \ media["media"].rpartition(".") yield Message.Url, url, post def metadata(self): """ """ def posts(self): """Return an iterable with all relevant posts""" def remote(self, media): """Resolve a remote media link""" needle = '