# -*- coding: utf-8 -*- # Copyright 2019-2023 Mike Fährmann # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License version 2 as # published by the Free Software Foundation. """Extractors for https://issuu.com/""" from .common import GalleryExtractor, Extractor, Message from .. import text, util class IssuuBase(): """Base class for issuu extractors""" category = "issuu" root = "https://issuu.com" class IssuuPublicationExtractor(IssuuBase, GalleryExtractor): """Extractor for a single publication""" subcategory = "publication" directory_fmt = ("{category}", "{document[username]}", "{document[date]:%Y-%m-%d} {document[title]}") filename_fmt = "{num:>03}.{extension}" archive_fmt = "{document[publicationId]}_{num}" pattern = r"(?:https?://)?issuu\.com(/[^/?#]+/docs/[^/?#]+)" example = "https://issuu.com/issuu/docs/TITLE/" def metadata(self, page): pos = page.rindex('id="initial-data"') data = util.json_loads(text.rextract( page, '