From 5aa30c366955d5e009ad9062a6821e4487535f26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sat, 27 Mar 2021 18:28:16 +0100 Subject: [PATCH] [tapas] add 'series' and 'episode' extractors (#692) --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/tapas.py | 170 +++++++++++++++++++++++++++++++ 3 files changed, 177 insertions(+) create mode 100644 gallery_dl/extractor/tapas.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 1cb97d48..9a0a9b10 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -643,6 +643,12 @@ Consider all sites to be NSFW unless otherwise known. Posts, User Profiles Supported + + Tapas + https://tapas.io/ + Episodes, Series + + Tsumino https://www.tsumino.com/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 3e85cef7..e43fa64b 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -109,6 +109,7 @@ modules = [ "smugmug", "speakerdeck", "subscribestar", + "tapas", "tsumino", "tumblr", "tumblrgallery", diff --git a/gallery_dl/extractor/tapas.py b/gallery_dl/extractor/tapas.py new file mode 100644 index 00000000..fb663042 --- /dev/null +++ b/gallery_dl/extractor/tapas.py @@ -0,0 +1,170 @@ +# -*- coding: utf-8 -*- + +# Copyright 2021 Mike Fährmann +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://tapas.io/""" + +from .common import Extractor, Message +from .. import text, exception + +BASE_PATTERN = r"(?:https?://)?tapas\.io" + + +class TapasExtractor(Extractor): + """Extractor for manga-chapters from tapas.io""" + category = "tapas" + root = "https://tapas.io" + directory_fmt = ("{category}", "{series[title]}", "{id} {title}") + filename_fmt = "{num:>02}.{extension}" + archive_fmt = "{id}_{num}" + _cache = {} + + def __init__(self, match): + Extractor.__init__(self, match) + setcookie = self.session.cookies.set + setcookie("birthDate" , "1981-02-03", domain=".tapas.io") + setcookie("adjustedBirthDate", "1981-02-03", domain=".tapas.io") + + def items(self): + headers = {"Accept": "application/json, text/javascript, */*;"} + + for episode_id in self.episode_ids(): + url = "{}/episode/{}".format(self.root, episode_id) + data = self.request(url, headers=headers).json()["data"] + + episode = data["episode"] + if not episode.get("free") and not episode.get("unlocked"): + raise exception.StopExtraction( + "Episode '%s' not unlocked (ID %s) ", + episode["title"], episode_id) + + html = data["html"] + series_id = text.rextract(html, 'data-series-id="', '"')[0] + try: + episode["series"] = self._cache[series_id] + except KeyError: + url = "{}/series/{}".format(self.root, series_id) + episode["series"] = self._cache[series_id] = self.request( + url, headers=headers).json()["data"] + + episode["date"] = text.parse_datetime(episode["publish_date"]) + yield Message.Directory, episode + + if episode["book"]: + content, _ = text.extract( + html, '
', '