From 4d784d1f6f1995052b9b2345c047b64b6c6c5c88 Mon Sep 17 00:00:00 2001 From: 4censord Date: Fri, 19 Jan 2024 15:08:25 +0100 Subject: [PATCH] [toondex] add support --- docs/supportedsites.md | 6 ++ gallery_dl/extractor/__init__.py | 1 + gallery_dl/extractor/toondex.py | 120 +++++++++++++++++++++++++++++++ scripts/supportedsites.py | 1 + 4 files changed, 128 insertions(+) create mode 100644 gallery_dl/extractor/toondex.py diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 53c88335..e78880f4 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -859,6 +859,12 @@ Consider all listed sites to potentially be NSFW. Galleries + + ToonDex + https://toondex.net/ + Chapters, Manga + + Toyhouse https://toyhou.se/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 8e712961..a76144c1 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -154,6 +154,7 @@ modules = [ "tcbscans", "telegraph", "tmohentai", + "toondex", "toyhouse", "tsumino", "tumblr", diff --git a/gallery_dl/extractor/toondex.py b/gallery_dl/extractor/toondex.py new file mode 100644 index 00000000..2ea2d12b --- /dev/null +++ b/gallery_dl/extractor/toondex.py @@ -0,0 +1,120 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://toondex.net/""" + +import re +from .common import MangaExtractor, ChapterExtractor +from .. import text + + +BASE_PATTERN = r"(?:https?://)?toondex\.net" + + +class ToondexBase: + """Base class for Toondex extractors""" + + category = "toondex" + root = "https://toondex.net/" + + def get_title(self, page): + """Gets the title of the manga""" + title = text.extr(page, "", "") + title = text.unescape(title).strip() + match = re.search( + r"(?:Chapter \d+ \| |Chapter \d+\.\d+ \| |Read )" + r"(.+)(?: - (Read Free Online Comics at )?ToonDex)", + title, + ) + if match: + title = match.group(1) + return title + + +class ToondexChapterExtractor(ToondexBase, ChapterExtractor): + """Extractor for manga chapters from Toondex.net""" + + subcategory = "chapter" + directory_fmt = ( + "{category}", + "{manga}", + "Chapter-{chapter:03}{chapter_minor}", + ) + archive_fmt = "{chapter:03}{chapter_minor}_{page}" + pattern = BASE_PATTERN + r"/comics/([\w\d-]+)\/chapter-(\d+-[\d+]|\d+)/?" + example = "https://toondex.net/comics/sex-stopwatch/chapter-1/" + + def __init__(self, match): + url = match.group(0) + self.gid, self.chapter = match.groups() + ChapterExtractor.__init__(self, match, url) + + def metadata(self, page): + chapter, sep, minor = self.chapter.partition("-") + + data = { + "manga": self.get_title(page), + "manga_id": self.gid, + "chapter": text.parse_int(chapter), + "chapter_id": f"{self.gid}-chapter-{self.chapter}", + "chapter_minor": sep + minor, + } + return data + + def images(self, page): + images = [] + first_img = text.extract( + page, '