diff --git a/docs/supportedsites.md b/docs/supportedsites.md index 8e4c59a1..6040cd47 100644 --- a/docs/supportedsites.md +++ b/docs/supportedsites.md @@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW. Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles + + Bato + https://bato.to + Chapters, Manga + + BBC https://bbc.co.uk/ diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py index 695b8b2a..99de2169 100644 --- a/gallery_dl/extractor/__init__.py +++ b/gallery_dl/extractor/__init__.py @@ -24,6 +24,7 @@ modules = [ "architizer", "artstation", "aryion", + "bato", "bbc", "behance", "blogger", diff --git a/gallery_dl/extractor/bato.py b/gallery_dl/extractor/bato.py new file mode 100644 index 00000000..c34b74fc --- /dev/null +++ b/gallery_dl/extractor/bato.py @@ -0,0 +1,113 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +"""Extractors for https://bato.to and aliases (v3x only)""" + +from .common import ChapterExtractor, MangaExtractor +from .. import text, exception +import re + +BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)" +MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?" +CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?" + +class BatoBase(): + """Base class for bato v3x extractors""" + category = "bato" + root = "https://bato.to" + +class BatoChapterExtractor(BatoBase, ChapterExtractor): + """Extractor for manga chapters from bato.to""" + pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")" + # There are three possible patterns for a chapter + example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5" + example1 = "https://bato.to/title/12345-manga-name-with-spaces/54212-vol1-ch_1.5" + example2 = "https://bato.to/title/12345/54212" + # v2x, not supported + example3 = "https://bato.to/chapter/54212" + + def __init__(self, match): + self.path = match.group(1) + ChapterExtractor.__init__(self, match, self.root + self.path) + + def metadata(self, page): + info, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + info = info.encode('latin-1').decode('utf-8').replace("\n", "") + + match = re.match( + r"(.+) - " + r"(?:Volume *(\d+) )?" + r"Chapter *([\d\.]+)", info) + manga, volume, chapter = match.groups() if match else ("", "", info) + chapter, sep, minor = chapter.partition(".") + title_container = text.extr(page, f'") + title = text.extr(title_container, "", "") + + return { + "manga" : text.unescape(manga), + "title" : text.unescape(title), + "author" : "", + "volume" : text.parse_int(volume), + "chapter" : text.parse_int(chapter), + "chapter_minor": sep + minor, + } + + def images(self, page): + images_container = text.extr(page, 'pageOpts', ':[0,0]}"') + images_container = text.unescape(images_container) + + return [(url, None) for url in text.extract_iter(images_container, r'\"', r'\"')] + + +class BatoMangaExtractor(BatoBase, MangaExtractor): + """Extractor for manga from bato.to""" + reverse = False + chapterclass = BatoChapterExtractor + pattern = BASE_PATTERN + "(" + MANGA_PATTERN + "$" + ")" + # There are two possible patterns for a manga + example = "https://bato.to/title/12345-manga-name-with-spaces/" + example2 = "https://bato.to/title/12345/" + # v2x, not supported + example3 = "https://bato.to/series/12345/manga-name-with-space" + + def chapters(self, page): + data = {} + num_chapters, _ = text.extract(page, ">Chapters<", "") + num_chapters, _ = text.extract(num_chapters, r"", r"") + num_chapters = text.parse_int(num_chapters) + if num_chapters == 0: + raise exception.NotFoundError("chapter") + + manga, _ = text.extract(page, '', r' - Read Free Manga Online at Bato.To') + manga = manga.encode('latin-1').decode('utf-8').replace("\n", "") + data["manga"] = manga + + results = [] + for chapter_num in range(num_chapters): + chapter, _ = text.extract(page, f'
") + chapter += r"" # Add this back in so we can match the date + url, pos = text.extract(chapter, '') + title, _ = text.extract(title, r"", r"") + if title is None or title == "" or title == "": + title, _ = text.extract(chapter, ">", "", pos) + + date, _ = text.extract(chapter, "") + date, _ = text.extract(date, 'time="', '"') + + data["date"] = date + data["title"] = title + data["chapter"] = text.parse_int(chapter_major) + data["chapter_minor"] = sep + chapter_minor + + if url.startswith("/"): + url = self.root + url + results.append((url, data.copy())) + return results \ No newline at end of file diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py index 4839660d..e3738b8b 100755 --- a/scripts/supportedsites.py +++ b/scripts/supportedsites.py @@ -32,6 +32,7 @@ CATEGORY_MAP = { "atfbooru" : "ATFBooru", "b4k" : "arch.b4k.co", "baraag" : "baraag", + "bato" : "Bato", "bbc" : "BBC", "comicvine" : "Comic Vine", "coomerparty" : "Coomer", diff --git a/test/results/bato.py b/test/results/bato.py new file mode 100644 index 00000000..18479f9a --- /dev/null +++ b/test/results/bato.py @@ -0,0 +1,65 @@ +# -*- coding: utf-8 -*- + +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License version 2 as +# published by the Free Software Foundation. + +from gallery_dl.extractor import bato +from gallery_dl import exception + +__tests__ = ( +{ + "#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8", + "#category": ("", "bato", "chapter"), + "#class" : bato.BatoChapterExtractor, + "#count" : 66, + + "manga" : "I Shall Master this Family! [Official]", + "title" : "Observing", + "chapter" : 8, +}, +{ + "#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5", + "#comment" : "volume (vol) in url", + "#category": ("", "bato", "chapter"), + "#class" : bato.BatoChapterExtractor, + "#count" : 7, + + "manga" : "86--EIGHTY-SIX (Official)", + "title" : "The Spearhead Squadron's Power", + "volume" : 1, + "chapter" : 5, +}, +{ + "#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 21", + + "manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)", +}, +{ + "#url" : "https://bato.to/title/104929-86-eighty-six-official", + "#comment" : "Manga with number in name", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 18", + + "manga" : "86--EIGHTY-SIX (Official)", +}, +{ + "#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan", + "#comment" : "Non-English translation (Indonesian)", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#count" : ">= 29", + + "manga" : "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠", +}, +{ + "#url" : "https://bato.to/title/134270-removed", + "#category": ("", "bato", "manga"), + "#class" : bato.BatoMangaExtractor, + "#exception": exception.NotFoundError +} +)