[bato] add support

pull/4979/head
bug-assassin 9 months ago
parent f9544194c0
commit 74c225f94e

@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW.
<td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
<td></td>
</tr>
<tr>
<td>Bato</td>
<td>https://bato.to</td>
<td>Chapters, Manga</td>
<td></td>
</tr>
<tr>
<td>BBC</td>
<td>https://bbc.co.uk/</td>

@ -24,6 +24,7 @@ modules = [
"architizer",
"artstation",
"aryion",
"bato",
"bbc",
"behance",
"blogger",

@ -0,0 +1,113 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://bato.to and aliases (v3x only)"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, exception
import re
# Optional URL scheme followed by one of the accepted host names
# (bato.to, dto.to, batotoo.com, wto.to).
BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)"
# "/title/<digits>" followed by any number of "-<slug part>" groups made of
# lowercase letters and digits, with an optional trailing slash.
MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?"
# "/<digits>", an optional "-vol_<digits>", a mandatory "-ch_<digits>" with an
# optional "." and further digits, and an optional trailing slash.
CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?"
class BatoBase:
    """Shared attributes mixed into every bato v3x extractor"""
    # Base URL that site-relative paths get appended to
    root = "https://bato.to"
    # Category name shared by all extractors of this module
    category = "bato"
class BatoChapterExtractor(BatoBase, ChapterExtractor):
    """Extractor for manga chapters from bato.to (v3x URLs only)"""
    pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")"
    # Chapter URL forms matched by 'pattern': with and without a volume part
    example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5"
    # CHAPTER_PATTERN requires the "vol_<n>" spelling (with underscore)
    example1 = ("https://bato.to/title/12345-manga-name-with-spaces"
                "/54212-vol_1-ch_1.5")
    # NOTE: this form has no "-ch_" component, which CHAPTER_PATTERN requires,
    # so 'pattern' does not match it
    example2 = "https://bato.to/title/12345/54212"
    # v2x, not supported
    example3 = "https://bato.to/chapter/54212"

    def __init__(self, match):
        """Store the site-relative chapter path and build the full URL"""
        # metadata() also uses this path to locate the chapter's own link
        self.path = match.group(1)
        ChapterExtractor.__init__(self, match, self.root + self.path)

    def metadata(self, page):
        """Return a dict of chapter metadata parsed from 'page'

        Keys: manga, title, author (always empty), volume, chapter and
        chapter_minor. Values that cannot be parsed fall back to an empty
        string or 0 instead of raising.
        """
        # text.extr() yields "" when the markers are missing, so a page
        # without the expected <title> no longer fails on None
        info = text.extr(
            page, "<title>", " - Read Free Manga Online at Bato.To</title>")
        try:
            # Presumably undoes UTF-8 text that was mis-decoded as Latin-1
            info = info.encode("latin-1").decode("utf-8")
        except UnicodeError:
            # Text is not Latin-1 mojibake (e.g. already decoded correctly);
            # keep it unchanged
            pass
        info = info.replace("\n", "")

        match = re.match(
            r"(.+) - "
            r"(?:Volume *(\d+) )?"
            r"Chapter *([\d\.]+)", info)
        if match:
            manga, volume, chapter = match.groups()
        else:
            # Unrecognized title layout: treat the whole text as the chapter
            manga, volume, chapter = "", "", info
        chapter, sep, minor = chapter.partition(".")

        # The chapter title sits inside the link that points at this chapter
        title_container = text.extr(page, f'<a href="{self.path}"', "</a>")
        title = text.extr(title_container, "<!-- -->", "</span>")

        return {
            "manga"        : text.unescape(manga),
            "title"        : text.unescape(title),
            "author"       : "",
            "volume"       : text.parse_int(volume),
            "chapter"      : text.parse_int(chapter),
            "chapter_minor": sep + minor,
        }

    def images(self, page):
        """Return a list of (url, None) tuples for the images in 'page'"""
        # Image URLs are the backslash-quoted strings inside the 'pageOpts'
        # section once HTML entities have been unescaped
        images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
        images_container = text.unescape(images_container)
        return [
            (url, None)
            for url in text.extract_iter(images_container, r'\"', r'\"')
        ]
class BatoMangaExtractor(BatoBase, MangaExtractor):
    """Extractor for manga from bato.to (v3x URLs only)"""
    # NOTE(review): presumably tells MangaExtractor to keep the order of the
    # list returned by chapters() - confirm against .common
    reverse = False
    chapterclass = BatoChapterExtractor
    pattern = BASE_PATTERN + "(" + MANGA_PATTERN + "$" + ")"
    # There are two possible patterns for a manga
    example = "https://bato.to/title/12345-manga-name-with-spaces/"
    example2 = "https://bato.to/title/12345/"
    # v2x, not supported
    example3 = "https://bato.to/series/12345/manga-name-with-space"

    def chapters(self, page):
        """Return a list of (chapter URL, metadata dict) tuples

        Each metadata dict has the keys manga, date, title, chapter and
        chapter_minor. Chapter rows that are missing from 'page' or have no
        link are skipped.

        Raises exception.NotFoundError when the chapter count parsed from
        'page' is 0.
        """
        # The chapter count is wrapped in comment markers after ">Chapters<"
        num_chapters, _ = text.extract(page, ">Chapters<", "</div>")
        num_chapters, _ = text.extract(num_chapters, r"<!-- -->", r"<!-- -->")
        num_chapters = text.parse_int(num_chapters)
        if num_chapters == 0:
            raise exception.NotFoundError("chapter")

        # text.extr() yields "" when the markers are missing, so a page
        # without the expected <title> no longer fails on None
        manga = text.extr(
            page, "<title>", " - Read Free Manga Online at Bato.To</title>")
        try:
            # Presumably undoes UTF-8 text that was mis-decoded as Latin-1
            manga = manga.encode("latin-1").decode("utf-8")
        except UnicodeError:
            # Text is not Latin-1 mojibake (e.g. already decoded correctly);
            # keep it unchanged
            pass
        manga = manga.replace("\n", "")

        row_end = r"</time><!--/-->"
        find_chapter_no = re.compile(r"-ch_([\d\.]+)").search

        results = []
        for chapter_num in range(num_chapters):
            chapter, _ = text.extract(
                page, f'<div data-hk="0-0-{chapter_num}-0"', row_end)
            if chapter is None:
                # Row not present in the page; nothing to parse
                continue
            chapter += row_end  # Add this back in so we can match the date

            url, pos = text.extract(chapter, '<a href="', '"')
            if not url:
                # A row without a link cannot be queued
                continue

            # Links without a "-ch_<n>" part end up as chapter 0
            match = find_chapter_no(url)
            chapter_no = match.group(1) if match else ""
            chapter_major, sep, chapter_minor = chapter_no.partition(".")

            title, _ = text.extract(
                chapter, f'<span data-hk="0-0-{chapter_num}-1"', '</span>')
            title, _ = text.extract(title, r"<!--#-->", r"<!--/-->")
            if not title or title == "<!--/-->":
                # No dedicated title element: fall back to the link text
                title, _ = text.extract(chapter, ">", "</a>", pos)

            date, _ = text.extract(chapter, "<time", "</time>")
            date, _ = text.extract(date, 'time="', '"')

            if url.startswith("/"):
                url = self.root + url
            results.append((url, {
                "manga"        : manga,
                "date"         : date,
                "title"        : title,
                "chapter"      : text.parse_int(chapter_major),
                "chapter_minor": sep + chapter_minor,
            }))
        return results

@ -32,6 +32,7 @@ CATEGORY_MAP = {
"atfbooru" : "ATFBooru",
"b4k" : "arch.b4k.co",
"baraag" : "baraag",
"bato" : "Bato",
"bbc" : "BBC",
"comicvine" : "Comic Vine",
"coomerparty" : "Coomer",

@ -0,0 +1,65 @@
# -*- coding: utf-8 -*-
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
from gallery_dl.extractor import bato
from gallery_dl import exception
# Test cases for the bato extractors. Keys starting with "#" describe the
# test itself (URL, expected extractor class, expected count or exception);
# the remaining keys are expected metadata values.
__tests__ = (
# Chapter URL without a volume component
{
"#url" : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
"#category": ("", "bato", "chapter"),
"#class" : bato.BatoChapterExtractor,
"#count" : 66,
"manga" : "I Shall Master this Family! [Official]",
"title" : "Observing",
"chapter" : 8,
},
# Chapter URL with a "-vol_<n>" component
{
"#url" : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
"#comment" : "volume (vol) in url",
"#category": ("", "bato", "chapter"),
"#class" : bato.BatoChapterExtractor,
"#count" : 7,
"manga" : "86--EIGHTY-SIX (Official)",
"title" : "The Spearhead Squadron's Power",
"volume" : 1,
"chapter" : 5,
},
# Manga overview page
{
"#url" : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
"#category": ("", "bato", "manga"),
"#class" : bato.BatoMangaExtractor,
"#count" : ">= 21",
"manga" : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)",
},
# Manga whose slug contains digits after the numeric ID
{
"#url" : "https://bato.to/title/104929-86-eighty-six-official",
"#comment" : "Manga with number in name",
"#category": ("", "bato", "manga"),
"#class" : bato.BatoMangaExtractor,
"#count" : ">= 18",
"manga" : "86--EIGHTY-SIX (Official)",
},
# Expected manga title contains characters outside of ASCII
{
"#url" : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
"#comment" : "Non-English translation (Indonesian)",
"#category": ("", "bato", "manga"),
"#class" : bato.BatoMangaExtractor,
"#count" : ">= 29",
"manga" : "The Grand Dukes Fox Princess ⎝⎝MGCHAN⎠⎠",
},
# Removed manga: extraction is expected to raise NotFoundError
{
"#url" : "https://bato.to/title/134270-removed",
"#category": ("", "bato", "manga"),
"#class" : bato.BatoMangaExtractor,
"#exception": exception.NotFoundError
}
)
Loading…
Cancel
Save