[bato] add support

9 months ago · 74c225f94e
parent f9544194c0
commit 74c225f94e
5 changed files with 186 additions and 0 deletions
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@ -97,6 +97,12 @@ Consider all listed sites to potentially be NSFW.
    <td>Albums, Artwork Listings, Challenges, Followed Users, individual Images, Likes, Search Results, User Profiles</td>
    <td></td>
 </tr>
+<tr>
+    <td>Bato</td>
+    <td>https://bato.to</td>
+    <td>Chapters, Manga</td>
+    <td></td>
+</tr>
 <tr>
    <td>BBC</td>
    <td>https://bbc.co.uk/</td>
--- a/gallery_dl/extractor/init.py
+++ b/gallery_dl/extractor/init.py
@ -24,6 +24,7 @@ modules = [
    "architizer",
    "artstation",
    "aryion",
+    "bato",
    "bbc",
    "behance",
    "blogger",
--- a/gallery_dl/extractor/bato.py
+++ b/gallery_dl/extractor/bato.py
@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for https://bato.to and aliases (v3x only)"""
+
+from .common import ChapterExtractor, MangaExtractor
+from .. import text, exception
+import re
+
+# URL prefix: optional http(s) scheme followed by one of the site's domains
+BASE_PATTERN = r"(?:https?://)?(?:bato\.to|dto\.to|batotoo\.com|wto\.to)"
+# Manga page path: "/title/<numeric id>", any number of "-<slug part>"
+# segments made of digits and lowercase letters, and an optional trailing "/"
+MANGA_PATTERN = r"/title/\d+(?:-[0-9a-z]+)*/?"
+# Chapter path suffix: "/<numeric id>", an optional "-vol_<n>" segment,
+# a mandatory "-ch_<n>" segment with optional decimal part, optional "/"
+CHAPTER_PATTERN = r"/\d+(?:-vol_\d+)?-ch_\d+\.?\d*/?"
+
+class BatoBase:
+    """Mixin holding the attributes shared by all bato v3x extractors"""
+    # URL root that relative paths get appended to
+    root = "https://bato.to"
+    # category name under which these extractors are registered
+    category = "bato"
+
+class BatoChapterExtractor(BatoBase, ChapterExtractor):
+    """Extractor for manga chapters from bato.to"""
+    pattern = BASE_PATTERN + "(" + MANGA_PATTERN + CHAPTER_PATTERN + ")"
+    # There are three possible URL shapes for a chapter.
+    # Only the ones containing a "-ch_<number>" segment match 'pattern'.
+    example = "https://bato.to/title/12345-manga-name-with-spaces/54212-ch_1.5"
+    example1 = ("https://bato.to/title/12345-manga-name-with-spaces"
+                "/54212-vol_1-ch_1.5")
+    # ID-only form; NOT matched by 'pattern' since it has no "-ch_" segment
+    example2 = "https://bato.to/title/12345/54212"
+    # v2x, not supported
+    example3 = "https://bato.to/chapter/54212"
+
+    def __init__(self, match):
+        """Store the matched URL path and initialize the base extractor
+
+        'match' is the regex match object for 'pattern'; its first group
+        is the path part of the chapter URL.
+        """
+        self.path = match.group(1)
+        ChapterExtractor.__init__(self, match, self.root + self.path)
+
+    def metadata(self, page):
+        """Return a dict with manga, title, volume, and chapter info
+
+        The values are parsed from the <title> element of 'page' and from
+        the link pointing at this chapter's own path.
+        """
+        info, _ = text.extract(
+            page, "<title>", " - Read Free Manga Online at Bato.To</title>")
+        # text.extract() yields None when the markers are missing
+        info = info or ""
+        try:
+            # presumably undoes UTF-8 text having been decoded as latin-1
+            info = info.encode("latin-1").decode("utf-8")
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            # text does not survive the round trip; keep it unchanged
+            pass
+        info = info.replace("\n", "")
+
+        match = re.match(
+            r"(.+) - "
+            r"(?:Volume *(\d+) )?"
+            r"Chapter *([\d\.]+)", info)
+        if match:
+            manga, volume, chapter = match.groups()
+        else:
+            # unexpected title format: treat everything as chapter info
+            manga, volume, chapter = "", "", info
+        chapter, sep, minor = chapter.partition(".")
+
+        # the chapter title sits inside the link to the current chapter
+        title_container = text.extr(page, f'<a href="{self.path}"', "</a>")
+        title = text.extr(title_container, "<!-- -->", "</span>")
+
+        return {
+            "manga"        : text.unescape(manga),
+            "title"        : text.unescape(title),
+            "author"       : "",
+            "volume"       : text.parse_int(volume),
+            "chapter"      : text.parse_int(chapter),
+            "chapter_minor": sep + minor,
+        }
+
+    def images(self, page):
+        """Return a list of (image URL, None) tuples found in 'page'"""
+        # the URLs are the \"-delimited strings of the 'pageOpts' section
+        images_container = text.extr(page, 'pageOpts', ':[0,0]}"')
+        images_container = text.unescape(images_container)
+
+        return [
+            (url, None)
+            for url in text.extract_iter(images_container, r'\"', r'\"')
+        ]
+
+
+class BatoMangaExtractor(BatoBase, MangaExtractor):
+    """Extractor for manga from bato.to"""
+    reverse = False
+    chapterclass = BatoChapterExtractor
+    pattern = BASE_PATTERN + "(" + MANGA_PATTERN + "$" + ")"
+    # There are two possible patterns for a manga
+    example = "https://bato.to/title/12345-manga-name-with-spaces/"
+    example2 = "https://bato.to/title/12345/"
+    # v2x, not supported
+    example3 = "https://bato.to/series/12345/manga-name-with-space"
+
+    def chapters(self, page):
+        """Return a list of (chapter URL, metadata dict) tuples
+
+        Each metadata dict has the keys 'manga', 'date', 'title',
+        'chapter', and 'chapter_minor'.  Rows that cannot be located in
+        'page' or that lack a link are skipped.
+
+        Raises exception.NotFoundError if the page reports no chapters.
+        """
+        num_chapters, _ = text.extract(page, ">Chapters<", "</div>")
+        num_chapters, _ = text.extract(num_chapters, r"<!-- -->", r"<!-- -->")
+        num_chapters = text.parse_int(num_chapters)
+        if not num_chapters:
+            raise exception.NotFoundError("chapter")
+
+        manga, _ = text.extract(
+            page, "<title>", " - Read Free Manga Online at Bato.To</title>")
+        # text.extract() yields None when the markers are missing
+        manga = manga or ""
+        try:
+            # presumably undoes UTF-8 text having been decoded as latin-1
+            manga = manga.encode("latin-1").decode("utf-8")
+        except (UnicodeEncodeError, UnicodeDecodeError):
+            # text does not survive the round trip; keep it unchanged
+            pass
+        manga = manga.replace("\n", "")
+
+        # compiled once, used for every chapter link
+        find_chapter_no = re.compile(r"-ch_([\d\.]+)").search
+        row_end = r"</time><!--/-->"
+
+        results = []
+        for chapter_num in range(num_chapters):
+            chapter, _ = text.extract(
+                page, f'<div data-hk="0-0-{chapter_num}-0"', row_end)
+            if chapter is None:
+                # no row with this index on the page
+                continue
+            chapter += row_end  # Add this back in so we can match the date
+
+            url, pos = text.extract(chapter, '<a href="', '"')
+            if not url:
+                continue
+
+            # links without a "-ch_<number>" part get chapter number 0
+            match = find_chapter_no(url)
+            chapter_no = match.group(1) if match else ""
+            chapter_major, sep, chapter_minor = chapter_no.partition(".")
+
+            title, _ = text.extract(
+                chapter, f'<span data-hk="0-0-{chapter_num}-1"', '</span>')
+            title, _ = text.extract(title, r"<!--#-->", r"<!--/-->")
+            if not title or title == "<!--/-->":
+                # no dedicated title element: fall back to the link text
+                title, _ = text.extract(chapter, ">", "</a>", pos)
+
+            date, _ = text.extract(chapter, "<time", "</time>")
+            date, _ = text.extract(date, 'time="', '"')
+
+            if url.startswith("/"):
+                url = self.root + url
+            results.append((url, {
+                "manga"        : manga,
+                "date"         : date,
+                "title"        : title,
+                "chapter"      : text.parse_int(chapter_major),
+                "chapter_minor": sep + chapter_minor,
+            }))
+        return results
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@ -32,6 +32,7 @@ CATEGORY_MAP = {
    "atfbooru"       : "ATFBooru",
    "b4k"            : "arch.b4k.co",
    "baraag"         : "baraag",
+    "bato"           : "Bato",
    "bbc"            : "BBC",
    "comicvine"      : "Comic Vine",
    "coomerparty"    : "Coomer",
--- a/test/results/bato.py
+++ b/test/results/bato.py
@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+from gallery_dl.extractor import bato
+from gallery_dl import exception
+
+# Expected results for the bato extractors; one dict per test URL.
+# Keys starting with "#" describe the test itself, all other keys are
+# metadata values expected in the extractor's output.
+__tests__ = (
+# chapter URL without a volume segment
+{
+    "#url"     : "https://bato.to/title/86408-i-shall-master-this-family-official/1681030-ch_8",
+    "#category": ("", "bato", "chapter"),
+    "#class"   : bato.BatoChapterExtractor,
+    "#count"   : 66,
+
+    "manga"        : "I Shall Master this Family! [Official]",
+    "title"        : "Observing",
+    "chapter"      : 8,
+},
+# chapter URL with a "-vol_<n>" segment
+{
+    "#url"     : "https://bato.to/title/104929-86-eighty-six-official/1943513-vol_1-ch_5",
+    "#comment" : "volume (vol) in url",
+    "#category": ("", "bato", "chapter"),
+    "#class"   : bato.BatoChapterExtractor,
+    "#count"   : 7,
+
+    "manga"        : "86--EIGHTY-SIX (Official)",
+    "title"        : "The Spearhead Squadron's Power",
+    "volume"       : 1,
+    "chapter"      : 5,
+},
+# manga URL
+{
+    "#url"     : "https://bato.to/title/113742-futsutsuka-na-akujo-de-wa-gozaimasu-ga-suuguu-chouso-torikae-den-official",
+    "#category": ("", "bato", "manga"),
+    "#class"   : bato.BatoMangaExtractor,
+    "#count"   : ">= 21",
+
+    "manga"        : "Futsutsuka na Akujo de wa Gozaimasu ga - Suuguu Chouso Torikae Den (Official)",
+},
+# manga URL whose slug contains digits
+{
+    "#url"     : "https://bato.to/title/104929-86-eighty-six-official",
+    "#comment" : "Manga with number in name",
+    "#category": ("", "bato", "manga"),
+    "#class"   : bato.BatoMangaExtractor,
+    "#count"   : ">= 18",
+
+    "manga"        : "86--EIGHTY-SIX (Official)",
+},
+# manga whose expected name contains non-ASCII characters
+{
+    "#url"     : "https://bato.to/title/140046-the-grand-duke-s-fox-princess-mgchan",
+    "#comment" : "Non-English translation (Indonesian)",
+    "#category": ("", "bato", "manga"),
+    "#class"   : bato.BatoMangaExtractor,
+    "#count"   : ">= 29",
+
+    "manga"        : "The Grand Duke’s Fox Princess ⎝⎝MGCHAN⎠⎠",
+},
+# manga URL expected to raise NotFoundError
+{
+    "#url"     : "https://bato.to/title/134270-removed",
+    "#category": ("", "bato", "manga"),
+    "#class"   : bato.BatoMangaExtractor,
+    "#exception": exception.NotFoundError
+}
+)