diff --git a/docs/supportedsites.md b/docs/supportedsites.md
index e6a1c845..0ce12f8a 100644
--- a/docs/supportedsites.md
+++ b/docs/supportedsites.md
@@ -1052,6 +1052,16 @@ Consider all sites to be NSFW unless otherwise known.
     <td></td>
 </tr>
 
+<tr>
+    <td colspan="4"><strong>jschan Imageboards</strong></td>
+</tr>
+<tr>
+    <td>94chan</td>
+    <td>https://94chan.org/</td>
+    <td>Boards, Threads</td>
+    <td></td>
+</tr>
+
 <tr>
     <td colspan="4"><strong>LynxChan Imageboards</strong></td>
 </tr>
diff --git a/gallery_dl/extractor/__init__.py b/gallery_dl/extractor/__init__.py
index 444075c1..b913e3c1 100644
--- a/gallery_dl/extractor/__init__.py
+++ b/gallery_dl/extractor/__init__.py
@@ -71,6 +71,7 @@ modules = [
     "instagram",
     "issuu",
     "itaku",
+    "jschan",
     "kabeuchi",
     "keenspot",
     "kemonoparty",
diff --git a/gallery_dl/extractor/jschan.py b/gallery_dl/extractor/jschan.py
new file mode 100644
index 00000000..cc2c7dee
--- /dev/null
+++ b/gallery_dl/extractor/jschan.py
@@ -0,0 +1,108 @@
+# -*- coding: utf-8 -*-
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License version 2 as
+# published by the Free Software Foundation.
+
+"""Extractors for jschan Imageboards"""
+
+from .common import BaseExtractor, Message
+from .. import text
+import itertools
+
+
+class JschanExtractor(BaseExtractor):
+    """Base class for jschan extractors"""
+    basecategory = "jschan"
+
+
+BASE_PATTERN = JschanExtractor.update({
+    "94chan": {
+        "root": "https://94chan.org",
+        "pattern": r"94chan\.org"
+    }
+})
+
+
+class JschanThreadExtractor(JschanExtractor):
+    """Extractor for jschan threads"""
+    subcategory = "thread"
+    directory_fmt = ("{category}", "{board}",
+                     "{threadId} {subject[:50]|message[:50]}")
+    filename_fmt = "{postId}{num:?-//} {filename}.{extension}"
+    archive_fmt = "{board}_{postId}_{num}"
+    pattern = BASE_PATTERN + r"/([^/?#]+)/thread/(\d+)\.html"
+    test = (
+        ("https://94chan.org/art/thread/25.html", {
+            "pattern": r"https://94chan.org/file/[0-9a-f]{64}(\.\w+)?",
+            "count": ">= 15"
+        }),
+    )
+
+    def __init__(self, match):
+        JschanExtractor.__init__(self, match)
+        # board and thread ID are the last two groups of the match
+        index = match.lastindex
+        self.board = match.group(index-1)
+        self.thread = match.group(index)
+
+    def items(self):
+        """Yield the thread as directory, then one URL per attached file"""
+        url = "{}/{}/thread/{}.json".format(
+            self.root, self.board, self.thread)
+        thread = self.request(url).json()
+        thread["threadId"] = thread["postId"]
+        posts = thread.pop("replies", ())
+
+        yield Message.Directory, thread
+        # the thread object itself is handled as the first post
+        for post in itertools.chain((thread,), posts):
+            files = post.pop("files", ())
+            if not files:
+                continue
+
+            thread.update(post)
+            count = len(files)
+            for num, file in enumerate(files):
+                # read the server-side name before merging post metadata,
+                # so a same-named post field cannot alter the file URL
+                site_filename = file["filename"]
+                url = self.root + "/file/" + site_filename
+
+                file.update(thread)
+                file["num"] = num
+                file["count"] = count
+                file["siteFilename"] = site_filename
+                text.nameext_from_url(file["originalFilename"], file)
+                yield Message.Url, url, file
+
+
+class JschanBoardExtractor(JschanExtractor):
+    """Extractor for jschan boards"""
+    subcategory = "board"
+    pattern = (
+        BASE_PATTERN + r"/([^/?#]+)(?:/index\.html|"
+        r"/catalog\.html|/\d+\.html|/?$)"
+    )
+    test = (
+        ("https://94chan.org/art/", {
+            "pattern": JschanThreadExtractor.pattern,
+            "count": ">= 30"
+        }),
+        ("https://94chan.org/art/2.html"),
+        ("https://94chan.org/art/catalog.html"),
+        ("https://94chan.org/art/index.html"),
+    )
+
+    def __init__(self, match):
+        JschanExtractor.__init__(self, match)
+        self.board = match.group(match.lastindex)
+
+    def items(self):
+        """Queue every thread listed in the board's catalog"""
+        url = "{}/{}/catalog.json".format(self.root, self.board)
+        for thread in self.request(url).json():
+            url = "{}/{}/thread/{}.html".format(
+                self.root, self.board, thread["postId"])
+            thread["_extractor"] = JschanThreadExtractor
+            yield Message.Queue, url, thread
diff --git a/scripts/supportedsites.py b/scripts/supportedsites.py
index 1df98ca4..e4c68eb6 100755
--- a/scripts/supportedsites.py
+++ b/scripts/supportedsites.py
@@ -254,6 +254,7 @@ BASE_MAP = {
     "foolslide"   : "FoOlSlide Instances",
     "gelbooru_v01": "Gelbooru Beta 0.1.11",
     "gelbooru_v02": "Gelbooru Beta 0.2",
+    "jschan"      : "jschan Imageboards",
     "lolisafe"    : "lolisafe and chibisafe",
     "lynxchan"    : "LynxChan Imageboards",
     "moebooru"    : "Moebooru and MyImouto",