[foolslide] dynamically generate extractor classes

pull/170/head
Mike Fährmann 6 years ago
parent 58a9eede38
commit e1bf3b225e
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -19,7 +19,6 @@ modules = [
"bobx",
"danbooru",
"deviantart",
"dokireader",
"dynastyscans",
"e621",
"exhentai",
@ -29,7 +28,7 @@ modules = [
"gfycat",
"hbrowse",
"hentai2read",
"hentaicafe",
# "hentaicafe",
"hentaifoundry",
"hentaifox",
"hentaihere",
@ -41,9 +40,7 @@ modules = [
"imgth",
"imgur",
"instagram",
"jaiminisbox",
"khinsider",
"kireicake",
"kissmanga",
"komikcast",
"konachan",
@ -65,17 +62,14 @@ modules = [
"piczel",
"pinterest",
"pixiv",
"powermanga",
"reactor",
"readcomiconline",
"reddit",
"rule34",
"safebooru",
"sankaku",
"seaotterscans",
"seiga",
"senmanga",
"sensescans",
"simplyhentai",
"slideshare",
"smugmug",
@ -84,11 +78,11 @@ modules = [
"twitter",
"wallhaven",
"warosu",
"worldthree",
"yandere",
"xvideos",
"yuki",
"foolfuuka",
"foolslide",
"mastodon",
"imagehosts",
"directlink",

@ -1,32 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://kobato.hologfx.com/reader/"""
from . import foolslide
class DokireaderChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from kobato.hologfx.com"""
    # Category identifier of this FoOlSlide instance.
    category = "dokireader"

    # Chapter-URL patterns produced by the shared FoOlSlide helper for the
    # reader living under kobato.hologfx.com/reader.
    pattern = foolslide.chapter_pattern(r"kobato\.hologfx\.com/reader")

    # (URL, expected results) pairs.
    # NOTE(review): the hash is presumably a checksum compared by the
    # project's test runner -- confirm.
    test = [
        (
            ("https://kobato.hologfx.com/reader/read/"
             "hitoribocchi_no_oo_seikatsu/en/3/34"),
            {"keyword": "998d1d523da028284b8dd4b7b54ceae4af6cb65a"},
        ),
    ]
class DokireaderMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from kobato.hologfx.com"""
    # Category identifier of this FoOlSlide instance.
    category = "dokireader"

    # Series-URL patterns produced by the shared FoOlSlide helper for the
    # reader living under kobato.hologfx.com/reader.
    pattern = foolslide.manga_pattern(r"kobato\.hologfx\.com/reader")

    # (URL, expected results) pairs.
    # NOTE(review): the hashes are presumably checksums compared by the
    # project's test runner -- confirm.
    test = [
        (
            ("https://kobato.hologfx.com/reader/series/"
             "boku_ha_ohimesama_ni_narenai/"),
            {
                "url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
                "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
            },
        ),
    ]

@ -1,48 +1,27 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2018 Mike Fährmann
# Copyright 2016-2019 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Base classes for extractors for FoOlSlide based sites"""
"""Extractors for FoOlSlide based sites"""
from .common import SharedConfigExtractor, MangaExtractor, Message
from .. import text, util
from .. import text, util, config
import base64
import json
# Path part of a chapter URL: /read/<name>/<lang>/<volume>/<chapter>[/<minor>].
# The trailing ")" closes the capture group opened by the pattern helpers
# below, so this fragment is not a complete regex on its own.
CHAPTER_RE = (
    r"/read/[^/]+"
    r"/(?P<lang>[a-z-]+)"
    r"/(?P<volume>\d+)"
    r"/(?P<chapter>\d+)"
    r"(?:/(?P<chapter_minor>\d+))?)"
)

# Path part of a series URL: /series/<name> with an optional trailing slash,
# anchored at the end of the input.  Also ends with the closing ")" of the
# capture group opened by the pattern helpers.
MANGA_RE = (
    r"/series/[^/]+/?$)"
)


def _wrap_pattern(domain_re, path_re):
    """Return a one-element list holding the full URL regex.

    The regex accepts an optional http(s) scheme and captures
    'domain_re' followed by 'path_re' as group 1.
    """
    return ["".join((r"(?:https?://)?(", domain_re, path_re))]


def chapter_pattern(domain_re):
    """Return the chapter-URL pattern list for the domain regex 'domain_re'"""
    return _wrap_pattern(domain_re, CHAPTER_RE)


def manga_pattern(domain_re):
    """Return the series-URL pattern list for the domain regex 'domain_re'"""
    return _wrap_pattern(domain_re, MANGA_RE)
import re
class FoolslideExtractor(SharedConfigExtractor):
"""Base class for FoOlSlide extractors"""
basecategory = "foolslide"
scheme = "https"
def request(self, url):
return SharedConfigExtractor.request(
self, url, encoding="utf-8", method="post", data={"adult": "true"})
self, url, encoding="utf-8", method="POST", data={"adult": "true"})
@staticmethod
def parse_chapter_url(url, data):
@ -59,15 +38,16 @@ class FoolslideExtractor(SharedConfigExtractor):
class FoolslideChapterExtractor(FoolslideExtractor):
"""Base class for chapter extractors for FoOlSlide based sites"""
subcategory = "chapter"
directory_fmt = ["{category}", "{manga}", "{chapter_string}"]
directory_fmt = [
"{category}", "{manga}", "{chapter_string}"]
filename_fmt = (
"{manga}_c{chapter:>03}{chapter_minor}_{page:>03}.{extension}")
archive_fmt = "{id}"
method = "default"
decode = "default"
def __init__(self, match, url=None):
def __init__(self, match):
FoolslideExtractor.__init__(self)
self.url = url or self.scheme + "://" + match.group(1)
self.url = self.root + match.group(1)
def items(self):
page = self.request(self.url).text
@ -107,10 +87,10 @@ class FoolslideChapterExtractor(FoolslideExtractor):
def get_images(self, page):
"""Return a list of all images in this chapter"""
if self.method == "base64":
if self.decode == "base64":
base64_data = text.extract(page, 'atob("', '"')[0].encode()
data = base64.b64decode(base64_data).decode()
elif self.method == "double":
elif self.decode == "double":
pos = page.find("[{")
data = text.extract(page, " = ", ";", pos)[0]
else:
@ -121,6 +101,10 @@ class FoolslideChapterExtractor(FoolslideExtractor):
class FoolslideMangaExtractor(FoolslideExtractor, MangaExtractor):
"""Base class for manga extractors for FoOlSlide based sites"""
def __init__(self, match):
FoolslideExtractor.__init__(self)
self.url = self.root + match.group(1)
def chapters(self, page):
"""Return a list of all chapter urls"""
manga , pos = text.extract(page, '<h1 class="title">', '</h1>')
@ -143,3 +127,193 @@ class FoolslideMangaExtractor(FoolslideExtractor, MangaExtractor):
"group": group, "chapter_string": chapter,
"title": chapter.partition(": ")[2] or "",
})))
def generate_extractors():
    """Dynamically generate Extractor classes for FoOlSlide instances

    Merges the value of the ("extractor", "foolslide") config path into
    EXTRACTORS and then creates, for every entry describing an instance,
    one chapter and one manga extractor class.  Both classes are stored
    in this module's globals under '<Name>ChapterExtractor' and
    '<Name>MangaExtractor'.

    Keys read from each entry:
        root          base URL of the instance including its scheme
                      (required; entries without it are skipped)
        pattern       regex for the domain part of the URL patterns;
                      defaults to the escaped root without its scheme
        name          prefix of the generated class names; defaults to
                      the category
        decode        overrides the chapter extractor's 'decode' attribute
        test-chapter  assigned to the chapter extractor's 'test' attribute
        test-manga    assigned to the manga extractor's 'test' attribute

    Raises:
        ValueError: if a "root" value contains no ":".
    """
    symtable = globals()
    extractors = config.get(("extractor", "foolslide"))

    if extractors:
        EXTRACTORS.update(extractors)

    for category, info in EXTRACTORS.items():

        # The merged table can hold values that are not instance
        # descriptions: plain option values, and dicts without a "root".
        # NOTE(review): such dicts are presumably per-subcategory option
        # blocks like "chapter"/"manga" -- confirm against the config docs.
        # Skip them instead of failing with KeyError while importing.
        if not isinstance(info, dict):
            continue
        root = info.get("root")
        if not root:
            continue

        # Everything after "<scheme>://".
        domain = root[root.index(":") + 3:]
        pattern = info.get("pattern") or re.escape(domain)
        name = (info.get("name") or category).capitalize()

        class ChExtr(FoolslideChapterExtractor):
            pass

        ChExtr.__name__ = ChExtr.__qualname__ = name + "ChapterExtractor"
        ChExtr.__doc__ = "Extractor for manga-chapters from " + domain
        ChExtr.category = category
        ChExtr.pattern = [r"(?:https?://)?" + pattern +
                          r"(/read/[^/?&#]+/[a-z-]+/\d+/\d+(?:/\d+)?)"]
        ChExtr.test = info.get("test-chapter")
        ChExtr.root = root
        if "decode" in info:
            ChExtr.decode = info["decode"]
        symtable[ChExtr.__name__] = ChExtr

        class MaExtr(FoolslideMangaExtractor):
            pass

        MaExtr.__name__ = MaExtr.__qualname__ = name + "MangaExtractor"
        MaExtr.__doc__ = "Extractor for manga from " + domain
        MaExtr.category = category
        MaExtr.pattern = [r"(?:https?://)?" + pattern + r"(/series/[^/?&#]+)"]
        MaExtr.test = info.get("test-manga")
        MaExtr.root = root
        symtable[MaExtr.__name__] = MaExtr
# Built-in FoOlSlide instances, keyed by category name.
# generate_extractors() creates one chapter and one manga extractor class
# per entry and reads these keys:
#   "root"          base URL (with scheme); matched paths are appended to it
#   "pattern"       optional regex for the domain part of the URL patterns;
#                   when absent, the escaped root without its scheme is used
#   "decode"        optional override of the chapter extractor's 'decode'
#   "test-chapter"  assigned to the chapter extractor's 'test' attribute
#   "test-manga"    assigned to the manga extractor's 'test' attribute
EXTRACTORS = {
    "dokireader": {
        "root": "https://kobato.hologfx.com/reader",
        "test-chapter": [
            (("https://kobato.hologfx.com/reader/read/"
              "hitoribocchi_no_oo_seikatsu/en/3/34"), {
                "keyword": "998d1d523da028284b8dd4b7b54ceae4af6cb65a",
            }),
        ],
        "test-manga": [
            (("https://kobato.hologfx.com/reader/series/"
              "boku_ha_ohimesama_ni_narenai/"), {
                "url": "1c1f5a7258ce4f631f5fc32be548d78a6a57990d",
                "keyword": "614d89a6045b85c822cbd3e67578ea7577dfc995",
            }),
        ],
    },
    "jaiminisbox": {
        "root": "https://jaiminisbox.com/reader",
        # also accept an optional "www." prefix
        "pattern": r"(?:www\.)?jaiminisbox\.com/reader",
        # selects the base64 branch of the chapter extractor's get_images()
        "decode": "base64",
        "test-chapter": [
            ("https://jaiminisbox.com/reader/read/uratarou/en/0/1/", {
                "keyword": "d8919bc8f0351b44e938862214e654401962b5a5",
            }),
            ("https://jaiminisbox.com/reader/read/dr-stone/en/0/16/", {
                "keyword": "9b658599651f1ae87cab3e0e29dd21e8337a362c",
            }),
        ],
        "test-manga": [
            ("https://jaiminisbox.com/reader/series/sora_no_kian/", {
                "url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea",
                "keyword": "0908a4145bb03acc4210f5d01169988969f5acd1",
            }),
        ]
    },
    "kireicake": {
        "root": "https://reader.kireicake.com",
        "test-chapter": [
            ("https://reader.kireicake.com/read/wonderland/en/1/1/", {
                "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
                "keyword": "47e0cf69f95ab3b820bda05014aec38d3b824018",
            }),
        ],
        "test-manga": [
            ("https://reader.kireicake.com/series/wonderland/", {
                "url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
                "keyword": "99caa336a9d48e27e3b8e56a0a1e6faf9fc13a51",
            }),
        ],
    },
    "powermanga": {
        "root": "https://read.powermanga.org",
        # matches both "read." and "reader." subdomains
        "pattern": r"read(?:er)?\.powermanga\.org",
        "test-chapter": [
            (("https://read.powermanga.org"
              "/read/one_piece_digital_colour_comics/en/0/75/"), {
                "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
                "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
            }),
        ],
        "test-manga": [
            (("https://read.powermanga.org"
              "/series/one_piece_digital_colour_comics/"), {
                "count": ">= 1",
                "keyword": {
                    "chapter": int,
                    "chapter_minor": str,
                    "chapter_string": str,
                    "group": "PowerManga",
                    "lang": "en",
                    "language": "English",
                    "manga": "One Piece Digital Colour Comics",
                    "title": str,
                    "volume": int,
                },
            }),
        ],
    },
    "seaotterscans": {
        "root": "https://reader.seaotterscans.com",
        "test-chapter": [
            ("https://reader.seaotterscans.com/read/100_days/en/0/5/", {
                "url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8",
                "keyword": "5349c2fbaa88070e6af600de17a6c4e212243e8e",
            }),
        ],
        "test-manga": [
            ("https://reader.seaotterscans.com/series/marry_me/", {
                "url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe",
                "keyword": "61d3388d73df12f64361892b47a9398df4a5947c",
            }),
        ],
    },
    "sensescans": {
        # plain-http root; requests always go to the "/reader" path form
        "root": "http://sensescans.com/reader",
        # accepts "[www.]sensescans.com/reader" and "reader.sensescans.com"
        "pattern": r"(?:(?:www\.)?sensescans\.com/reader"
                   r"|reader\.sensescans\.com)",
        "test-chapter": [
            (("http://reader.sensescans.com/read/"
              "magi__labyrinth_of_magic/en/37/369/"), {
                "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
                "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
            }),
            (("http://sensescans.com/reader/read/"
              "magi__labyrinth_of_magic/en/37/369/"), {
                "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
                "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
            }),
        ],
        "test-manga": [
            ("http://sensescans.com/reader/series/hakkenden/", {
                "url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2",
                "keyword": "122cf92c32e6428c50f56ffaf29d06b96750ed71",
            }),
        ],
    },
    "worldthree": {
        # plain-http root
        "root": "http://www.slide.world-three.org",
        # the "www." prefix is optional when matching
        "pattern": r"(?:www\.)?slide\.world-three\.org",
        "test-chapter": [
            (("http://www.slide.world-three.org"
              "/read/black_bullet/en/2/7/page/1"), {
                "url": "be2f04f6e2d311b35188094cfd3e768583271584",
                "keyword": "28edfeccc92f7ea29546d5616e689dcfcbac59d9",
            }),
            (("http://www.slide.world-three.org"
              "/read/idolmster_cg_shuffle/en/0/4/2/"), {
                "url": "6028ea5ca282744f925dfad92eeb98509f9cc78c",
                "keyword": "d478e9f20847deb1844dba318acaa8b91c19468a",
            }),
        ],
        "test-manga": [
            ("http://www.slide.world-three.org/series/black_bullet/", {
                "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
                "keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120",
            }),
        ],
    },
}
generate_extractors()

@ -1,36 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://jaiminisbox.com/"""
from . import foolslide
class JaiminisboxChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from jaiminisbox.com"""
    # Category identifier of this FoOlSlide instance.
    category = "jaiminisbox"

    # Selects the "base64" variant of the inherited extractor.
    # NOTE(review): presumably controls how the page's image list is
    # decoded -- confirm in the foolslide base class.
    method = "base64"

    # Chapter-URL patterns; the "www." prefix is optional.
    pattern = foolslide.chapter_pattern(r"(?:www\.)?jaiminisbox\.com/reader")

    # (URL, expected results) pairs.
    test = [
        (
            "https://jaiminisbox.com/reader/read/uratarou/en/0/1/",
            {"keyword": "d8919bc8f0351b44e938862214e654401962b5a5"},
        ),
        (
            "https://jaiminisbox.com/reader/read/dr-stone/en/0/16/",
            {"keyword": "9b658599651f1ae87cab3e0e29dd21e8337a362c"},
        ),
    ]
class JaiminisboxMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from jaiminisbox.com"""
    # Category identifier of this FoOlSlide instance.
    category = "jaiminisbox"

    # Series-URL patterns; the "www." prefix is optional.
    pattern = foolslide.manga_pattern(r"(?:www\.)?jaiminisbox\.com/reader")

    # (URL, expected results) pairs.
    test = [
        (
            "https://jaiminisbox.com/reader/series/sora_no_kian/",
            {
                "url": "66612be177dc3b3fa1d1f537ef02f4f701b163ea",
                "keyword": "0908a4145bb03acc4210f5d01169988969f5acd1",
            },
        ),
    ]

@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://reader.kireicake.com/"""
from . import foolslide
class KireicakeChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from reader.kireicake.com"""
    # Category identifier of this FoOlSlide instance.
    category = "kireicake"

    # Chapter-URL patterns for the reader.kireicake.com host.
    pattern = foolslide.chapter_pattern(r"reader\.kireicake\.com")

    # (URL, expected results) pairs.
    test = [
        (
            "https://reader.kireicake.com/read/wonderland/en/1/1/",
            {
                "url": "b2d36bc0bc67e4c461c3a4d6444a2fd339f5d07e",
                "keyword": "47e0cf69f95ab3b820bda05014aec38d3b824018",
            },
        ),
    ]
class KireicakeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from reader.kireicake.com"""
    # Category identifier of this FoOlSlide instance.
    category = "kireicake"

    # Series-URL patterns for the reader.kireicake.com host.
    pattern = foolslide.manga_pattern(r"reader\.kireicake\.com")

    # (URL, expected results) pairs.
    test = [
        (
            "https://reader.kireicake.com/series/wonderland/",
            {
                "url": "d067b649af1cc88fa8c8b698fde04a10909fd169",
                "keyword": "99caa336a9d48e27e3b8e56a0a1e6faf9fc13a51",
            },
        ),
    ]

@ -1,43 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2015-2018 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://powermanga.org/"""
from . import foolslide
class PowermangaChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from powermanga.org"""
    # Category identifier of this FoOlSlide instance.
    category = "powermanga"

    # Chapter-URL patterns; both the "read." and "reader." subdomains match.
    pattern = foolslide.chapter_pattern(r"read(?:er)?\.powermanga\.org")

    # (URL, expected results) pairs.
    test = [
        (
            ("https://read.powermanga.org"
             "/read/one_piece_digital_colour_comics/en/0/75/"),
            {
                "url": "854c5817f8f767e1bccd05fa9d58ffb5a4b09384",
                "keyword": "9985bcb78491dff9c725958b06bba606be51b6d3",
            },
        ),
    ]
class PowermangaMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from powermanga.org"""
    # Category identifier of this FoOlSlide instance.
    category = "powermanga"

    # Accept the same hosts as PowermangaChapterExtractor: both the
    # "read." and the "reader." subdomain.  The previous regex only
    # matched "read.", so series URLs on "reader." were rejected while
    # chapter URLs on the same host were accepted.
    pattern = foolslide.manga_pattern(r"read(?:er)?\.powermanga\.org")

    # (URL, expected results) pairs.  The "keyword" dict lists, per
    # metadata field, either an expected type or an exact expected value.
    test = [(("https://read.powermanga.org"
              "/series/one_piece_digital_colour_comics/"), {
        "count": ">= 1",
        "keyword": {
            "chapter": int,
            "chapter_minor": str,
            "chapter_string": str,
            "group": "PowerManga",
            "lang": "en",
            "language": "English",
            "manga": "One Piece Digital Colour Comics",
            "title": str,
            "volume": int,
        },
    })]

@ -1,31 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://reader.seaotterscans.com/"""
from . import foolslide
class SeaotterscansChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from reader.seaotterscans.com"""
    # Category identifier of this FoOlSlide instance.
    category = "seaotterscans"

    # Chapter-URL patterns for the reader.seaotterscans.com host.
    pattern = foolslide.chapter_pattern(r"reader\.seaotterscans\.com")

    # (URL, expected results) pairs.
    test = [
        (
            "https://reader.seaotterscans.com/read/100_days/en/0/5/",
            {
                "url": "63d46b8883cc652dfe8bd5be4492160dd31f06a8",
                "keyword": "5349c2fbaa88070e6af600de17a6c4e212243e8e",
            },
        ),
    ]
class SeaotterscansMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from reader.seaotterscans.com"""
    # Category identifier of this FoOlSlide instance.
    category = "seaotterscans"

    # Series-URL patterns for the reader.seaotterscans.com host.
    pattern = foolslide.manga_pattern(r"reader\.seaotterscans\.com")

    # (URL, expected results) pairs.
    test = [
        (
            "https://reader.seaotterscans.com/series/marry_me/",
            {
                "url": "fdbacabfa566a6baeb3f01bb46cbda0577bd4bbe",
                "keyword": "61d3388d73df12f64361892b47a9398df4a5947c",
            },
        ),
    ]

@ -1,50 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for http://sensescans.com/"""
from . import foolslide
class SensescansExtractor:
    """Base class for extractors for sensescans.com"""
    category = "sensescans"

    def __init__(self, match):
        # Group 1 of the match is the path below the reader; prefix it with
        # the fixed reader root and hand the resulting URL to the next
        # class in the MRO (this class is meant to be used as a mixin).
        reader_root = "http://sensescans.com/reader"
        super().__init__(match, reader_root + match.group(1))
class SensescansChapterExtractor(
        SensescansExtractor, foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from sensescans.com"""
    # Optional "www." or "reader." host prefix and optional "/reader" path
    # prefix; the capture group opened at the end is completed (and closed)
    # by foolslide.CHAPTER_RE, so only the chapter path is captured.
    pattern = [
        r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
        r"(?:/reader)?(" + foolslide.CHAPTER_RE
    ]

    # (URL, expected results) pairs; both URL forms expect the same hashes.
    test = [
        (
            ("http://reader.sensescans.com/read/"
             "magi__labyrinth_of_magic/en/37/369/"),
            {
                "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
                "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
            },
        ),
        (
            ("http://sensescans.com/reader/read/"
             "magi__labyrinth_of_magic/en/37/369/"),
            {
                "url": "a399ef037cdfbc25b09d435cc2ea1e3e454a6812",
                "keyword": "43ba75615d3e77d507808b0f3a8fd7fc72232a60",
            },
        ),
    ]
class SensescansMangaExtractor(
        SensescansExtractor, foolslide.FoolslideMangaExtractor):
    """Extractor for manga from sensescans.com"""
    # Optional "www." or "reader." host prefix and optional "/reader" path
    # prefix; the capture group opened at the end is completed (and closed)
    # by foolslide.MANGA_RE, so only the series path is captured.
    pattern = [
        r"(?:https?://)?(?:www\.|reader\.)?sensescans\.com"
        r"(?:/reader)?(" + foolslide.MANGA_RE
    ]

    # (URL, expected results) pairs.
    test = [
        (
            "http://sensescans.com/reader/series/hakkenden/",
            {
                "url": "2360ccb0ead0ff2f5e27b7aef7eb17b9329de2f2",
                "keyword": "122cf92c32e6428c50f56ffaf29d06b96750ed71",
            },
        ),
    ]

@ -1,41 +0,0 @@
# -*- coding: utf-8 -*-
# Copyright 2016-2017 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for http://www.slide.world-three.org/"""
from . import foolslide
class WorldthreeChapterExtractor(foolslide.FoolslideChapterExtractor):
    """Extractor for manga-chapters from slide.world-three.org"""
    # Category identifier of this FoOlSlide instance.
    category = "worldthree"

    # Overrides the inherited 'scheme' attribute with plain http.
    scheme = "http"

    # Chapter-URL patterns; the "www." prefix is optional.
    pattern = foolslide.chapter_pattern(r"(?:www\.)?slide\.world-three\.org")

    # (URL, expected results) pairs.
    test = [
        (
            ("http://www.slide.world-three.org"
             "/read/black_bullet/en/2/7/page/1"),
            {
                "url": "be2f04f6e2d311b35188094cfd3e768583271584",
                "keyword": "28edfeccc92f7ea29546d5616e689dcfcbac59d9",
            },
        ),
        (
            ("http://www.slide.world-three.org"
             "/read/idolmster_cg_shuffle/en/0/4/2/"),
            {
                "url": "6028ea5ca282744f925dfad92eeb98509f9cc78c",
                "keyword": "d478e9f20847deb1844dba318acaa8b91c19468a",
            },
        ),
    ]
class WorldthreeMangaExtractor(foolslide.FoolslideMangaExtractor):
    """Extractor for manga from slide.world-three.org"""
    # Category identifier of this FoOlSlide instance.
    category = "worldthree"

    # Overrides the inherited 'scheme' attribute with plain http.
    scheme = "http"

    # Series-URL patterns; the "www." prefix is optional.
    pattern = foolslide.manga_pattern(r"(?:www\.)?slide\.world-three\.org")

    # (URL, expected results) pairs.
    test = [
        (
            "http://www.slide.world-three.org/series/black_bullet/",
            {
                "url": "5743b93512d26e6b540d90a7a5d69208b6d4a738",
                "keyword": "3a24f1088b4d7f3b798a96163f21ca251293a120",
            },
        ),
    ]
Loading…
Cancel
Save