parent
b58449fd88
commit
749fbbfa6c
@ -0,0 +1,136 @@
|
||||
{
|
||||
"base-directory": "/tmp/",
|
||||
"netrc": false,
|
||||
|
||||
"downloader":
|
||||
{
|
||||
"part": true,
|
||||
"part-directory": null,
|
||||
"http":
|
||||
{
|
||||
"rate": null,
|
||||
"retries": 5,
|
||||
"timeout": 30,
|
||||
"verify": true
|
||||
}
|
||||
},
|
||||
"extractor":
|
||||
{
|
||||
"archive": null,
|
||||
"proxy": null,
|
||||
"skip": true,
|
||||
"sleep": 0,
|
||||
|
||||
"pixiv":
|
||||
{
|
||||
"user":
|
||||
{
|
||||
"directory": ["{category}", "{user[id]}"]
|
||||
},
|
||||
"bookmark":
|
||||
{
|
||||
"directory": ["{category}", "my bookmarks"]
|
||||
},
|
||||
"ugoira": true,
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"batoto":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"exhentai":
|
||||
{
|
||||
"wait-min": 3,
|
||||
"wait-max": 6,
|
||||
"original": true,
|
||||
"username": null,
|
||||
"password": null,
|
||||
"cookies": {
|
||||
"igneous": null,
|
||||
"s": null,
|
||||
"yay": "louder"
|
||||
}
|
||||
},
|
||||
"nijie":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"sankaku":
|
||||
{
|
||||
"wait-min": 2,
|
||||
"wait-max": 4,
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"seiga":
|
||||
{
|
||||
"username": null,
|
||||
"password": null
|
||||
},
|
||||
"gelbooru":
|
||||
{
|
||||
"filename": "{category}_{id:>07}_{md5}.{extension}",
|
||||
"api": true
|
||||
},
|
||||
"reddit":
|
||||
{
|
||||
"refresh-token": null,
|
||||
"comments": 500,
|
||||
"morecomments": false,
|
||||
"date-min": 0,
|
||||
"date-max": 253402210800,
|
||||
"date-format": "%Y-%m-%dT%H:%M:%S",
|
||||
"id-min": "0",
|
||||
"id-max": "ZIK0ZJ",
|
||||
"recursion": 0
|
||||
},
|
||||
"flickr":
|
||||
{
|
||||
"access-token": null,
|
||||
"access-token-secret": null,
|
||||
"metadata": false,
|
||||
"size-max": null
|
||||
},
|
||||
"deviantart":
|
||||
{
|
||||
"refresh-token": null,
|
||||
"flat": true,
|
||||
"mature": true,
|
||||
"original": true
|
||||
},
|
||||
"gfycat":
|
||||
{
|
||||
"format": "mp4"
|
||||
},
|
||||
"imgur":
|
||||
{
|
||||
"mp4": true
|
||||
},
|
||||
"tumblr":
|
||||
{
|
||||
"posts": "photo",
|
||||
"inline": false,
|
||||
"reblogs": true,
|
||||
"external": false
|
||||
},
|
||||
"recursive":
|
||||
{
|
||||
"blacklist": ["directlink", "oauth", "recursive", "test"]
|
||||
},
|
||||
"oauth":
|
||||
{
|
||||
"browser": true
|
||||
}
|
||||
},
|
||||
"output":
|
||||
{
|
||||
"mode": "auto",
|
||||
"shorten": true,
|
||||
"progress": true,
|
||||
"logfile": null,
|
||||
"unsupportedfile": null
|
||||
}
|
||||
}
|
@ -0,0 +1,148 @@
|
||||
# -*- coding: utf-8 -*-
|
||||
|
||||
# Copyright 2018 Mike Fährmann
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License version 2 as
|
||||
# published by the Free Software Foundation.
|
||||
|
||||
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
|
||||
|
||||
from .common import ChapterExtractor, MangaExtractor
|
||||
from .. import text, util
|
||||
from urllib.parse import urljoin
|
||||
import json
|
||||
import re
|
||||
|
||||
|
||||
class MangadexExtractor():
|
||||
"""Base class for mangadex extractors"""
|
||||
category = "mangadex"
|
||||
root = "https://mangadex.org"
|
||||
|
||||
|
||||
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
||||
"""Extractor for manga-chapters from mangadex.org"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
|
||||
test = [
|
||||
("https://mangadex.org/chapter/122094", {
|
||||
"keyword": "b4c83fe41f125eae745c2e00d29e087cc4eb78df",
|
||||
"content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
|
||||
}),
|
||||
# oneshot
|
||||
("https://mangadex.org/chapter/138086", {
|
||||
"count": 64,
|
||||
"keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
|
||||
}),
|
||||
]
|
||||
|
||||
def __init__(self, match):
|
||||
self.chapter_id = match.group(1)
|
||||
url = self.root + "/chapter/" + self.chapter_id
|
||||
ChapterExtractor.__init__(self, url)
|
||||
|
||||
def get_metadata(self, page):
|
||||
info , pos = text.extract(page, '="og:title" content="', '"')
|
||||
manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
|
||||
_ , pos = text.extract(page, ' id="jump_group"', '', pos)
|
||||
_ , pos = text.extract(page, ' selected ', '', pos)
|
||||
language, ___ = text.extract(page, " title='", "'", pos-100)
|
||||
group , pos = text.extract(page, '>', '<', pos)
|
||||
|
||||
info = text.unescape(info)
|
||||
match = re.match(
|
||||
r"(?:(?:Vol\. (\d+) )?Ch\. (\d+)([^ ]*)|(.*)) "
|
||||
r"\(([^)]+)\)",
|
||||
info)
|
||||
|
||||
return {
|
||||
"manga": match.group(5),
|
||||
"manga_id": util.safe_int(manga_id),
|
||||
"volume": util.safe_int(match.group(1)),
|
||||
"chapter": util.safe_int(match.group(2)),
|
||||
"chapter_minor": match.group(3) or "",
|
||||
"chapter_id": util.safe_int(self.chapter_id),
|
||||
"chapter_string": info.rstrip(" - MangaDex"),
|
||||
"group": text.unescape(group),
|
||||
"lang": util.language_to_code(language),
|
||||
"language": language,
|
||||
}
|
||||
|
||||
def get_images(self, page):
|
||||
dataurl , pos = text.extract(page, "var dataurl = '", "'")
|
||||
pagelist, pos = text.extract(page, "var page_array = [", "]", pos)
|
||||
server , pos = text.extract(page, "var server = '", "'", pos)
|
||||
|
||||
base = urljoin(self.root, server + dataurl + "/")
|
||||
|
||||
return [
|
||||
(base + page, None)
|
||||
for page in json.loads(
|
||||
"[" + pagelist.replace("'", '"').rstrip(",") + "]"
|
||||
)
|
||||
]
|
||||
|
||||
|
||||
class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
||||
"""Extractor for manga from mangadex.org"""
|
||||
pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
|
||||
test = [
|
||||
("https://mangadex.org/manga/2946/souten-no-koumori", {
|
||||
"url": "9e77934759828458d0424473922e41f348719472",
|
||||
"keywords": {
|
||||
"manga": "Souten no Koumori",
|
||||
"manga_id": 2946,
|
||||
"title": "Oneshot",
|
||||
"volume": int,
|
||||
"chapter": int,
|
||||
"chapter_minor": str,
|
||||
"chapter_id": int,
|
||||
"group": str,
|
||||
"contributor": str,
|
||||
"date": str,
|
||||
"views": int,
|
||||
"lang": str,
|
||||
"language": str,
|
||||
},
|
||||
}),
|
||||
]
|
||||
|
||||
def chapters(self, page):
|
||||
results = []
|
||||
extr = text.extract
|
||||
|
||||
manga = text.unescape(extr(
|
||||
page, '"og:title" content="', '"')[0].rpartition(" (")[0])
|
||||
manga_id = util.safe_int(extr(
|
||||
page, '/images/manga/', '.')[0])
|
||||
|
||||
for info in text.extract_iter(page, "<tr id=", "</tr>"):
|
||||
chid , pos = extr(info, 'data-chapter-id="', '"')
|
||||
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
|
||||
volume , pos = extr(info, 'data-volume-num="', '"', pos)
|
||||
title , pos = extr(info, 'data-chapter-name="', '"', pos)
|
||||
language, pos = extr(info, " title='", "'", pos)
|
||||
group , pos = extr(info, "<td>", "</td>", pos)
|
||||
user , pos = extr(info, "<td>", "</td>", pos)
|
||||
views , pos = extr(info, ">", "<", pos)
|
||||
date , pos = extr(info, ' datetime="', '"', pos)
|
||||
|
||||
chapter, sep, minor = chapter.partition(".")
|
||||
|
||||
results.append((self.root + "/chapter/" + chid, {
|
||||
"manga": manga,
|
||||
"manga_id": util.safe_int(manga_id),
|
||||
"title": text.unescape(title),
|
||||
"volume": util.safe_int(volume),
|
||||
"chapter": util.safe_int(chapter),
|
||||
"chapter_minor": sep + minor,
|
||||
"chapter_id": util.safe_int(chid),
|
||||
"group": text.unescape(text.remove_html(group)),
|
||||
"contributor": text.remove_html(user),
|
||||
"views": util.safe_int(views),
|
||||
"date": date,
|
||||
"lang": util.language_to_code(language),
|
||||
"language": language,
|
||||
}))
|
||||
|
||||
return results
|
Loading…
Reference in new issue