|
|
|
@ -9,7 +9,7 @@
|
|
|
|
|
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
|
|
|
|
|
|
|
|
|
|
from .common import ChapterExtractor, MangaExtractor
|
|
|
|
|
from .. import text, util
|
|
|
|
|
from .. import text, util, exception
|
|
|
|
|
from urllib.parse import urljoin
|
|
|
|
|
import json
|
|
|
|
|
import re
|
|
|
|
@ -23,6 +23,7 @@ class MangadexExtractor():
|
|
|
|
|
|
|
|
|
|
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
|
|
|
|
"""Extractor for manga-chapters from mangadex.org"""
|
|
|
|
|
archive_fmt = "{chapter_id}_{page}"
|
|
|
|
|
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
|
|
|
|
|
test = [
|
|
|
|
|
("https://mangadex.org/chapter/122094", {
|
|
|
|
@ -34,6 +35,10 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
|
|
|
|
"count": 64,
|
|
|
|
|
"keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
|
|
|
|
|
}),
|
|
|
|
|
# NotFoundError
|
|
|
|
|
("https://mangadex.org/chapter/1", {
|
|
|
|
|
"exception": exception.NotFoundError,
|
|
|
|
|
})
|
|
|
|
|
]
|
|
|
|
|
|
|
|
|
|
def __init__(self, match):
|
|
|
|
@ -42,6 +47,9 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
|
|
|
|
|
ChapterExtractor.__init__(self, url)
|
|
|
|
|
|
|
|
|
|
def get_metadata(self, page):
|
|
|
|
|
if "title='Warning'" in page and " does not exist." in page:
|
|
|
|
|
raise exception.NotFoundError("chapter")
|
|
|
|
|
|
|
|
|
|
info , pos = text.extract(page, '="og:title" content="', '"')
|
|
|
|
|
manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
|
|
|
|
|
_ , pos = text.extract(page, ' id="jump_group"', '', pos)
|
|
|
|
@ -88,14 +96,14 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
|
|
|
|
pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
|
|
|
|
|
test = [
|
|
|
|
|
("https://mangadex.org/manga/2946/souten-no-koumori", {
|
|
|
|
|
"url": "9e77934759828458d0424473922e41f348719472",
|
|
|
|
|
"count": ">= 1",
|
|
|
|
|
"keywords": {
|
|
|
|
|
"manga": "Souten no Koumori",
|
|
|
|
|
"manga_id": 2946,
|
|
|
|
|
"title": "Oneshot",
|
|
|
|
|
"volume": int,
|
|
|
|
|
"chapter": int,
|
|
|
|
|
"chapter_minor": str,
|
|
|
|
|
"volume": 0,
|
|
|
|
|
"chapter": 0,
|
|
|
|
|
"chapter_minor": "",
|
|
|
|
|
"chapter_id": int,
|
|
|
|
|
"group": str,
|
|
|
|
|
"contributor": str,
|
|
|
|
@ -106,16 +114,22 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
|
|
|
|
},
|
|
|
|
|
}),
|
|
|
|
|
]
|
|
|
|
|
scheme = "https"
|
|
|
|
|
per_page = 100
|
|
|
|
|
|
|
|
|
|
def chapters(self, page):
|
|
|
|
|
results = []
|
|
|
|
|
extr = text.extract
|
|
|
|
|
num = 1
|
|
|
|
|
|
|
|
|
|
manga = text.unescape(extr(
|
|
|
|
|
page, '"og:title" content="', '"')[0].rpartition(" (")[0])
|
|
|
|
|
manga_id = util.safe_int(extr(
|
|
|
|
|
page, '/images/manga/', '.')[0])
|
|
|
|
|
|
|
|
|
|
while True:
|
|
|
|
|
before = len(results)
|
|
|
|
|
|
|
|
|
|
for info in text.extract_iter(page, "<tr id=", "</tr>"):
|
|
|
|
|
chid , pos = extr(info, 'data-chapter-id="', '"')
|
|
|
|
|
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
|
|
|
|
@ -145,4 +159,8 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
|
|
|
|
|
"language": language,
|
|
|
|
|
}))
|
|
|
|
|
|
|
|
|
|
if len(results) - before != self.per_page:
|
|
|
|
|
return results
|
|
|
|
|
|
|
|
|
|
num += 1
|
|
|
|
|
page = self.request("{}/_/{}/".format(self.url, num)).text
|
|
|
|
|