[mangadex] general improvements

- support >100 chapter entries per manga (see the pagination sketch below)
- custom archive ID format
- detect non-existing chapters
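
The key change is pagination: MangaDex lists at most 100 chapter rows per listing page, so the manga extractor now requests <manga-url>/_/2/, /_/3/, ... until a page yields fewer than per_page rows. A minimal sketch of that stopping condition, where fetch_page and parse_rows are hypothetical stand-ins for self.request() and text.extract_iter() in the real extractor:

    # Sketch only: keep fetching chapter-list pages until one comes back
    # with fewer than PER_PAGE rows, which marks the last page.
    PER_PAGE = 100

    def all_chapter_rows(fetch_page, parse_rows):
        results = []
        num = 1
        while True:
            before = len(results)
            results.extend(parse_rows(fetch_page(num)))  # e.g. <manga-url>/_/2/
            if len(results) - before != PER_PAGE:
                return results
            num += 1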
pull/81/head
Mike Fährmann 7 years ago
parent 749fbbfa6c
commit 1400868f53
GPG Key ID: 5680CA389D365A88

@@ -1,6 +1,8 @@
 # Changelog

 ## Unreleased
+- Added support for:
+  - `mangadex` - https://mangadex.org/

 ## 1.3.0 - 2018-03-02
 - Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))

@ -47,7 +47,7 @@ Luscious https://luscious.net/ Albums
Manga Fox http://fanfox.net/ Chapters Manga Fox http://fanfox.net/ Chapters
Manga Here http://www.mangahere.co/ Chapters, Manga Manga Here http://www.mangahere.co/ Chapters, Manga
Manga Stream https://mangastream.com/ Chapters Manga Stream https://mangastream.com/ Chapters
Mangadex https://mangadex.org/ Chapters, Manga MangaDex https://mangadex.org/ Chapters, Manga
Mangapanda https://www.mangapanda.com/ Chapters, Manga Mangapanda https://www.mangapanda.com/ Chapters, Manga
MangaPark https://mangapark.me/ Chapters, Manga MangaPark https://mangapark.me/ Chapters, Manga
Mangareader https://www.mangareader.net/ Chapters, Manga Mangareader https://www.mangareader.net/ Chapters, Manga

@@ -9,7 +9,7 @@
 """Extract manga-chapters and entire manga from https://mangadex.org/"""

 from .common import ChapterExtractor, MangaExtractor
-from .. import text, util
+from .. import text, util, exception
 from urllib.parse import urljoin
 import json
 import re
@@ -23,6 +23,7 @@ class MangadexExtractor():
 class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
     """Extractor for manga-chapters from mangadex.org"""
+    archive_fmt = "{chapter_id}_{page}"
     pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
     test = [
         ("https://mangadex.org/chapter/122094", {
@@ -34,6 +35,10 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
             "count": 64,
             "keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
         }),
+        # NotFoundError
+        ("https://mangadex.org/chapter/1", {
+            "exception": exception.NotFoundError,
+        })
     ]

     def __init__(self, match):
@@ -42,6 +47,9 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
         ChapterExtractor.__init__(self, url)

     def get_metadata(self, page):
+        if "title='Warning'" in page and " does not exist." in page:
+            raise exception.NotFoundError("chapter")
+
         info    , pos = text.extract(page, '="og:title" content="', '"')
         manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
         _       , pos = text.extract(page, ' id="jump_group"', '', pos)
@@ -88,14 +96,14 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
     pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
     test = [
         ("https://mangadex.org/manga/2946/souten-no-koumori", {
-            "url": "9e77934759828458d0424473922e41f348719472",
+            "count": ">= 1",
             "keywords": {
                 "manga": "Souten no Koumori",
                 "manga_id": 2946,
                 "title": "Oneshot",
-                "volume": int,
-                "chapter": int,
-                "chapter_minor": str,
+                "volume": 0,
+                "chapter": 0,
+                "chapter_minor": "",
                 "chapter_id": int,
                 "group": str,
                 "contributor": str,
@@ -106,43 +114,53 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
             },
         }),
     ]
+    scheme = "https"
+    per_page = 100

     def chapters(self, page):
         results = []
         extr = text.extract
+        num = 1

         manga = text.unescape(extr(
             page, '"og:title" content="', '"')[0].rpartition(" (")[0])
         manga_id = util.safe_int(extr(
             page, '/images/manga/', '.')[0])

-        for info in text.extract_iter(page, "<tr id=", "</tr>"):
-            chid    , pos = extr(info, 'data-chapter-id="', '"')
-            chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
-            volume  , pos = extr(info, 'data-volume-num="', '"', pos)
-            title   , pos = extr(info, 'data-chapter-name="', '"', pos)
-            language, pos = extr(info, " title='", "'", pos)
-            group   , pos = extr(info, "<td>", "</td>", pos)
-            user    , pos = extr(info, "<td>", "</td>", pos)
-            views   , pos = extr(info, ">", "<", pos)
-            date    , pos = extr(info, ' datetime="', '"', pos)
-
-            chapter, sep, minor = chapter.partition(".")
-
-            results.append((self.root + "/chapter/" + chid, {
-                "manga": manga,
-                "manga_id": util.safe_int(manga_id),
-                "title": text.unescape(title),
-                "volume": util.safe_int(volume),
-                "chapter": util.safe_int(chapter),
-                "chapter_minor": sep + minor,
-                "chapter_id": util.safe_int(chid),
-                "group": text.unescape(text.remove_html(group)),
-                "contributor": text.remove_html(user),
-                "views": util.safe_int(views),
-                "date": date,
-                "lang": util.language_to_code(language),
-                "language": language,
-            }))
-
-        return results
+        while True:
+            before = len(results)
+
+            for info in text.extract_iter(page, "<tr id=", "</tr>"):
+                chid    , pos = extr(info, 'data-chapter-id="', '"')
+                chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
+                volume  , pos = extr(info, 'data-volume-num="', '"', pos)
+                title   , pos = extr(info, 'data-chapter-name="', '"', pos)
+                language, pos = extr(info, " title='", "'", pos)
+                group   , pos = extr(info, "<td>", "</td>", pos)
+                user    , pos = extr(info, "<td>", "</td>", pos)
+                views   , pos = extr(info, ">", "<", pos)
+                date    , pos = extr(info, ' datetime="', '"', pos)
+
+                chapter, sep, minor = chapter.partition(".")
+
+                results.append((self.root + "/chapter/" + chid, {
+                    "manga": manga,
+                    "manga_id": util.safe_int(manga_id),
+                    "title": text.unescape(title),
+                    "volume": util.safe_int(volume),
+                    "chapter": util.safe_int(chapter),
+                    "chapter_minor": sep + minor,
+                    "chapter_id": util.safe_int(chid),
+                    "group": text.unescape(text.remove_html(group)),
+                    "contributor": text.remove_html(user),
+                    "views": util.safe_int(views),
+                    "date": date,
+                    "lang": util.language_to_code(language),
+                    "language": language,
+                }))
+
+            if len(results) - before != self.per_page:
+                return results
+            num += 1
+            page = self.request("{}/_/{}/".format(self.url, num)).text

@@ -37,6 +37,7 @@ CATEGORY_MAP = {
     "kisscomic"  : "KissComic",
     "kissmanga"  : "KissManga",
     "loveisover" : "Love is Over Archive",
+    "mangadex"   : "MangaDex",
     "mangafox"   : "Manga Fox",
     "mangahere"  : "Manga Here",
     "mangapark"  : "MangaPark",
