[mangadex] general improvements

- support >100 chapter entries per manga
- custom archive ID format
- detect non-existing chapters
pull/81/head
Mike Fährmann 7 years ago
parent 749fbbfa6c
commit 1400868f53
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -1,6 +1,8 @@
# Changelog
## Unreleased
- Added support for:
- `mangadex` - https://mangadex.org/
## 1.3.0 - 2018-03-02
- Added `--proxy` to explicitly specify a proxy server ([#76](https://github.com/mikf/gallery-dl/issues/76))

@ -47,7 +47,7 @@ Luscious https://luscious.net/ Albums
Manga Fox http://fanfox.net/ Chapters
Manga Here http://www.mangahere.co/ Chapters, Manga
Manga Stream https://mangastream.com/ Chapters
Mangadex https://mangadex.org/ Chapters, Manga
MangaDex https://mangadex.org/ Chapters, Manga
Mangapanda https://www.mangapanda.com/ Chapters, Manga
MangaPark https://mangapark.me/ Chapters, Manga
Mangareader https://www.mangareader.net/ Chapters, Manga

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from https://mangadex.org/"""
from .common import ChapterExtractor, MangaExtractor
from .. import text, util
from .. import text, util, exception
from urllib.parse import urljoin
import json
import re
@ -23,6 +23,7 @@ class MangadexExtractor():
class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
"""Extractor for manga-chapters from mangadex.org"""
archive_fmt = "{chapter_id}_{page}"
pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
test = [
("https://mangadex.org/chapter/122094", {
@ -34,6 +35,10 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
"count": 64,
"keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
}),
# NotFoundError
("https://mangadex.org/chapter/1", {
"exception": exception.NotFoundError,
})
]
def __init__(self, match):
@ -42,6 +47,9 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
ChapterExtractor.__init__(self, url)
def get_metadata(self, page):
if "title='Warning'" in page and " does not exist." in page:
raise exception.NotFoundError("chapter")
info , pos = text.extract(page, '="og:title" content="', '"')
manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
_ , pos = text.extract(page, ' id="jump_group"', '', pos)
@ -88,14 +96,14 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
pattern = [r"(?:https?://)?(?:www\.)?(mangadex\.(?:org|com)/manga/\d+)"]
test = [
("https://mangadex.org/manga/2946/souten-no-koumori", {
"url": "9e77934759828458d0424473922e41f348719472",
"count": ">= 1",
"keywords": {
"manga": "Souten no Koumori",
"manga_id": 2946,
"title": "Oneshot",
"volume": int,
"chapter": int,
"chapter_minor": str,
"volume": 0,
"chapter": 0,
"chapter_minor": "",
"chapter_id": int,
"group": str,
"contributor": str,
@ -106,43 +114,53 @@ class MangadexMangaExtractor(MangadexExtractor, MangaExtractor):
},
}),
]
scheme = "https"
per_page = 100
def chapters(self, page):
results = []
extr = text.extract
num = 1
manga = text.unescape(extr(
page, '"og:title" content="', '"')[0].rpartition(" (")[0])
manga_id = util.safe_int(extr(
page, '/images/manga/', '.')[0])
for info in text.extract_iter(page, "<tr id=", "</tr>"):
chid , pos = extr(info, 'data-chapter-id="', '"')
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
volume , pos = extr(info, 'data-volume-num="', '"', pos)
title , pos = extr(info, 'data-chapter-name="', '"', pos)
language, pos = extr(info, " title='", "'", pos)
group , pos = extr(info, "<td>", "</td>", pos)
user , pos = extr(info, "<td>", "</td>", pos)
views , pos = extr(info, ">", "<", pos)
date , pos = extr(info, ' datetime="', '"', pos)
chapter, sep, minor = chapter.partition(".")
results.append((self.root + "/chapter/" + chid, {
"manga": manga,
"manga_id": util.safe_int(manga_id),
"title": text.unescape(title),
"volume": util.safe_int(volume),
"chapter": util.safe_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": util.safe_int(chid),
"group": text.unescape(text.remove_html(group)),
"contributor": text.remove_html(user),
"views": util.safe_int(views),
"date": date,
"lang": util.language_to_code(language),
"language": language,
}))
return results
while True:
before = len(results)
for info in text.extract_iter(page, "<tr id=", "</tr>"):
chid , pos = extr(info, 'data-chapter-id="', '"')
chapter , pos = extr(info, 'data-chapter-num="', '"', pos)
volume , pos = extr(info, 'data-volume-num="', '"', pos)
title , pos = extr(info, 'data-chapter-name="', '"', pos)
language, pos = extr(info, " title='", "'", pos)
group , pos = extr(info, "<td>", "</td>", pos)
user , pos = extr(info, "<td>", "</td>", pos)
views , pos = extr(info, ">", "<", pos)
date , pos = extr(info, ' datetime="', '"', pos)
chapter, sep, minor = chapter.partition(".")
results.append((self.root + "/chapter/" + chid, {
"manga": manga,
"manga_id": util.safe_int(manga_id),
"title": text.unescape(title),
"volume": util.safe_int(volume),
"chapter": util.safe_int(chapter),
"chapter_minor": sep + minor,
"chapter_id": util.safe_int(chid),
"group": text.unescape(text.remove_html(group)),
"contributor": text.remove_html(user),
"views": util.safe_int(views),
"date": date,
"lang": util.language_to_code(language),
"language": language,
}))
if len(results) - before != self.per_page:
return results
num += 1
page = self.request("{}/_/{}/".format(self.url, num)).text

@ -37,6 +37,7 @@ CATEGORY_MAP = {
"kisscomic" : "KissComic",
"kissmanga" : "KissManga",
"loveisover" : "Love is Over Archive",
"mangadex" : "MangaDex",
"mangafox" : "Manga Fox",
"mangahere" : "Manga Here",
"mangapark" : "MangaPark",

Loading…
Cancel
Save