[mangadex] add title info for chapter extractors

7 years ago · e54b43be08
parent a2020c736e
commit e54b43be08
3 changed files with 14 additions and 9 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,7 @@
 # Changelog

+## Unreleased
+
 ## 1.3.4 - 2018-04-20
 - Added support for custom OAuth2 credentials for `pinterest`
 - Improved rate limit handling for `tumblr` extractors
--- a/gallery_dl/extractor/mangadex.py
+++ b/gallery_dl/extractor/mangadex.py
@@ -27,13 +27,13 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
    pattern = [r"(?:https?://)?(?:www\.)?mangadex\.(?:org|com)/chapter/(\d+)"]
    test = [
        ("https://mangadex.org/chapter/122094", {
-            "keyword": "b4c83fe41f125eae745c2e00d29e087cc4eb78df",
+            "keyword": "fe9f66f61ef3a31d9e5a0bd47c672f1b2433a682",
            "content": "7ab3bef5caccb62b881f8e6e70359d3c7be8137f",
        }),
        # oneshot
        ("https://mangadex.org/chapter/138086", {
            "count": 64,
-            "keyword": "9b1b7292f7dbcf10983fbdc34b8cdceeb47328ee",
+            "keyword": "0e27e78e498debf905199ff9540cffe5c352ae21",
        }),
        # NotFoundError
        ("https://mangadex.org/chapter/1", {
@@ -42,8 +42,7 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
    ]

    def __init__(self, match):
-        self.chapter_id = match.group(1)
-        url = self.root + "/chapter/" + self.chapter_id
+        url = self.root + "/chapter/" + match.group(1)
        ChapterExtractor.__init__(self, url)

    def get_metadata(self, page):
@@ -51,12 +50,15 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
            raise exception.NotFoundError("chapter")

        info    , pos = text.extract(page, '="og:title" content="', '"')
-        manga_id, pos = text.extract(page, '/images/manga/', '.', pos)
        _       , pos = text.extract(page, ' id="jump_group"', '', pos)
        _       , pos = text.extract(page, ' selected ', '', pos)
        language, ___ = text.extract(page, " title='", "'", pos-100)
        group   , pos = text.extract(page, '>', '<', pos)

+        data = json.loads(
+            text.extract(page, 'data-type="chapter">', '<', pos)[0]
+        )
+
        info = text.unescape(info)
        match = re.match(
            r"(?:(?:Vol\. (\d+) )?Ch\. (\d+)([^ ]*)|(.*)) "
@@ -64,12 +66,13 @@ class MangadexChapterExtractor(MangadexExtractor, ChapterExtractor):
            info)

        return {
-            "manga": match.group(5),
-            "manga_id": util.safe_int(manga_id),
+            "manga": data["manga_title"],
+            "manga_id": data["manga_id"],
+            "title": data["chapter_title"],
            "volume": util.safe_int(match.group(1)),
            "chapter": util.safe_int(match.group(2)),
            "chapter_minor": match.group(3) or "",
-            "chapter_id": util.safe_int(self.chapter_id),
+            "chapter_id": data["chapter_id"],
            "chapter_string": info.replace(" - MangaDex", ""),
            "group": text.unescape(group),
            "lang": util.language_to_code(language),
--- a/gallery_dl/version.py
+++ b/gallery_dl/version.py
@@ -6,4 +6,4 @@
 # it under the terms of the GNU General Public License version 2 as
 # published by the Free Software Foundation.

-__version__ = "1.3.4"
+__version__ = "1.3.5-dev"