gallery-dl/gallery_dl/extractor/mangapark.py

# -*- coding: utf-8 -*-

# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.

"""Extractors for https://mangapark.net/"""

from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception
import re

BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"


class MangaparkBase():
    """Base class for mangapark extractors"""
    category = "mangapark"
    _match_title = None

    def _parse_chapter_title(self, title):
        if not self._match_title:
            MangaparkBase._match_title = re.compile(
                r"(?i)"
                r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
                r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
                r"(?:\s*:\s*(.*))?"
            ).match
        match = self._match_title(title)
        return match.groups() if match else (0, 0, "", "")


class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
    """Extractor for manga-chapters from mangapark.net"""
    pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
    example = "https://mangapark.net/title/MANGA/12345-en-ch.01"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        url = "{}/title/_/{}".format(self.root, match.group(1))
        ChapterExtractor.__init__(self, match, url)

    def metadata(self, page):
        data = util.json_loads(text.extr(
            page, 'id="__NEXT_DATA__" type="application/json">', '<'))
        chapter = (data["props"]["pageProps"]["dehydratedState"]
                   ["queries"][0]["state"]["data"]["data"])
        manga = chapter["comicNode"]["data"]
        source = chapter["sourceNode"]["data"]

        self._urls = chapter["imageSet"]["httpLis"]
        self._params = chapter["imageSet"]["wordLis"]
        vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])

        return {
            "manga"     : manga["name"],
            "manga_id"  : manga["id"],
            "artist"    : source["artists"],
            "author"    : source["authors"],
            "genre"     : source["genres"],
            "volume"    : text.parse_int(vol),
            "chapter"   : text.parse_int(ch),
            "chapter_minor": minor,
            "chapter_id": chapter["id"],
            "title"     : chapter["title"] or title or "",
            "lang"      : chapter["lang"],
            "language"  : util.code_to_language(chapter["lang"]),
            "source"    : source["srcTitle"],
            "source_id" : source["id"],
            "date"      : text.parse_timestamp(chapter["dateCreate"] // 1000),
        }

    def images(self, page):
        return [
            (url + "?" + params, None)
            for url, params in zip(self._urls, self._params)
        ]


class MangaparkMangaExtractor(MangaparkBase, Extractor):
    """Extractor for manga from mangapark.net"""
    subcategory = "manga"
    pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
    example = "https://mangapark.net/title/12345-MANGA"

    def __init__(self, match):
        self.root = text.root_from_url(match.group(0))
        self.manga_id = int(match.group(1))
        Extractor.__init__(self, match)

    def items(self):
        for chapter in self.chapters():
            chapter = chapter["data"]
            url = self.root + chapter["urlPath"]

            vol, ch, minor, title = self._parse_chapter_title(chapter["dname"])
            data = {
                "manga_id"  : self.manga_id,
                "volume"    : text.parse_int(vol),
                "chapter"   : text.parse_int(ch),
                "chapter_minor": minor,
                "chapter_id": chapter["id"],
                "title"     : chapter["title"] or title or "",
                "lang"      : chapter["lang"],
                "language"  : util.code_to_language(chapter["lang"]),
                "source"    : chapter["srcTitle"],
                "source_id" : chapter["sourceId"],
                "date"      : text.parse_timestamp(
                    chapter["dateCreate"] // 1000),
                "_extractor": MangaparkChapterExtractor,
            }
            yield Message.Queue, url, data

    def chapters(self):
        source = self.config("source")
        if not source:
            return self.chapters_all()

        source_id = self._select_source(source)
        self.log.debug("Requesting chapters for source_id %s", source_id)
        return self.chapters_source(source_id)

    def chapters_all(self):
        pnum = 0
        variables = {
            "select": {
                "comicId": self.manga_id,
                "range"  : None,
                "isAsc"  : not self.config("chapter-reverse"),
            }
        }

        while True:
            data = self._request_graphql(
                "get_content_comicChapterRangeList", variables)

            for item in data["items"]:
                yield from item["chapterNodes"]

            if not pnum:
                pager = data["pager"]
            pnum += 1

            try:
                variables["select"]["range"] = pager[pnum]
            except IndexError:
                return

    def chapters_source(self, source_id):
        variables = {
            "sourceId": source_id,
        }
        chapters = self._request_graphql(
            "get_content_source_chapterList", variables)

        if self.config("chapter-reverse"):
            chapters.reverse()
        return chapters

    def _select_source(self, source):
        if isinstance(source, int):
            return source

        group, _, lang = source.partition(":")
        group = group.lower()

        variables = {
            "comicId"    : self.manga_id,
            "dbStatuss"  : ["normal"],
            "haveChapter": True,
        }
        for item in self._request_graphql(
                "get_content_comic_sources", variables):
            data = item["data"]
            if (not group or data["srcTitle"].lower() == group) and (
                    not lang or data["lang"] == lang):
                return data["id"]

        raise exception.StopExtraction(
            "'%s' does not match any available source", source)

    def _request_graphql(self, opname, variables):
        url = self.root + "/apo/"
        data = {
            "query"        : QUERIES[opname],
            "variables"    : util.json_dumps(variables),
            "operationName": opname,
        }
        return self.request(
            url, method="POST", json=data).json()["data"][opname]


QUERIES = {
    "get_content_comicChapterRangeList": """
  query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) {
    get_content_comicChapterRangeList(
      select: $select
    ) {
      reqRange{x y}
      missing
      pager {x y}
      items{
        serial
        chapterNodes {

  id
  data {


  id
  sourceId

  dbStatus
  isNormal
  isHidden
  isDeleted
  isFinal

  dateCreate
  datePublic
  dateModify
  lang
  volume
  serial
  dname
  title
  urlPath

  srcTitle srcColor

  count_images

  stat_count_post_child
  stat_count_post_reply
  stat_count_views_login
  stat_count_views_guest

  userId
  userNode {

  id
  data {

id
name
uniq
avatarUrl
urlPath

verified
deleted
banned

dateCreate
dateOnline

stat_count_chapters_normal
stat_count_chapters_others

is_adm is_mod is_vip is_upr

  }

  }

  disqusId


  }

          sser_read
        }
      }

    }
  }
""",

    "get_content_source_chapterList": """
  query get_content_source_chapterList($sourceId: Int!) {
    get_content_source_chapterList(
      sourceId: $sourceId
    ) {

  id
  data {


  id
  sourceId

  dbStatus
  isNormal
  isHidden
  isDeleted
  isFinal

  dateCreate
  datePublic
  dateModify
  lang
  volume
  serial
  dname
  title
  urlPath

  srcTitle srcColor

  count_images

  stat_count_post_child
  stat_count_post_reply
  stat_count_views_login
  stat_count_views_guest

  userId
  userNode {

  id
  data {

id
name
uniq
avatarUrl
urlPath

verified
deleted
banned

dateCreate
dateOnline

stat_count_chapters_normal
stat_count_chapters_others

is_adm is_mod is_vip is_upr

  }

  }

  disqusId


  }

    }
  }
""",

    "get_content_comic_sources": """
  query get_content_comic_sources($comicId: Int!, $dbStatuss: [String] = [], $userId: Int, $haveChapter: Boolean, $sortFor: String) {
    get_content_comic_sources(
      comicId: $comicId
      dbStatuss: $dbStatuss
      userId: $userId
      haveChapter: $haveChapter
      sortFor: $sortFor
    ) {

id
data{

  id

  dbStatus
  isNormal
  isHidden
  isDeleted

  lang name altNames authors artists

  release
  genres summary{code} extraInfo{code}

  urlCover600
  urlCover300
  urlCoverOri

  srcTitle srcColor

  chapterCount
  chapterNode_last {
    id
    data {
      dateCreate datePublic dateModify
      volume serial
      dname title
      urlPath
      userNode {
        id data {uniq name}
      }
    }
  }
}

    }
  }
""",
}