You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.
gallery-dl/gallery_dl/extractor/mangapark.py

406 lines
8.9 KiB

# -*- coding: utf-8 -*-
# Copyright 2015-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Extractors for https://mangapark.net/"""
from .common import ChapterExtractor, Extractor, Message
from .. import text, util, exception
import re
# Host part shared by the extractor patterns in this module:
# an optional http(s) scheme and optional "www." prefix, followed by
# "mangapark." on one of the net/com/org/io/me top-level domains.
# The extractor classes append their own path pattern to it.
BASE_PATTERN = r"(?:https?://)?(?:www\.)?mangapark\.(?:net|com|org|io|me)"
class MangaparkBase():
    """Base class for mangapark extractors"""
    category = "mangapark"
    # Cached 'match' method of the compiled chapter-name pattern.
    # It is created on first use and stored on MangaparkBase itself,
    # so the pattern gets compiled only once for all instances.
    _match_title = None

    def _parse_chapter_title(self, title):
        """Split a chapter name into (volume, chapter, minor, title)

        The name has to start with an optional "vol"/"vol."/"volume"
        number followed by a "ch"/"ch."/"chapter" number (matched
        case-insensitively). Any non-whitespace, non-colon characters
        directly attached to the chapter number are returned as 'minor',
        and the text after a ':' is returned as 'title'.

        The regex groups are returned unmodified, i.e. 'volume' and
        'title' are None when they are absent from the name.
        Names that do not match at all yield the fallback (0, 0, "", "").
        """
        matcher = self._match_title
        if not matcher:
            compiled = re.compile(
                r"(?i)"
                r"(?:vol(?:\.|ume)?\s*(\d+)\s*)?"
                r"ch(?:\.|apter)?\s*(\d+)([^\s:]*)"
                r"(?:\s*:\s*(.*))?"
            )
            matcher = MangaparkBase._match_title = compiled.match

        found = matcher(title)
        if found:
            return found.groups()
        return (0, 0, "", "")
class MangaparkChapterExtractor(MangaparkBase, ChapterExtractor):
    """Extractor for manga-chapters from mangapark.net"""
    pattern = BASE_PATTERN + r"/title/[^/?#]+/(\d+)"
    example = "https://mangapark.net/title/MANGA/12345-en-ch.01"

    def __init__(self, match):
        """Derive the site root and the chapter page URL from 'match'"""
        self.root = text.root_from_url(match.group(0))
        # the manga slug is replaced by "_";
        # only the numeric ID captured by 'pattern' is kept
        chapter_url = "{}/title/_/{}".format(self.root, match.group(1))
        ChapterExtractor.__init__(self, match, chapter_url)

    def metadata(self, page):
        """Build the chapter metadata dict from the JSON embedded in 'page'

        As a side effect, the image URL list and the matching list of
        query strings are stored as self._urls and self._params,
        which images() combines afterwards.
        """
        raw_json = text.extr(
            page, 'id="__NEXT_DATA__" type="application/json">', '<')
        state = util.json_loads(raw_json)[
            "props"]["pageProps"]["dehydratedState"]
        chapter = state["queries"][0]["state"]["data"]["data"]

        comic = chapter["comicNode"]["data"]
        src = chapter["sourceNode"]["data"]

        image_set = chapter["imageSet"]
        self._urls = image_set["httpLis"]
        self._params = image_set["wordLis"]

        volume, number, minor, parsed_title = \
            self._parse_chapter_title(chapter["dname"])

        return {
            "manga": comic["name"],
            "manga_id": comic["id"],
            "artist": src["artists"],
            "author": src["authors"],
            "genre": src["genres"],
            "volume": text.parse_int(volume),
            "chapter": text.parse_int(number),
            "chapter_minor": minor,
            "chapter_id": chapter["id"],
            # prefer the explicit title over the one parsed from 'dname'
            "title": chapter["title"] or parsed_title or "",
            "lang": chapter["lang"],
            "language": util.code_to_language(chapter["lang"]),
            "source": src["srcTitle"],
            "source_id": src["id"],
            # NOTE(review): 'dateCreate' looks like a millisecond
            # timestamp that gets reduced to seconds here - confirm
            "date": text.parse_timestamp(chapter["dateCreate"] // 1000),
        }

    def images(self, page):
        """Return a list of (url, None) tuples, one per chapter image

        Each URL is an entry of self._urls joined with its counterpart
        from self._params by a "?". 'page' itself is not used.
        """
        results = []
        for base_url, query in zip(self._urls, self._params):
            results.append((base_url + "?" + query, None))
        return results
class MangaparkMangaExtractor(MangaparkBase, Extractor):
    """Extractor for manga from mangapark.net"""
    subcategory = "manga"
    pattern = BASE_PATTERN + r"/title/(\d+)(?:-[^/?#]*)?/?$"
    example = "https://mangapark.net/title/12345-MANGA"

    def __init__(self, match):
        """Store the site root and the numeric manga ID from 'match'"""
        self.root = text.root_from_url(match.group(0))
        self.manga_id = int(match.group(1))
        Extractor.__init__(self, match)

    def items(self):
        """Yield one Message.Queue entry for every chapter of the manga

        Each entry carries the chapter URL and a metadata dict whose
        '_extractor' field is set to MangaparkChapterExtractor.
        """
        for node in self.chapters():
            info = node["data"]
            chapter_url = self.root + info["urlPath"]

            volume, number, minor, parsed_title = \
                self._parse_chapter_title(info["dname"])

            metadata = {
                "manga_id": self.manga_id,
                "volume": text.parse_int(volume),
                "chapter": text.parse_int(number),
                "chapter_minor": minor,
                "chapter_id": info["id"],
                # prefer the explicit title over the one parsed from 'dname'
                "title": info["title"] or parsed_title or "",
                "lang": info["lang"],
                "language": util.code_to_language(info["lang"]),
                "source": info["srcTitle"],
                "source_id": info["sourceId"],
                # NOTE(review): 'dateCreate' looks like a millisecond
                # timestamp that gets reduced to seconds here - confirm
                "date": text.parse_timestamp(info["dateCreate"] // 1000),
                "_extractor": MangaparkChapterExtractor,
            }
            yield Message.Queue, chapter_url, metadata

    def chapters(self):
        """Return an iterable of chapter nodes

        Chapters are restricted to a single source when the 'source'
        option is set; otherwise all chapters are returned.
        """
        source = self.config("source")
        if source:
            source_id = self._select_source(source)
            self.log.debug(
                "Requesting chapters for source_id %s", source_id)
            return self.chapters_source(source_id)
        return self.chapters_all()

    def chapters_all(self):
        """Yield the chapter nodes of all sources, one range at a time

        The first request is sent without a range. Its 'pager' value is
        kept and its entries, starting at index 1, are used as the range
        for each following request until the pager is exhausted.
        """
        selection = {
            "comicId": self.manga_id,
            "range": None,
            "isAsc": not self.config("chapter-reverse"),
        }
        variables = {"select": selection}
        index = 0

        while True:
            result = self._request_graphql(
                "get_content_comicChapterRangeList", variables)

            for entry in result["items"]:
                yield from entry["chapterNodes"]

            if index == 0:
                # only the first response's pager gets used
                pager = result["pager"]
            index += 1

            try:
                next_range = pager[index]
            except IndexError:
                return
            selection["range"] = next_range

    def chapters_source(self, source_id):
        """Return the chapter node list of the source with ID 'source_id'

        The list is reversed in-place when 'chapter-reverse' is enabled.
        """
        nodes = self._request_graphql(
            "get_content_source_chapterList", {"sourceId": source_id})

        if self.config("chapter-reverse"):
            nodes.reverse()
        return nodes

    def _select_source(self, source):
        """Resolve the 'source' option value to a source ID

        Integers are returned as they are. Strings are interpreted as
        "<group>:<lang>", where an empty part matches anything, and get
        compared against 'srcTitle' (case-insensitively) and 'lang' of
        the manga's available sources. The ID of the first matching
        source is returned.

        Raises exception.StopExtraction if no source matches.
        """
        if isinstance(source, int):
            return source

        group, _, lang = source.partition(":")
        group = group.lower()

        variables = {
            "comicId": self.manga_id,
            "dbStatuss": ["normal"],
            "haveChapter": True,
        }
        candidates = self._request_graphql(
            "get_content_comic_sources", variables)

        for candidate in candidates:
            info = candidate["data"]
            if group and info["srcTitle"].lower() != group:
                continue
            if lang and info["lang"] != lang:
                continue
            return info["id"]

        raise exception.StopExtraction(
            "'%s' does not match any available source", source)

    def _request_graphql(self, opname, variables):
        """Send the QUERIES entry named 'opname' and return its result

        'variables' is serialized to a JSON string before it is placed
        in the POST payload. The returned value is the ["data"][opname]
        part of the decoded JSON response.
        """
        endpoint = self.root + "/apo/"
        payload = {
            "query": QUERIES[opname],
            "variables": util.json_dumps(variables),
            "operationName": opname,
        }
        response = self.request(endpoint, method="POST", json=payload)
        return response.json()["data"][opname]
# GraphQL query documents, keyed by operation name.
# MangaparkMangaExtractor._request_graphql() looks an entry up by its
# 'opname' argument and sends it as the "query" field of its POST payload;
# the same name is used to pick the result out of the response.
# The strings are sent as-is, so their content must not be reformatted.
QUERIES = {
# Chapter nodes of a comic for one range of its pager.
# Requested by chapters_all(), which fills in the '$select' variable.
"get_content_comicChapterRangeList": """
query get_content_comicChapterRangeList($select: Content_ComicChapterRangeList_Select) {
get_content_comicChapterRangeList(
select: $select
) {
reqRange{x y}
missing
pager {x y}
items{
serial
chapterNodes {
id
data {
id
sourceId
dbStatus
isNormal
isHidden
isDeleted
isFinal
dateCreate
datePublic
dateModify
lang
volume
serial
dname
title
urlPath
srcTitle srcColor
count_images
stat_count_post_child
stat_count_post_reply
stat_count_views_login
stat_count_views_guest
userId
userNode {
id
data {
id
name
uniq
avatarUrl
urlPath
verified
deleted
banned
dateCreate
dateOnline
stat_count_chapters_normal
stat_count_chapters_others
is_adm is_mod is_vip is_upr
}
}
disqusId
}
sser_read
}
}
}
}
""",
# Chapter nodes of a single source, selected by '$sourceId'.
# Requested by chapters_source().
"get_content_source_chapterList": """
query get_content_source_chapterList($sourceId: Int!) {
get_content_source_chapterList(
sourceId: $sourceId
) {
id
data {
id
sourceId
dbStatus
isNormal
isHidden
isDeleted
isFinal
dateCreate
datePublic
dateModify
lang
volume
serial
dname
title
urlPath
srcTitle srcColor
count_images
stat_count_post_child
stat_count_post_reply
stat_count_views_login
stat_count_views_guest
userId
userNode {
id
data {
id
name
uniq
avatarUrl
urlPath
verified
deleted
banned
dateCreate
dateOnline
stat_count_chapters_normal
stat_count_chapters_others
is_adm is_mod is_vip is_upr
}
}
disqusId
}
}
}
""",
# Sources of a comic. Requested by _select_source(), which sets
# '$comicId', '$dbStatuss' and '$haveChapter' and reads the
# 'srcTitle', 'lang' and 'id' fields of the returned entries.
"get_content_comic_sources": """
query get_content_comic_sources($comicId: Int!, $dbStatuss: [String] = [], $userId: Int, $haveChapter: Boolean, $sortFor: String) {
get_content_comic_sources(
comicId: $comicId
dbStatuss: $dbStatuss
userId: $userId
haveChapter: $haveChapter
sortFor: $sortFor
) {
id
data{
id
dbStatus
isNormal
isHidden
isDeleted
lang name altNames authors artists
release
genres summary{code} extraInfo{code}
urlCover600
urlCover300
urlCoverOri
srcTitle srcColor
chapterCount
chapterNode_last {
id
data {
dateCreate datePublic dateModify
volume serial
dname title
urlPath
userNode {
id data {uniq name}
}
}
}
}
}
}
""",
}