[mangasee] extract 'author' and 'genre' metadata (#3703)

Both are lists/arrays. Use {author!S} or {genre:J, } to format them.
pull/3713/head
Mike Fährmann 2 years ago
parent 6b03506655
commit a70a3e5da6
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -42,6 +42,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"pattern": r"https://[^/]+/manga/Tokyo-Innocent/0004\.5-00\d\.png",
"count": 8,
"keyword": {
"author": ["NARUMI Naru"],
"chapter": 4,
"chapter_minor": ".5",
"chapter_string": "100045",
@ -49,6 +50,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"date": "dt:2020-01-20 21:52:53",
"extension": "png",
"filename": r"re:0004\.5-00\d",
"genre": ["Comedy", "Fantasy", "Harem", "Romance", "Shounen",
"Supernatural"],
"index": "1",
"lang": "en",
"language": "English",
@ -62,6 +65,7 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"pattern": r"https://[^/]+/manga/One-Piece/1063-0\d\d\.png",
"count": 13,
"keyword": {
"author": ["ODA Eiichiro"],
"chapter": 1063,
"chapter_minor": "",
"chapter_string": "110630",
@ -69,6 +73,8 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
"date": "dt:2022-10-16 17:32:54",
"extension": "png",
"filename": r"re:1063-0\d\d",
"genre": ["Action", "Adventure", "Comedy", "Drama", "Fantasy",
"Shounen"],
"index": "1",
"lang": "en",
"language": "English",
@ -93,12 +99,16 @@ class MangaseeChapterExtractor(MangaseeBase, ChapterExtractor):
def metadata(self, page):
    """Build the metadata dict for the chapter described by 'page'.

    Reads the "author" and "genre" JSON arrays and the 'vm.*' values
    from the page source, stores the chapter object, path name, and
    index name on the instance, and returns the transformed chapter
    data extended with 'manga', 'author', and 'genre'.
    """
    # NOTE(review): the reader presumably advances through 'page' on
    # every call, so the lookups below are kept in their original order.
    read = text.extract_from(page)

    # The '],' end marker leaves the array without its closing bracket
    # (presumably the marker itself is not returned), so "]" is
    # appended again before the text is parsed as JSON.
    authors = util.json_loads(read('"author":', '],') + "]")
    genres = util.json_loads(read('"genre":', '],') + "]")

    chapter = util.json_loads(read("vm.CurChapter =", ";\r\n"))
    self.chapter = chapter
    self.domain = read('vm.CurPathName = "', '"')
    self.slug = read('vm.IndexName = "', '"')

    info = self._transform_chapter(chapter)
    info["manga"] = text.unescape(read('vm.SeriesName = "', '"'))
    info["author"] = authors
    info["genre"] = genres
    return info
def images(self, page):
@ -127,10 +137,38 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
"/Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai"), {
"pattern": MangaseeChapterExtractor.pattern,
"count": ">= 17",
"keyword": {
"author": ["TAKASE Masaya"],
"chapter": int,
"chapter_minor": r"re:^|\.5$",
"chapter_string": r"re:100\d\d\d",
"date": "type:datetime",
"genre": ["Comedy", "Romance", "School Life", "Shounen",
"Slice of Life"],
"index": "1",
"lang": "en",
"language": "English",
"manga": "Nakamura-Koedo-To-Daizu-Keisuke-Wa-Umaku-Ikanai",
"title": "",
},
}),
("https://manga4life.com/manga/Ano-Musume-Ni-Kiss-To-Shirayuri-O", {
"pattern": MangaseeChapterExtractor.pattern,
"count": ">= 50",
"keyword": {
"author": ["Canno"],
"chapter": int,
"chapter_minor": r"re:^|\.5$",
"chapter_string": r"re:100\d\d\d",
"date": "type:datetime",
"genre": ["Comedy", "Romance", "School Life", "Seinen",
"Shoujo Ai"],
"index": "1",
"lang": "en",
"language": "English",
"manga": "Ano-Musume-Ni-Kiss-To-Shirayuri-O",
"title": ""
},
}),
)
@ -141,9 +179,11 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
MangaExtractor.__init__(self, match, self.root + match.group(2))
def chapters(self, page):
slug, pos = text.extract(page, 'vm.IndexName = "', '"')
chapters = util.json_loads(text.extract(
page, "vm.Chapters = ", ";\r\n", pos)[0])
extr = text.extract_from(page)
author = util.json_loads(extr('"author":', '],') + "]")
genre = util.json_loads(extr('"genre":', '],') + "]")
slug = extr('vm.IndexName = "', '"')
chapters = util.json_loads(extr("vm.Chapters = ", ";\r\n"))
result = []
for data in map(self._transform_chapter, chapters):
@ -154,5 +194,7 @@ class MangaseeMangaExtractor(MangaseeBase, MangaExtractor):
url += "-page-1.html"
data["manga"] = slug
data["author"] = author
data["genre"] = genre
result.append((url, data))
return result

Loading…
Cancel
Save