merge #5061: [webtoons] extract more metadata

- author_name
- comic_name
- episode_name
- username
pull/2340/head
Mike Fährmann 8 months ago
commit 799a8206ad
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -87,23 +87,31 @@ class WebtoonsEpisodeExtractor(WebtoonsBase, GalleryExtractor):
self.episode_no = params.get("episode_no")
def metadata(self, page):
    """Collect metadata for one episode from its viewer page.

    'page' is the HTML source of the episode viewer page as a string.

    The '<meta name="keywords">' content is split on ", " and read
    positionally: [0] comic name, [1] episode number, [2] episode name.
    'username' and 'author_name' are only present when the page has an
    '<div class="author_area"' element; otherwise both are "".

    Returns a dict of metadata fields for this episode.
    """
    # NOTE(review): the markers below are requested in document order;
    # presumably each extr() call continues searching after the previous
    # match -- confirm against text.extract_from().
    extr = text.extract_from(page)
    keywords = extr('<meta name="keywords" content="', '"').split(", ")
    title = extr('<meta property="og:title" content="', '"')
    descr = extr('<meta property="og:description" content="', '"')

    # Author info is optional: only look for it when its container exists.
    if extr('<div class="author_area"', '\n'):
        username = extr('/creator/', '"')
        author_name = extr('<span>', '</span>')
    else:
        username = author_name = ""

    return {
        "genre"       : self.genre,
        "comic"       : self.comic,
        "title_no"    : self.title_no,
        "episode_no"  : self.episode_no,
        "title"       : text.unescape(title),
        "episode"     : keywords[1],
        "comic_name"  : text.unescape(keywords[0]),
        "episode_name": text.unescape(keywords[2]),
        "username"    : username,
        "author_name" : text.unescape(author_name),
        "description" : text.unescape(descr),
        "lang"        : self.lang,
        "language"    : util.code_to_language(self.lang),
    }
@staticmethod

@ -20,6 +20,22 @@ __tests__ = (
"42055e44659f6ffc410b3fb6557346dfbb993df3",
"49e1f2def04c6f7a6a3dacf245a1cd9abe77a6a9",
],
"author_name" : "Chris McCoy",
"comic" : "safely-endangered",
"comic_name" : "Safely Endangered",
"count" : 5,
"description" : "Silly comics for silly people.",
"episode" : "572",
"episode_name": "Ep. 572 - Earth",
"episode_no" : "572",
"genre" : "comedy",
"lang" : "en",
"language" : "English",
"num" : range(1, 5),
"title" : "Safely Endangered - Ep. 572 - Earth",
"title_no" : "352",
"username" : "safelyendangered",
},
{
@ -37,6 +53,18 @@ __tests__ = (
"title_no" : "312584",
},
{
"#url" : "https://www.webtoons.com/en/canvas/i-want-to-be-a-cute-anime-girl/209-the-storys-story/viewer?title_no=349416&episode_no=214",
"#category": ("", "webtoons", "episode"),
"#class" : webtoons.WebtoonsEpisodeExtractor,
"#count" : 4,
"comic_name" : "I want to be a cute anime girl",
"episode_name": "209 - The story's story",
"username" : "m9huj",
"author_name" : "Azul Crescent",
},
{
"#url" : "https://www.webtoons.com/en/comedy/live-with-yourself/list?title_no=919",
"#comment" : "english",

Loading…
Cancel
Save