implement and use 'util.safe_int()'

same as Python's 'int()', except it doesn't raise any exceptions and
accepts a default value
pull/54/head
Mike Fährmann 7 years ago
parent 8a97bd0433
commit 9fc1d0c901
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -60,15 +60,17 @@ class BatotoExtractor():
return {c: response.cookies[c] for c in self.cookienames}
@staticmethod
def _parse_chapter_string(data):
def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'"""
data["chapter_string"] = text.unescape(data["chapter_string"])
pattern = r"(?:Vol.(\d+) )?Ch\.(\d+)([^ :]*)(?::? (.+))"
match = re.match(pattern, data["chapter_string"])
volume, chapter, data["chapter_minor"], title = match.groups()
data["volume"] = int(volume) if volume else 0
data["chapter"] = int(chapter)
data["volume"] = util.safe_int(volume)
data["chapter"] = util.safe_int(chapter)
data["title"] = title if title != "Read Online" else ""
return data
class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
@ -99,7 +101,7 @@ class BatotoMangaExtractor(BatotoExtractor, MangaExtractor):
if not data["token"]:
return results
self._parse_chapter_string(data)
self.parse_chapter_string(data)
data["lang"] = util.language_to_code(data["language"])
data["group"] = text.unescape(data["group"])
data["contributor"] = text.unescape(data["contributor"])
@ -117,7 +119,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
test = [
("http://bato.to/reader#459878c8fda07502", {
"url": "432d7958506ad913b0a9e42664a89e46a63e9296",
"keyword": "a6ca65532ad5653d0690b0ccc83f53b6e952f1bf",
"keyword": "96598b6f94d2b26d11c2780f8173cd6ab5fe9906",
}),
("http://bato.to/reader#459878c8fda07503", {
"exception": exception.NotFoundError,
@ -148,15 +150,14 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
elif error == "10020":
raise exception.NotFoundError("chapter")
else:
raise Exception("[batoto] unexpected error code: " + error)
raise Exception("error code: " + error)
page = response.text
data = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Directory, data.copy()
for i in range(int(data["count"])):
for data["page"] in range(1, data["count"]+1):
next_url, image_url = self.get_page_urls(page)
text.nameext_from_url(image_url, data)
data["page"] = i+1
yield Message.Url, image_url, data.copy()
if next_url:
params["p"] += 1
@ -181,10 +182,9 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
"group": text.unescape(group),
"lang": util.language_to_code(lang),
"language": lang,
"count": count,
"count": util.safe_int(count),
}
self._parse_chapter_string(data)
return data
return self.parse_chapter_string(data)
@staticmethod
def get_page_urls(page):

@ -9,7 +9,7 @@
"""Extract images from https://www.deviantart.com/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
from ..cache import cache, memcache
import itertools
import datetime
@ -57,7 +57,7 @@ class DeviantartExtractor(Extractor):
if "videos" in deviation:
video = max(deviation["videos"],
key=lambda x: int(x["quality"][:-1]))
key=lambda x: util.safe_int(x["quality"][:-1]))
yield self.commit(deviation, video)
if "flash" in deviation:

@ -25,7 +25,7 @@ class ExhentaiGalleryExtractor(Extractor):
pattern = [r"(?:https?://)?(g\.e-|e-|ex)hentai\.org/g/(\d+)/([\da-f]{10})"]
test = [
("https://exhentai.org/g/960460/4f0e369d82/", {
"keyword": "d837276b02c4e91e96c1b40fe4415cbb73b56577",
"keyword": "173277161e28162dcc755d2e7a88e6cd750f2477",
"content": "493d759de534355c9f55f8e365565b62411de146",
}),
("https://exhentai.org/g/960461/4f0e369d82/", {
@ -44,6 +44,7 @@ class ExhentaiGalleryExtractor(Extractor):
self.key = {}
self.count = 0
self.version, self.gid, self.token = match.groups()
self.gid = util.safe_int(self.gid)
self.original = self.config("original", True)
self.wait_min = self.config("wait-min", 3)
self.wait_max = self.config("wait-max", 6)
@ -72,7 +73,7 @@ class ExhentaiGalleryExtractor(Extractor):
raise exception.NotFoundError("gallery")
data = self.get_job_metadata(page)
self.count = int(data["count"])
self.count = data["count"]
yield Message.Directory, data
for url, image in self.get_images(page):
@ -100,6 +101,7 @@ class ExhentaiGalleryExtractor(Extractor):
data["lang"] = util.language_to_code(data["language"])
data["title"] = text.unescape(data["title"])
data["title_jp"] = text.unescape(data["title_jp"])
data["count"] = util.safe_int(data["count"])
return data
def get_images(self, page):
@ -141,7 +143,7 @@ class ExhentaiGalleryExtractor(Extractor):
nextkey = self.key["next"]
request = {
"method" : "showpage",
"gid" : int(self.gid),
"gid" : self.gid,
"imgkey" : nextkey,
"showkey": self.key["show"],
}

@ -108,9 +108,11 @@ class FallenangelsMangaExtractor(MangaExtractor):
title , pos = text.extract(page, '<em>', '</em>', pos)
manga, _, chapter = chapter.rpartition(" ")
chapter, _, minor = chapter.partition(".")
chapter, dot, minor = chapter.partition(".")
results.append((url, {
"manga": manga, "title": title, "volume": int(volume),
"chapter": int(chapter), "chapter_minor": minor,
"manga": manga, "title": title,
"volume": util.safe_int(volume),
"chapter": util.safe_int(chapter),
"chapter_minor": dot + minor,
"lang": self.lang, "language": language,
}))

@ -9,7 +9,7 @@
"""Extract images from http://www.hbrowse.com/"""
from .common import Extractor, MangaExtractor, Message
from .. import text
from .. import text, util
import json
@ -19,7 +19,8 @@ class HbrowseExtractor(Extractor):
root = "http://www.hbrowse.com"
@staticmethod
def _parse_page(page, data):
def parse_page(page, data):
"""Parse metadata on 'page' and add it to 'data'"""
text.extract_all(page, (
('manga' , '<td class="listLong">', '</td>'),
('artist', '<td class="listLong">', '</td>'),
@ -28,9 +29,10 @@ class HbrowseExtractor(Extractor):
), values=data)
data["manga"] = text.unescape(data["manga"])
data["total"] = int(data["total"])
data["total"] = util.safe_int(data["total"])
data["artist"] = text.remove_html(data["artist"])
data["origin"] = text.remove_html(data["origin"])
return data
class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
@ -44,8 +46,10 @@ class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
def chapters(self, page):
results = []
data = {"manga_id": int(self.url.rstrip("/").rpartition("/")[2])}
self._parse_page(page, data)
data = self.parse_page(page, {
"manga_id": util.safe_int(
self.url.rstrip("/").rpartition("/")[2])
})
pos = 0
needle = '<td class="listMiddle">\n<a class="listLink" href="'
@ -54,7 +58,7 @@ class HbrowseMangaExtractor(MangaExtractor, HbrowseExtractor):
if not url:
return results
title, pos = text.extract(page, '>View ', '<', pos)
data["chapter"] = int(url.rpartition("/")[2][1:])
data["chapter"] = util.safe_int(url.rpartition("/")[2][1:])
data["title"] = title
results.append((url, data.copy()))
@ -87,9 +91,10 @@ class HbrowseChapterExtractor(HbrowseExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {"manga_id": int(self.gid), "chapter": int(self.chapter)}
self._parse_page(page, data)
return data
return self.parse_page(page, {
"manga_id": util.safe_int(self.gid),
"chapter": util.safe_int(self.chapter)
})
def get_image_urls(self, page):
"""Yield all image-urls for a 'chapter'"""

@ -9,7 +9,7 @@
"""Extract hentai-manga from https://hentai2read.com/"""
from .common import MangaExtractor
from .. import text
from .. import text, util
from . import hentaicdn
import re
import json
@ -37,7 +37,7 @@ class Hentai2readMangaExtractor(MangaExtractor):
page, '<span itemprop="itemreviewed">', '</span>')
mtype, pos = text.extract(
page, '<small class="text-danger">[', ']</small>', pos)
manga_id = int(text.extract(page, 'data-mid="', '"', pos)[0])
manga_id = util.safe_int(text.extract(page, 'data-mid="', '"', pos)[0])
page, pos = text.extract(
page, '<ul class="nav-chapters remove-margin-b">', '</ul>\n</div>')
@ -51,7 +51,8 @@ class Hentai2readMangaExtractor(MangaExtractor):
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
results.append((url, {
"manga_id": manga_id, "manga": manga, "type": mtype,
"chapter_id": int(chapter_id), "chapter": int(chapter),
"chapter_id": util.safe_int(chapter_id),
"chapter": util.safe_int(chapter),
"title": title, "lang": "en", "language": "English",
}))

@ -9,7 +9,7 @@
"""Extract images from https://www.hentai-foundry.com/"""
from .common import Extractor, Message
from .. import text, exception
from .. import text, util, exception
class HentaifoundryUserExtractor(Extractor):
@ -23,7 +23,7 @@ class HentaifoundryUserExtractor(Extractor):
test = [
("https://www.hentai-foundry.com/pictures/user/Tenpura", {
"url": "ebbc981a85073745e3ca64a0f2ab31fab967fc28",
"keyword": "6e9a549feb9bafebd9d9342ef3c8ccad33a7031c",
"keyword": "f8fecc8aa89978ecf402ec221243978fe791bd54",
}),
("http://www.hentai-foundry.com/user/asdq/profile", {
"exception": exception.NotFoundError,
@ -40,7 +40,7 @@ class HentaifoundryUserExtractor(Extractor):
self.set_filters(token)
yield Message.Version, 1
yield Message.Directory, data
for url, image in self.get_images(int(data["count"])):
for url, image in self.get_images(data["count"]):
image.update(data)
yield Message.Url, url, image
@ -68,7 +68,7 @@ class HentaifoundryUserExtractor(Extractor):
page = response.text
token, pos = text.extract(page, 'hidden" value="', '"')
count, pos = text.extract(page, 'class="active" >Pictures (', ')', pos)
return {"artist": self.artist, "count": count}, token
return {"artist": self.artist, "count": util.safe_int(count)}, token
def get_image_metadata(self, url):
"""Collect metadata for an image"""
@ -79,7 +79,7 @@ class HentaifoundryUserExtractor(Extractor):
page, 'Pictures</a> &raquo; <span>', '<')
url, pos = text.extract(
page, '//pictures.hentai-foundry.com', '"', pos)
data = {"index": index, "title": text.unescape(title)}
data = {"index": util.safe_int(index), "title": text.unescape(title)}
text.nameext_from_url(url, data)
return "https://pictures.hentai-foundry.com" + url, data
@ -127,7 +127,7 @@ class HentaifoundryImageExtractor(Extractor):
(("http://www.hentai-foundry.com/"
"pictures/user/Tenpura/407501/shimakaze"), {
"url": "fbf2fd74906738094e2575d2728e8dc3de18a8a3",
"keyword": "304479cfe00fbb723886be78b2bd6b9306a31d8a",
"keyword": "85b8e26fa93d00ae1333cb7b418078f1792dc4a8",
"content": "91bf01497c39254b6dfb234a18e8f01629c77fd1",
}),
("http://www.hentai-foundry.com/pictures/user/Tenpura/340853/", {
@ -160,7 +160,7 @@ class HentaifoundryImageExtractor(Extractor):
url , pos = extr(page, '//pictures.hentai-foundry.com', '"', pos)
data = {
"artist": artist,
"index": self.index,
"index": util.safe_int(self.index),
"title": text.unescape(title),
}
text.nameext_from_url(url, data)

@ -9,7 +9,7 @@
"""Extract hentai-manga from https://hentaihere.com/"""
from .common import MangaExtractor
from .. import text
from .. import text, util
from . import hentaicdn
import re
@ -32,7 +32,8 @@ class HentaihereMangaExtractor(MangaExtractor):
def chapters(self, page):
results = []
manga_id = int(self.url.rstrip("/").rpartition("/")[2][1:])
manga_id = util.safe_int(
self.url.rstrip("/").rpartition("/")[2][1:])
manga, pos = text.extract(
page, '<span itemprop="name">', '</span>')
mtype, pos = text.extract(
@ -48,7 +49,8 @@ class HentaihereMangaExtractor(MangaExtractor):
chapter, _, title = text.unescape(chapter).strip().partition(" - ")
results.append((url, {
"manga_id": manga_id, "manga": manga, "type": mtype,
"chapter_id": int(chapter_id), "chapter": int(chapter),
"chapter_id": util.safe_int(chapter_id),
"chapter": util.safe_int(chapter),
"title": title, "lang": "en", "language": "English",
}))

@ -9,7 +9,7 @@
"""Extract images from http://imagefap.com/"""
from .common import Extractor, Message
from .. import text
from .. import text, util
import json
@ -159,7 +159,7 @@ class ImagefapUserExtractor(Extractor):
yield Message.Version, 1
for gid, name in self.get_gallery_data():
url = "http://www.imagefap.com/gallery/" + gid
data = {"gallery_id": int(gid), "name": name}
data = {"gallery_id": util.safe_int(gid), "name": name}
yield Message.Queue, url, data
def get_gallery_data(self):

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://kissmanga.com/"""
from .common import Extractor, MangaExtractor, Message
from .. import text, cloudflare, aes
from .. import text, util, cloudflare, aes
from ..cache import cache
import re
import hashlib
@ -38,7 +38,8 @@ class KissmangaExtractor(Extractor):
request = cloudflare.request_func
@staticmethod
def _parse_chapter_string(data):
def parse_chapter_string(data):
"""Parse 'chapter_string' value contained in 'data'"""
data["chapter_string"] = text.unescape(data["chapter_string"])
match = re.match((
@ -49,16 +50,16 @@ class KissmangaExtractor(Extractor):
), data["chapter_string"])
if not match:
match = re.match((
r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?"
# r"[\w ]+?(?: -)? 0*()(\d+)(?: (.+))?(?: - (.+))?"
), data["chapter_string"])
match = re.match(
r"[\w ]+?(?: -)? 0*()(\d+)()(?: *[:-]? *(.+))?",
data["chapter_string"])
volume, chapter, minor, title = match.groups()
data["volume"] = int(volume) if volume else 0
data["chapter"] = int(chapter) if chapter else 0
data["volume"] = util.safe_int(volume)
data["chapter"] = util.safe_int(chapter)
data["chapter_minor"] = "." + minor if minor else ""
data["title"] = title if title and title != "Read Online" else ""
return data
class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
@ -87,7 +88,7 @@ class KissmangaMangaExtractor(KissmangaExtractor, MangaExtractor):
"manga": manga, "id": url.rpartition("=")[2],
"chapter_string": chapter, "lang": "en", "language": "English",
}
self._parse_chapter_string(data)
self.parse_chapter_string(data)
results.append((self.root + url, data))
return results
@ -133,8 +134,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
"lang": "en",
"language": "English",
}
self._parse_chapter_string(data)
return data
return self.parse_chapter_string(data)
def get_image_urls(self, page):
"""Extract list of all image-urls for a manga chapter"""
@ -148,7 +148,7 @@ class KissmangaChapterExtractor(KissmangaExtractor):
]
except UnicodeDecodeError:
self.log.error("Failed to decrypt image URls")
except (ValueError, IndexError) as e:
except (ValueError, IndexError):
self.log.error("Failed to get AES key")
return []

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://www.mangafox.me/"""
from .common import AsynchronousExtractor, Message
from .. import text, exception
from .. import text, util, exception
import re
@ -24,7 +24,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
r"[^/]+/(v\d+/)?c\d+[^/]*)")]
test = [(("http://mangafox.me/manga/kidou_keisatsu_patlabor/"
"v05/c006.2/1.html"), {
"keyword": "ef2757d6136ef6b02eafe12d98a05f189fe8b2ba",
"keyword": "36b570e9ef11b4748407324fe08bebbe4856e6fd",
"content": "5c50c252dcf12ffecf68801f4db8a2167265f66c",
})]
@ -38,7 +38,7 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
raise exception.AuthorizationError()
data = self.get_metadata(page)
urls = zip(
range(1, int(data["count"])+1),
range(1, data["count"]+1),
self.get_image_urls(page),
)
yield Message.Version, 1
@ -50,17 +50,19 @@ class MangafoxChapterExtractor(AsynchronousExtractor):
def get_metadata(self, page):
"""Collect metadata for extractor-job"""
data = text.extract_all(page, (
("manga" , " - Read ", " Manga Scans "),
("sid" , "var sid=", ";"),
("cid" , "var cid=", ";"),
("count" , "var total_pages=", ";"),
("chapter", 'var current_chapter="', '";'),
("manga" , " - Read ", " Manga Scans "),
("sid" , "var sid=", ";"),
("cid" , "var cid=", ";"),
("count" , "var total_pages=", ";"),
("chapter_string", 'var current_chapter="', '"'),
))[0]
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter"])
data["volume"] = match.group(2) or ""
match = re.match(r"(v0*(\d+)/)?c0*(\d+)(.*)", data["chapter_string"])
data["volume"] = match.group(2)
data["chapter"] = match.group(3)
data["chapter_minor"] = match.group(4) or ""
data["manga"] = data["manga"].rpartition(" ")[0]
for key in ("sid", "cid", "count", "volume", "chapter"):
data[key] = util.safe_int(data[key])
return data
def get_image_urls(self, page):

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://www.mangahere.co/"""
from .common import MangaExtractor, AsynchronousExtractor, Message
from .. import text
from .. import text, util
import re
@ -46,8 +46,9 @@ class MangahereMangaExtractor(MangaExtractor):
date, pos = text.extract(page, 'class="right">', '</span>', pos)
results.append((url, {
"manga": manga, "title": title, "date": date,
"chapter": int(chapter), "chapter_minor": dot + minor,
"volume": int(volume.rpartition(" ")[2]) if volume else 0,
"volume": util.safe_int(volume.rpartition(" ")[2]),
"chapter": util.safe_int(chapter),
"chapter_minor": dot + minor,
"lang": "en", "language": "English",
}))
@ -62,7 +63,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
pattern = [(r"(?:https?://)?(?:www\.)?mangahere\.co/manga/"
r"([^/]+(?:/v0*(\d+))?/c0*(\d+)(\.\d+)?)")]
test = [("http://www.mangahere.co/manga/dongguo_xiaojie/c003.2/", {
"keyword": "8cb9f9512b68d2cdcbea2419592b9247304c149b",
"keyword": "0c263b83f803524baa8717d2b4d841617aa8d775",
"content": "dd8454469429c6c717cbc3cad228e76ef8c6e420",
})]
url_fmt = "http://www.mangahere.co/manga/{}/{}.html"
@ -75,7 +76,7 @@ class MangahereChapterExtractor(AsynchronousExtractor):
page = self.request(self.url_fmt.format(self.part, 1)).text
data = self.get_job_metadata(page)
urls = zip(
range(1, int(data["count"])+1),
range(1, data["count"]+1),
self.get_image_urls(page),
)
yield Message.Version, 1
@ -96,11 +97,11 @@ class MangahereChapterExtractor(AsynchronousExtractor):
return {
"manga": text.unescape(manga),
# "title": TODO,
"volume": self.volume or "",
"chapter": self.chapter,
"volume": util.safe_int(self.volume),
"chapter": util.safe_int(self.chapter),
"chapter_minor": self.chminor or "",
"chapter_id": chid,
"count": count,
"chapter_id": util.safe_int(chid),
"count": util.safe_int(count),
"lang": "en",
"language": "English",
}

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://mangapark.me/"""
from .common import Extractor, MangaExtractor, Message
from .. import text
from .. import text, util
class MangaparkExtractor(Extractor):
@ -18,17 +18,18 @@ class MangaparkExtractor(Extractor):
root = "http://mangapark.me"
@staticmethod
def _parse_chapter_path(path, data):
def parse_chapter_path(path, data):
"""Get volume/chapter information from url-path of a chapter"""
data["volume"], data["chapter_minor"] = 0, ""
for part in path.split("/")[3:]:
key, value = part[0], part[1:]
if key == "s":
data["version"] = int(value)
data["version"] = util.safe_int(value)
elif key == "v":
data["volume"] = int(value)
data["volume"] = util.safe_int(value)
elif key == "c":
chapter, dot, minor = value.partition(".")
data["chapter"] = int(chapter)
data["chapter"] = util.safe_int(chapter)
data["chapter_minor"] = dot + minor
elif key == "e":
data["chapter_minor"] = "v" + value
@ -59,10 +60,10 @@ class MangaparkMangaExtractor(MangaparkExtractor, MangaExtractor):
date , pos = text.extract(page, '<i>', '</i>', pos)
count, pos = text.extract(page, '\tof ', ' ', pos)
self._parse_chapter_path(path, data)
self.parse_chapter_path(path, data)
data["title"] = title[3:].strip()
data["date"] = date
data["count"] = int(count)
data["count"] = util.safe_int(count)
results.append((self.root + path, data.copy()))
@ -107,7 +108,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {"lang": "en", "language": "English"}
self._parse_chapter_path(self.path, data)
self.parse_chapter_path(self.path, data)
text.extract_all(page, (
("manga_id" , "var _manga_id = '", "'"),
("chapter_id", "var _book_id = '", "'"),
@ -119,7 +120,7 @@ class MangaparkChapterExtractor(MangaparkExtractor):
data["manga"], _, data["type"] = data["manga"].rpartition(" ")
data["manga"] = text.unescape(data["manga"])
data["title"] = data["title"].partition(": ")[2]
data["count"] = int(data["count"])
data["count"] = util.safe_int(data["count"])
return data
@staticmethod

@ -9,7 +9,7 @@
"""Extract manga-chapters and entire manga from http://www.mangareader.net/"""
from .common import AsynchronousExtractor, MangaExtractor, Message
from .. import text
from .. import text, util
class MangareaderBase():
@ -20,7 +20,8 @@ class MangareaderBase():
root = "http://www.mangareader.net"
@staticmethod
def _parse_page(page, data):
def parse_page(page, data):
"""Parse metadata on 'page' and add it to 'data'"""
text.extract_all(page, (
("manga" , '<h2 class="aname">', '</h2>'),
("release", '>Year of Release:</td>\n<td>', '</td>'),
@ -30,6 +31,7 @@ class MangareaderBase():
data["manga"] = data["manga"].strip()
data["author"] = text.unescape(data["author"])
data["artist"] = text.unescape(data["artist"])
return data
class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
@ -43,8 +45,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
def chapters(self, page):
results = []
data = {"lang": "en", "language": "English"}
self._parse_page(page, data)
data = self.parse_page(page, {"lang": "en", "language": "English"})
needle = '<div class="chico_manga"></div>\n<a href="'
pos = page.index('<div id="chapterlist">')
@ -54,7 +55,7 @@ class MangareaderMangaExtractor(MangareaderBase, MangaExtractor):
return results
data["title"], pos = text.extract(page, '</a> : ', '</td>', pos)
data["date"] , pos = text.extract(page, '<td>', '</td>', pos)
data["chapter"] = int(url.rpartition("/")[2])
data["chapter"] = util.safe_int(url.rpartition("/")[2])
results.append((self.root + url, data.copy()))
@ -91,17 +92,16 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
def get_job_metadata(self, chapter_page):
"""Collect metadata for extractor-job"""
page = self.request(self.root + self.url_title).text
data = {
"chapter": int(self.chapter),
data = self.parse_page(page, {
"chapter": util.safe_int(self.chapter),
"lang": "en",
"language": "English",
}
self._parse_page(page, data)
})
text.extract_all(page, (
('title', ' ' + self.chapter + '</a> : ', '</td>'),
('date', '<td>', '</td>'),
), page.index('<div id="chapterlist">'), data)
data["count"] = int(text.extract(
data["count"] = util.safe_int(text.extract(
chapter_page, '</select> of ', '<')[0]
)
return data
@ -123,6 +123,6 @@ class MangareaderChapterExtractor(MangareaderBase, AsynchronousExtractor):
height, pos = extr(page, ' height="', '"', pos)
image, pos = extr(page, ' src="', '"', pos)
return self.root + url, image, text.nameext_from_url(image, {
"width": int(width),
"height": int(height),
"width": util.safe_int(width),
"height": util.safe_int(height),
})

@ -9,7 +9,7 @@
"""Extract manga-chapters from https://mangastream.com/"""
from .common import AsynchronousExtractor, Message
from .. import text
from .. import text, util
from urllib.parse import urljoin
@ -32,8 +32,8 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
data = self.get_job_metadata(page)
next_url = None
yield Message.Version, 1
yield Message.Directory, data
for data["page"] in range(1, int(data["count"])+1):
yield Message.Directory, data.copy()
for data["page"] in range(1, data["count"]+1):
if next_url:
page = self.request(next_url).text
next_url, image_url = self.get_page_metadata(page)
@ -44,21 +44,19 @@ class MangastreamChapterExtractor(AsynchronousExtractor):
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
manga, pos = text.extract(
page, '<span class="hidden-xs hidden-sm">', "<"
)
page, '<span class="hidden-xs hidden-sm">', "<")
pos = page.find(self.part, pos)
title, pos = text.extract(page, ' - ', '<', pos)
count, pos = text.extract(page, 'Last Page (', ')', pos)
data = {
return {
"manga": manga,
"chapter": text.unquote(self.chapter),
"chapter-id": self.ch_id,
"chapter_id": util.safe_int(self.ch_id),
"title": title,
"count": count,
"count": util.safe_int(count, 1),
"lang": "en",
"language": "English",
}
return data
@staticmethod
def get_page_metadata(page):

@ -105,7 +105,7 @@ class MangazukiMangaExtractor(MangaExtractor):
for url in urls:
chapter = url.rpartition("/")[2]
chapter, dot, minor = chapter.partition(".")
data["chapter"] = int(chapter)
data["chapter"] = util.safe_int(chapter)
data["chapter_minor"] = dot + minor
results.append((url, data.copy()))
if 'class="next disabled"' in page:

@ -9,7 +9,7 @@
"""Extract manga pages from http://www.thespectrum.net/manga_scans/"""
from .common import MangaExtractor, AsynchronousExtractor, Message
from .. import text
from .. import text, util
class SpectrumnexusMangaExtractor(MangaExtractor):
@ -19,6 +19,7 @@ class SpectrumnexusMangaExtractor(MangaExtractor):
reverse = False
test = [("http://view.thespectrum.net/series/kare-kano-volume-01.html", {
"url": "b2b175aad5ef1701cc4aee7c24f1ca3a93aba9cb",
"keyword": "5ed9d5c7c69d2d03417c853c4e8eae30f1e5febf",
})]
def chapters(self, page):
@ -47,7 +48,7 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
test = [(("http://view.thespectrum.net/series/"
"toriko.html?ch=Chapter+343&page=1"), {
"url": "c0fc7dc594841217cc622a67edd79f06e9900333",
"keyword": "8499166b62db0c87e7109cc5f9aa837b4815dd9c",
"keyword": "3d0cb57b6b1c2cbecc7aed33f83c24891a4ff53f",
})]
def __init__(self, match):
@ -66,27 +67,28 @@ class SpectrumnexusChapterExtractor(AsynchronousExtractor):
data = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Directory, data.copy()
count = int(data["count"])
for i in range(1, count+1):
for i in range(1, data["count"]+1):
url = self.get_image_url(page)
text.nameext_from_url(url, data)
data["page"] = i
yield Message.Url, url, data.copy()
if i < count:
if i < data["count"]:
params["page"] += 1
page = self.request(self.url, params=params).text
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
data = {
"chapter": self.chapter or "",
"volume": self.volume or "",
"chapter": util.safe_int(self.chapter),
"volume": util.safe_int(self.volume),
"identifier": self.identifier.replace("+", " "),
}
return text.extract_all(page, (
data = text.extract_all(page, (
('manga', '<title>', ' &#183; SPECTRUM NEXUS </title>'),
('count', '<div class="viewerLabel"> of ', '<'),
), values=data)[0]
data["count"] = util.safe_int(data["count"])
return data
@staticmethod
def get_image_url(page):

@ -90,6 +90,16 @@ def combine_dict(a, b):
return a
def safe_int(value, default=0):
    """Convert 'value' to int, returning 'default' on failure.

    Behaves like the built-in int(), except that it never raises:
    any value that cannot be converted (None, "", non-numeric strings,
    unsupported types) yields 'default' instead.

    Args:
        value: The object to convert.
        default: Value returned when conversion fails (default: 0).

    Returns:
        int(value) on success, otherwise 'default'.
    """
    # EAFP: int(None) raises TypeError and int("") raises ValueError,
    # so a separate None/empty-string pre-check is unnecessary.
    try:
        return int(value)
    except (ValueError, TypeError):
        return default
def code_to_language(code, default=None):
"""Map an ISO 639-1 language code to its actual name"""
return CODES.get((code or "").lower(), default)

@ -160,6 +160,16 @@ class TestOther(unittest.TestCase):
{1: {2: {3: {4: {"1": "A", "3": "C"}}}}}),
{1: {2: {3: {4: {"1": "A", "2": "b", "3": "C"}}}}})
def test_safe_int(self):
    """safe_int: convertible values pass through; everything else
    falls back to the default (0 unless overridden)."""
    # (input, expected) pairs using the implicit default of 0
    for value, expected in (
        (123  , 123),
        ("123", 123),
        ("zzz", 0),
        (""   , 0),
        (None , 0),
    ):
        self.assertEqual(util.safe_int(value), expected)
    # every failing input must yield the caller-supplied default
    for value in ("zzz", "", None):
        self.assertEqual(util.safe_int(value, "default"), "default")
if __name__ == '__main__':
unittest.main()

Loading…
Cancel
Save