|
|
@ -110,7 +110,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|
|
|
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
|
|
|
|
r"|/s/([\da-f]{10})/(\d+)-(\d+))")
|
|
|
|
test = (
|
|
|
|
test = (
|
|
|
|
("https://exhentai.org/g/960460/4f0e369d82/", {
|
|
|
|
("https://exhentai.org/g/960460/4f0e369d82/", {
|
|
|
|
"keyword": "993bfaf68b4823084fbd0d3339564666463b1432",
|
|
|
|
"keyword": "1532ca4d0e4e0738dc994ca725a228af04a4e480",
|
|
|
|
"content": "493d759de534355c9f55f8e365565b62411de146",
|
|
|
|
"content": "493d759de534355c9f55f8e365565b62411de146",
|
|
|
|
}),
|
|
|
|
}),
|
|
|
|
("https://exhentai.org/g/960461/4f0e369d82/", {
|
|
|
|
("https://exhentai.org/g/960461/4f0e369d82/", {
|
|
|
@ -169,57 +169,55 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
|
|
|
|
|
|
|
|
|
|
|
|
def get_metadata(self, page):
    """Extract gallery metadata

    Builds a dict from the gallery's HTML 'page' containing the
    gallery id/token stored on this extractor, the titles, posting
    date, parent link, visibility, language (plus its code as 'lang'),
    gallery size, image count and the list of tags.
    """
    # NOTE(review): 'extr' presumably consumes the page sequentially,
    # so the calls below are kept in the order the fields are looked
    # up -- confirm against text.extract_from before reordering.
    extr = text.extract_from(page)
    data = {
        "gallery_id"   : self.gallery_id,
        "gallery_token": self.gallery_token,
        "title"        : text.unescape(extr('<h1 id="gn">', '</h1>')),
        "title_jp"     : text.unescape(extr('<h1 id="gj">', '</h1>')),
        "date"         : text.parse_datetime(extr(
            '>Posted:</td><td class="gdt2">', '</td>'), "%Y-%m-%d %H:%M"),
        "parent"       : extr(
            '>Parent:</td><td class="gdt2"><a href="', '"'),
        "visible"      : extr(
            '>Visible:</td><td class="gdt2">', '<'),
        "language"     : extr(
            '>Language:</td><td class="gdt2">', ' '),
        # drop the trailing 'B'/'b' of the unit before parsing the size
        "gallery_size" : text.parse_bytes(extr(
            '>File Size:</td><td class="gdt2">', '<').rstrip("Bb")),
        "count"        : text.parse_int(extr(
            '>Length:</td><td class="gdt2">', ' ')),
    }

    data["lang"] = util.language_to_code(data["language"])
    # tags are collected from every tag link on the whole page
    data["tags"] = [
        text.unquote(tag)
        for tag in text.extract_iter(page, 'hentai.org/tag/', '"')
    ]

    return data
|
|
|
|
|
|
|
|
|
|
|
|
def image_from_page(self, page):
    """Get image url and data from webpage

    Updates self.key ("next", "start", "show") from values found in
    'page' and returns a tuple of the download URL and the image's
    metadata dict (as returned by text.nameext_from_url).

    Raises ValueError (from str.index) if the load_image marker is
    not present in 'page'.
    """
    # Start searching 26 characters into the marker; the first
    # single-quoted string after that point is taken as the next key.
    pos = page.index('<div id="i3"><a onclick="return load_image(') + 26
    extr = text.extract_from(page, pos)

    # NOTE(review): 'extr' presumably advances through the page, so
    # the order of the calls below matters -- confirm before reordering.
    self.key["next"] = extr("'", "'")
    iurl = extr('<img id="img" src="', '"')
    orig = extr('hentai.org/fullimg.php', '"')

    if self.original and orig:
        # original image requested and a fullimg.php link is available
        url = self.root + "/fullimg.php" + text.unescape(orig)
        data = self._parse_original_info(extr('ownload original', '<'))
    else:
        url = iurl
        data = self._parse_image_info(url)

    data["num"] = self.image_num
    data["image_token"] = self.key["start"] = extr('var startkey="', '";')
    self.key["show"] = extr('var showkey="', '";')

    # filename and extension are always derived from the regular image URL
    return url, text.nameext_from_url(iurl, data)
|
|
|
|
|
|
|
|
|
|
|
|
def images_from_api(self):
|
|
|
|
def images_from_api(self):
|
|
|
|
"""Get image url and data from api calls"""
|
|
|
|
"""Get image url and data from api calls"""
|
|
|
|