[exhentai] fix 'imgkey' handling

pull/13/head
Mike Fährmann 9 years ago
parent 4440b40234
commit 9c63027d2c
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -94,14 +94,15 @@ class ExhentaiExtractor(Extractor):
page = self.request(url).text page = self.request(url).text
data, pos = text.extract_all(page, ( data, pos = text.extract_all(page, (
(None , '<div id="i3"><a onclick="return load_image(', ''), (None , '<div id="i3"><a onclick="return load_image(', ''),
("imgkey" , "'", "'"), ("imgkey-next", "'", "'"),
("url" , '<img id="img" src="', '"'), ("url" , '<img id="img" src="', '"'),
("title" , '<div id="i4"><div>', ' :: '), ("title" , '<div id="i4"><div>', ' :: '),
("origurl" , 'http://exhentai.org/fullimg.php', '"'), ("origurl" , 'http://exhentai.org/fullimg.php', '"'),
("gid" , 'var gid=', ';'), ("gid" , 'var gid=', ';'),
("startkey", 'var startkey="', '";'), ("startkey" , 'var startkey="', '";'),
("showkey" , 'var showkey="', '";'), ("showkey" , 'var showkey="', '";'),
)) ))
data["imgkey"] = data["startkey"]
if data["origurl"]: if data["origurl"]:
data["origurl"] = "http://exhentai.org/fullimg.php" + text.unescape(data["origurl"]) data["origurl"] = "http://exhentai.org/fullimg.php" + text.unescape(data["origurl"])
else: else:
@@ -112,22 +113,23 @@ class ExhentaiExtractor(Extractor):
"method" : "showpage", "method" : "showpage",
"page" : 2, "page" : 2,
"gid" : int(data["gid"]), "gid" : int(data["gid"]),
"imgkey" : data["imgkey"], "imgkey" : data["imgkey-next"],
"showkey": data["showkey"], "showkey": data["showkey"],
} }
while True: while True:
if data["imgkey"] == data["imgkey-next"]:
return
time.sleep(random.uniform(3, 6)) time.sleep(random.uniform(3, 6))
page = self.session.post(self.api_url, json=request).json() page = self.session.post(self.api_url, json=request).json()
data["imgkey"] , pos = text.extract(page["i3"], "'", "'") data["imgkey"] = data["imgkey-next"]
data["imgkey-next"], pos = text.extract(page["i3"], "'", "'")
data["url"] , pos = text.extract(page["i3"], '<img id="img" src="', '"', pos) data["url"] , pos = text.extract(page["i3"], '<img id="img" src="', '"', pos)
data["title"] , pos = text.extract(page["i" ], '<div>', ' :: ') data["title"] , pos = text.extract(page["i" ], '<div>', ' :: ')
data["origurl"], pos = text.extract(page["i7"], '<a href="', '"') data["origurl"] , pos = text.extract(page["i7"], '<a href="', '"')
if data["origurl"]: if data["origurl"]:
data["origurl"] = text.unescape(data["origurl"]) data["origurl"] = text.unescape(data["origurl"])
else: else:
data["origurl"] = data["url"] data["origurl"] = data["url"]
yield data yield data
if request["imgkey"] == data["imgkey"]: request["imgkey"] = data["imgkey-next"]
return
request["imgkey"] = data["imgkey"]
request["page"] += 1 request["page"] += 1

Loading…
Cancel
Save