fix extraction and update test results

- fixes for hbrowse, imgyt, imgcandy, hosturimage
- test updates for deviantart, gfycat
pull/40/head
Mike Fährmann 7 years ago
parent abd7c559cd
commit 0245a0ba5f
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -284,7 +284,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
r"/favourites/(\d+)/([^/?&#]+)"]
test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
"url": "65d070eae215b9375b4437a1ab4659efdad204e3",
-"keyword": "a8b7b7c8ef9a4eba87c96cfaf098a7ef1c1e8be5",
+"keyword": "b4abbad60f87a42fb6c1a021cb3a8efd9d31bfb7",
})]
def __init__(self, match):

@@ -45,7 +45,7 @@ class GfycatImageExtractor(GfycatExtractor):
test = [
("https://gfycat.com/GrayGenerousCowrie", {
"url": "e0b5e1d7223108249b15c3c7898dd358dbfae045",
-"keyword": "77a65049939823071b532cbdb7dec77582c6ee5b",
+"keyword": "f92a5792df3ae61817627768897f1d0dd134c2e4",
"content": "3157cd8b3799205c5a0df98a7ee31aa85bf6491e",
}),
(("https://thumbs.gfycat.com/SillyLameIsabellinewheatear"

@@ -65,10 +65,10 @@ class HbrowseChapterExtractor(Extractor):
return text.extract_all(page, (
('title' , '<td class="listLong">', '</td>'),
(None , '<td class="listLong">', ''),
-('artist' , 'title="">', '<'),
+('artist' , '>', '<'),
('count-total', '<td class="listLong">', ' '),
(None , '<td class="listLong">', ''),
-('origin' , 'title="">', '<'),
+('origin' , '>', '<'),
), values=data)[0]
def get_image_urls(self, page):

@@ -76,11 +76,11 @@ class ImgytImageExtractor(ImagehostImageExtractor):
https = True
def get_info(self, page):
-url, pos = text.extract(page, "<img class='centred' src='", "'")
+url, pos = text.extract(page, '<img class="centred" src="', '"')
if not url:
raise exception.NotFoundError("image")
-filename, pos = text.extract(page, " alt='", "'", pos)
-filename += splitext(url)[1] if filename else url
+filename, pos = text.extract(page, ' alt="', '"', pos)
+filename = (filename + splitext(url)[1]) if filename else url
return url, filename
@@ -183,7 +183,7 @@ class HosturimageImageExtractor(ImagehostImageExtractor):
def get_info(self, page):
_ , pos = text.extract(page, '<div id="image_details">', '')
-url, pos = text.extract(page, "src='", "'", pos)
+url, pos = text.extract(page, 'src="', '"', pos)
return url, url

Loading…
Cancel
Save