[imgbox] handle non-existing images/galleries

pull/13/head
Mike Fährmann 8 years ago
parent 68fe58ee72
commit 4f51d72d22
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -9,7 +9,7 @@
"""Extract images from galleries at https://imgbox.com/"""
from .common import Extractor, AsynchronousExtractor, Message
from .. import text
from .. import text, exception
import re
class ImgboxGalleryExtractor(AsynchronousExtractor):
@ -33,14 +33,17 @@ class ImgboxGalleryExtractor(AsynchronousExtractor):
def items(self):
page = self.request(self.url_base + "/g/" + self.key).text
if "The specified gallery could not be found." in page:
raise exception.NotFoundError("gallery")
self.metadata = self.get_job_metadata(page)
yield Message.Version, 1
yield Message.Directory, self.metadata
for match in re.finditer(r'<a href="([^"]+)"><img alt="', page):
imgpage = self.request(self.url_base + match.group(1)).text
data = self.get_file_metadata(imgpage)
data = text.nameext_from_url(data["filename"], data)
yield Message.Url, self.get_file_url(imgpage), data
if data["filename"]:
data = text.nameext_from_url(data["filename"], data)
yield Message.Url, self.get_file_url(imgpage), data
def get_job_metadata(self, page):
"""Collect metadata for extractor-job"""
@ -88,7 +91,9 @@ class ImgboxImageExtractor(Extractor):
def items(self):
page = self.request("https://imgbox.com/" + self.key).text
url , pos = text.extract(page, 'src="https://i.', '"')
url , pos = text.extract(page, 'src="https://i.', '"')
if not url:
raise exception.NotFoundError("image")
filename, pos = text.extract(page, ' title="', '"', pos)
data = text.nameext_from_url(filename, {"image-key": self.key})
yield Message.Version, 1

Loading…
Cancel
Save