improve 'extractor.request'

- add 'fatal' argument
- improve internal logic and flow
- raise known exception on error
- update exception hierarchy
pull/40/head
Mike Fährmann 7 years ago
parent dcd573806e
commit 915a0137de
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -6,22 +6,49 @@
# it under the terms of the GNU General Public License version 2 as
# published by the Free Software Foundation.
"""Exception classes used by gallery-dl
class NoExtractorError(Exception):
"""No extractor can handle the given URL"""
Class Hierarchy:

Exception
 +-- GalleryDLException
      +-- ExtractionError
      |    +-- AuthenticationError
      |    +-- AuthorizationError
      |    +-- NotFoundError
      |    +-- HttpError
      +-- NoExtractorError
      +-- StopExtraction
"""
class GalleryDLException(Exception):
"""Base class for GalleryDL exceptions"""
class ExtractionError(GalleryDLException):
"""Base class for exceptions during information extraction"""
class AuthenticationError(Exception):
class AuthenticationError(ExtractionError):
"""Invalid or missing login information"""
class AuthorizationError(Exception):
class AuthorizationError(ExtractionError):
"""Insufficient privileges to access a resource"""
class NotFoundError(Exception):
class NotFoundError(ExtractionError):
"""Requested resource (gallery/image) does not exist"""
class StopExtraction(Exception):
class HttpError(ExtractionError):
"""HTTP request during extraction failed"""
class NoExtractorError(GalleryDLException):
"""No extractor can handle the given URL"""
class StopExtraction(GalleryDLException):
"""Extraction should stop"""

@ -107,7 +107,7 @@ class BatotoChapterExtractor(BatotoExtractor, AsynchronousExtractor):
"p": 1,
"supress_webtoon": "t",
}
response = self.session.get(self.reader_url, params=params)
response = self.request(self.reader_url, params=params, fatal=False)
if response.status_code == 405:
error = text.extract(response.text, "ERROR [", "]")[0]
if error == "10030":

@ -18,7 +18,7 @@ import requests
import threading
import http.cookiejar
from .message import Message
from .. import config
from .. import config, exception
class Extractor():
@ -47,11 +47,22 @@ class Extractor():
return config.interpolate(
("extractor", self.category, self.subcategory, key), default)
def request(self, url, encoding=None, *args, **kwargs):
    """Fetch 'url' through safe_request() using this extractor's session

    Extra positional and keyword arguments are forwarded to
    safe_request(). A truthy 'encoding' is stored on the response
    before it is returned.
    """
    reply = safe_request(self.session, url, *args, **kwargs)
    if not encoding:
        return reply
    reply.encoding = encoding
    return reply
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
            *args, **kwargs):
    """Send an HTTP request through this extractor's session

    Arguments:
        url: address to request
        method: HTTP method name handed to 'session.request()'
        encoding: if truthy, stored as the response's 'encoding' attribute
        fatal: if true, 'raise_for_status()' is called on the response,
            so an error status counts as a failed attempt
        retries: total number of attempts before giving up;
            values below 1 still allow a single attempt
        *args, **kwargs: forwarded unchanged to 'session.request()'

    Returns the response object of the first successful attempt.

    Raises exception.HttpError, wrapping the last
    'requests.exceptions.RequestException', once all attempts failed.
    """
    while True:
        try:
            response = self.session.request(method, url, *args, **kwargs)
            if fatal:
                response.raise_for_status()
            if encoding:
                response.encoding = encoding
            return response
        except requests.exceptions.RequestException as exc:
            retries -= 1
            # '<= 0' rather than '== 0': a 'retries' argument of 0 or less
            # would otherwise skip past the exit condition and loop forever
            if retries <= 0:
                raise exception.HttpError(exc) from exc
            # short pause before the next attempt
            time.sleep(1)
def _get_auth_info(self):
"""Return authentication information as (username, password) tuple"""
@ -164,33 +175,8 @@ class MangaExtractor(Extractor):
return []
def safe_request(session, url, method="GET", *args, **kwargs):
    """Request 'url' via 'session', retrying failed attempts

    An attempt counts as failed when 'session.request()' raises
    'requests.exceptions.ConnectionError' or when the response's status
    code differs from 'requests.codes.ok'. At most 5 attempts are made,
    with a one second pause between them.

    Returns the first response with an OK status code.

    After the fifth failed attempt the ConnectionError is re-raised, or
    'raise_for_status()' is called on the last response. If that call
    does not raise (a non-OK status that is not an error status), the
    response is returned as it is.
    """
    max_tries = 5
    tries = 0
    while True:
        # try to connect to remote source
        try:
            r = session.request(method, url, *args, **kwargs)
        except requests.exceptions.ConnectionError:
            tries += 1
            if tries >= max_tries:
                raise
            time.sleep(1)
            continue

        # everything ok -- proceed to download
        if r.status_code == requests.codes.ok:
            return r

        # reject error-status-codes
        tries += 1
        if tries >= max_tries:
            r.raise_for_status()
            # Not an error status after all: hand the response back.
            # Continuing here would retry forever, because the attempt
            # counter has already moved past its limit.
            return r
        time.sleep(1)
# Reduce strictness of the expected magic string in cookie jar files.
# (This allows the use of Wget-generated cookiejar files without modification)
# Reduce strictness of the expected magic string in cookiejar files.
# (This allows the use of Wget-generated cookiejars without modification)
http.cookiejar.MozillaCookieJar.magic_re = re.compile(
"#( Netscape)? HTTP Cookie File", re.IGNORECASE)

@ -242,7 +242,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
self.url = "https://" + match.group(1)
def deviations(self):
response = self.session.get(self.url)
response = self.request(self.url, fatal=False)
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
if response.status_code != 200 or not deviation_id:
raise exception.NotFoundError("image")

@ -56,7 +56,7 @@ class ExhentaiGalleryExtractor(Extractor):
yield Message.Version, 1
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
response = self.session.get(url)
response = self.request(url, fatal=False)
page = response.text
if response.status_code == 404 and "Gallery Not Available" in page:
raise exception.AuthorizationError()
@ -196,7 +196,7 @@ class ExhentaiGalleryExtractor(Extractor):
"""Actual login implementation"""
self.log.info("Logging in as %s", username)
url = "https://forums.e-hentai.org/index.php?act=Login&CODE=01"
params = {
data = {
"CookieDate": "1",
"b": "d",
"bt": "1-1",
@ -206,7 +206,7 @@ class ExhentaiGalleryExtractor(Extractor):
}
referer = "https://e-hentai.org/bounce_login.php?b=d&bt=1-1"
self.session.headers["Referer"] = referer
response = self.session.post(url, data=params)
response = self.request(url, method="POST", data=data)
if "You are now logged in as:" not in response.text:
raise exception.AuthenticationError()

@ -65,7 +65,7 @@ class GfycatImageExtractor(GfycatExtractor):
def _get_info(self, gfycat_id):
url = "https://gfycat.com/cajax/get/" + gfycat_id
data = self.session.get(url).json()
data = self.request(url).json()
if "error" in data:
raise exception.NotFoundError()
raise exception.NotFoundError("animation")
return data["gfyItem"]

@ -62,7 +62,7 @@ class HentaifoundryUserExtractor(Extractor):
def get_job_metadata(self):
"""Collect metadata for extractor-job"""
url = self.url_base + self.artist + "?enterAgree=1"
response = self.session.get(url)
response = self.request(url, fatal=False)
if response.status_code == 404:
raise exception.NotFoundError("user")
page = response.text
@ -150,7 +150,7 @@ class HentaifoundryImageExtractor(Extractor):
"""Collect metadata for an image"""
url = "https://www.hentai-foundry.com/pictures/user/{}/{}".format(
self.artist, self.index)
response = self.session.get(url + "?enterAgree=1")
response = self.request(url + "?enterAgree=1", fatal=False)
if response.status_code == 404:
raise exception.NotFoundError("image")
extr = text.extract

@ -23,7 +23,7 @@ class ImgurExtractor(Extractor):
self.mp4 = self.config("mp4", True)
def _get_data(self, urlpart):
response = self.session.get("https://imgur.com/" + urlpart)
response = self.request("https://imgur.com/" + urlpart, fatal=False)
if response.status_code == 404:
raise exception.NotFoundError(self.subcategory)
data = text.extract(response.text, "image : ", ",\n")[0]

@ -71,9 +71,9 @@ class NijieExtractor(AsynchronousExtractor):
def _login_impl(self, username, password):
"""Actual login implementation"""
self.log.info("Logging in as %s", username)
params = {"email": username, "password": password}
page = self.session.post("https://nijie.info/login_int.php",
data=params).text
data = {"email": username, "password": password}
page = self.request("https://nijie.info/login_int.php",
method="POST", data=data).text
if "//nijie.info/login.php" in page:
raise exception.AuthenticationError()
return self.session.cookies
@ -102,7 +102,7 @@ class NijieUserExtractor(NijieExtractor):
params = {"id": self.artist_id, "p": 1}
url = "https://nijie.info/members_illust.php"
while True:
response = self.session.get(url, params=params)
response = self.request(url, params=params, fatal=False)
if response.status_code == 404:
raise exception.NotFoundError("artist")
ids = list(text.extract_iter(response.text, ' illust_id="', '"'))
@ -133,8 +133,8 @@ class NijieImageExtractor(NijieExtractor):
self.page = ""
def get_job_metadata(self):
response = self.session.get(self.popup_url + self.image_id,
allow_redirects=False)
response = self.request(self.popup_url + self.image_id,
allow_redirects=False)
if 300 <= response.status_code < 400:
raise exception.NotFoundError("image")
self.page = response.text

@ -135,12 +135,10 @@ class MastodonAPI():
"""Get an account's statuses"""
url = "{}/api/v1/accounts/{}/statuses?only_media=1".format(
self.root, account_id)
while True:
while url:
response = self.session.get(url)
yield from self._parse(response)
url = response.links.get("next", {}).get("url")
if not url:
break
@staticmethod
def _parse(response):

@ -57,8 +57,8 @@ class SeigaExtractor(Extractor):
"""Actual login implementation"""
self.log.info("Logging in as %s", username)
url = "https://account.nicovideo.jp/api/v1/login"
params = {"mail_tel": username, "password": password}
self.session.post(url, data=params).close()
data = {"mail_tel": username, "password": password}
self.request(url, method="POST", data=data)
if "user_session" not in self.session.cookies:
raise exception.AuthenticationError()
del self.session.cookies["nicosid"]

@ -49,9 +49,11 @@ class Job():
except exception.AuthorizationError:
log.error("You do not have permission to access the resource "
"at '%s'", self.url)
except exception.NotFoundError as err:
res = str(err) or "resource (gallery/image/user)"
except exception.NotFoundError as exc:
res = str(exc) or "resource (gallery/image/user)"
log.error("The %s at '%s' does not exist", res, self.url)
except exception.HttpError as exc:
log.error("HTTP request failed:\n%s", exc)
except exception.StopExtraction:
pass
except Exception as exc:

Loading…
Cancel
Save