improve extractor.request()

Replace the boolean 'fatal' parameter with 'expect', a list/range of
HTTP status codes >= 400 that should also be accepted, i.e. whose
responses are returned to the caller just like those with a 2xx/3xx
status code.
pull/133/head
Mike Fährmann 6 years ago
parent b84e71da91
commit 017188d268
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -73,7 +73,7 @@ class ArtstationExtractor(Extractor):
def get_user_info(self, username):
"""Return metadata for a specific user"""
url = "{}/users/{}/quick.json".format(self.root, username.lower())
response = self.request(url, fatal=False)
response = self.request(url, expect=(404,))
if response.status_code == 404:
raise exception.NotFoundError("user")
return response.json()

@@ -51,7 +51,7 @@ class Extractor():
return config.interpolate(
("extractor", self.category, self.subcategory, key), default)
def request(self, url, method="GET", encoding=None, fatal=True, retries=3,
def request(self, url, method="GET", encoding=None, expect=(), retries=3,
*args, **kwargs):
max_tries = retries
while True:
@@ -62,14 +62,15 @@ class Extractor():
except requests.exceptions.RequestException as exc:
raise exception.HttpError(exc)
else:
if 200 <= response.status_code < 400 or not fatal:
code = response.status_code
if 200 <= code < 400 or code in expect:
if encoding:
response.encoding = encoding
return response
msg = "{} HTTP Error: {} for url: {}".format(
response.status_code, response.reason, url)
if response.status_code < 500 and response.status_code != 429:
code, response.reason, url)
if code < 500 and code != 429:
break
if not retries:

@@ -264,9 +264,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
self.url = "https://" + match.group(1)
def deviations(self):
response = self.request(self.url, fatal=False)
response = self.request(self.url, expect=range(400, 500))
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
if response.status_code != 200 or not deviation_id:
if response.status_code >= 400 or not deviation_id:
raise exception.NotFoundError("image")
return (self.api.deviation(deviation_id),)

@@ -127,7 +127,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor):
yield Message.Version, 1
url = "{}/g/{}/{}/".format(self.root, self.gid, self.token)
response = self.request(url, fatal=False)
response = self.request(url, expect=range(400, 500))
page = response.text
if response.status_code == 404 and "Gallery Not Available" in page:
@@ -327,7 +327,7 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor):
r"/favorites\.php(?:\?(.*))?"]
test = [
("https://exhentai.org/favorites.php", None),
("https://exhentai.org/favorites.php?favcat=1&f_search=henreader"
("https://exhentai.org/favorites.php?favcat=1&f_search=touhou"
"&f_apply=Search+Favorites", None),
]

@@ -99,7 +99,8 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor):
num += 1
def get_job_metadata(self):
response = self.request(self.artist_url + "?enterAgree=1", fatal=False)
url = self.artist_url + "?enterAgree=1"
response = self.request(url, expect=(404,))
if response.status_code == 404:
raise exception.NotFoundError("user")

@@ -23,7 +23,7 @@ class ImgurExtractor(Extractor):
self.mp4 = self.config("mp4", True)
def _get_data(self, urlpart):
response = self.request("https://imgur.com/" + urlpart, fatal=False)
response = self.request("https://imgur.com/" + urlpart, expect=(404,))
if response.status_code == 404:
raise exception.NotFoundError(self.subcategory)
data = text.extract(response.text, "image : ", ",\n")[0]

@@ -98,7 +98,8 @@ class NhentaiSearchExtractor(NHentaiExtractor):
params["page"] = text.parse_int(params.get("page"), 1)
while True:
data = self.request(url, params=params, fatal=False).json()
data = self.request(
url, params=params, expect=range(400, 500)).json()
if "error" in data:
self.log.error("API request failed: \"%s\"", data["error"])

@@ -98,7 +98,7 @@ class NijieExtractor(AsynchronousExtractor):
params = {"id": self.user_id, "p": 1}
while True:
response = self.request(url, params=params, fatal=False)
response = self.request(url, params=params, expect=(404,))
if response.status_code == 404:
raise exception.NotFoundError("artist")

@@ -160,7 +160,7 @@ class PinterestAPI():
params = {"data": json.dumps({"options": options}), "source_url": ""}
response = self.extractor.request(
url, params=params, headers=self.HEADERS, fatal=False)
url, params=params, headers=self.HEADERS, expect=range(400, 500))
try:
data = response.json()

@@ -17,9 +17,9 @@ class XvideosExtractor(Extractor):
"""Base class for xvideos extractors"""
category = "xvideos"
def get_page(self):
response = self.request(self.url, fatal=False)
if response.status_code in (403, 404):
def get_page(self, codes=(403, 404)):
response = self.request(self.url, expect=codes)
if response.status_code in codes:
raise exception.NotFoundError(self.subcategory)
return response.text

Loading…
Cancel
Save