From 017188d2681b49195135d631775365baea4fd3c1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Sun, 17 Jun 2018 21:49:13 +0200 Subject: [PATCH] improve extractor.request() Replace the 'fatal' parameter with 'expect', which is a list/range of HTTP status codes >= 400 that should also be accepted. --- gallery_dl/extractor/artstation.py | 2 +- gallery_dl/extractor/common.py | 9 +++++---- gallery_dl/extractor/deviantart.py | 4 ++-- gallery_dl/extractor/exhentai.py | 4 ++-- gallery_dl/extractor/hentaifoundry.py | 3 ++- gallery_dl/extractor/imgur.py | 2 +- gallery_dl/extractor/nhentai.py | 3 ++- gallery_dl/extractor/nijie.py | 2 +- gallery_dl/extractor/pinterest.py | 2 +- gallery_dl/extractor/xvideos.py | 6 +++--- 10 files changed, 20 insertions(+), 17 deletions(-) diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 80ed0295..261710f1 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -73,7 +73,7 @@ class ArtstationExtractor(Extractor): def get_user_info(self, username): """Return metadata for a specific user""" url = "{}/users/{}/quick.json".format(self.root, username.lower()) - response = self.request(url, fatal=False) + response = self.request(url, expect=(404,)) if response.status_code == 404: raise exception.NotFoundError("user") return response.json() diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index 72a53369..1afd1b2c 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -51,7 +51,7 @@ class Extractor(): return config.interpolate( ("extractor", self.category, self.subcategory, key), default) - def request(self, url, method="GET", encoding=None, fatal=True, retries=3, + def request(self, url, method="GET", encoding=None, expect=(), retries=3, *args, **kwargs): max_tries = retries while True: @@ -62,14 +62,15 @@ class Extractor(): except requests.exceptions.RequestException as exc: raise exception.HttpError(exc) else: - if 200 <= response.status_code < 400 or not fatal: + code = response.status_code + if 200 <= code < 400 or code in expect: if encoding: response.encoding = encoding return response msg = "{} HTTP Error: {} for url: {}".format( - response.status_code, response.reason, url) - if response.status_code < 500 and response.status_code != 429: + code, response.reason, url) + if code < 500 and code != 429: break if not retries: diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index 41641052..cf13d7d8 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -264,9 +264,9 @@ class DeviantartDeviationExtractor(DeviantartExtractor): self.url = "https://" + match.group(1) def deviations(self): - response = self.request(self.url, fatal=False) + response = self.request(self.url, expect=range(400, 500)) deviation_id = text.extract(response.text, '//deviation/', '"')[0] - if response.status_code != 200 or not deviation_id: + if response.status_code >= 400 or not deviation_id: raise exception.NotFoundError("image") return (self.api.deviation(deviation_id),) diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index 4b07abcb..2cd244ea 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -127,7 +127,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): yield Message.Version, 1 url = "{}/g/{}/{}/".format(self.root, self.gid, self.token) - response = self.request(url, fatal=False) + response = self.request(url, expect=range(400, 500)) page = response.text if response.status_code == 404 and "Gallery Not Available" in page: @@ -327,7 +327,7 @@ class ExhentaiFavoriteExtractor(ExhentaiSearchExtractor): r"/favorites\.php(?:\?(.*))?"] test = [ ("https://exhentai.org/favorites.php", None), - ("https://exhentai.org/favorites.php?favcat=1&f_search=henreader" + ("https://exhentai.org/favorites.php?favcat=1&f_search=touhou" "&f_apply=Search+Favorites", None), ] diff --git a/gallery_dl/extractor/hentaifoundry.py b/gallery_dl/extractor/hentaifoundry.py index 23713fc0..4d7407a9 100644 --- a/gallery_dl/extractor/hentaifoundry.py +++ b/gallery_dl/extractor/hentaifoundry.py @@ -99,7 +99,8 @@ class HentaifoundryUserExtractor(HentaifoundryExtractor): num += 1 def get_job_metadata(self): - response = self.request(self.artist_url + "?enterAgree=1", fatal=False) + url = self.artist_url + "?enterAgree=1" + response = self.request(url, expect=(404,)) if response.status_code == 404: raise exception.NotFoundError("user") diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index f00b180d..c723de49 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -23,7 +23,7 @@ class ImgurExtractor(Extractor): self.mp4 = self.config("mp4", True) def _get_data(self, urlpart): - response = self.request("https://imgur.com/" + urlpart, fatal=False) + response = self.request("https://imgur.com/" + urlpart, expect=(404,)) if response.status_code == 404: raise exception.NotFoundError(self.subcategory) data = text.extract(response.text, "image : ", ",\n")[0] diff --git a/gallery_dl/extractor/nhentai.py b/gallery_dl/extractor/nhentai.py index a0c8abc1..f4aa5200 100644 --- a/gallery_dl/extractor/nhentai.py +++ b/gallery_dl/extractor/nhentai.py @@ -98,7 +98,8 @@ class NhentaiSearchExtractor(NHentaiExtractor): params["page"] = text.parse_int(params.get("page"), 1) while True: - data = self.request(url, params=params, fatal=False).json() + data = self.request( + url, params=params, expect=range(400, 500)).json() if "error" in data: self.log.error("API request failed: \"%s\"", data["error"]) diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index db2ce317..667fc479 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -98,7 +98,7 @@ class NijieExtractor(AsynchronousExtractor): params = {"id": self.user_id, "p": 1} while True: - response = self.request(url, params=params, fatal=False) + response = self.request(url, params=params, expect=(404,)) if response.status_code == 404: raise exception.NotFoundError("artist") diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 1583e03c..cf2d8978 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -160,7 +160,7 @@ class PinterestAPI(): params = {"data": json.dumps({"options": options}), "source_url": ""} response = self.extractor.request( - url, params=params, headers=self.HEADERS, fatal=False) + url, params=params, headers=self.HEADERS, expect=range(400, 500)) try: data = response.json() diff --git a/gallery_dl/extractor/xvideos.py b/gallery_dl/extractor/xvideos.py index 15b77b63..ee5bf173 100644 --- a/gallery_dl/extractor/xvideos.py +++ b/gallery_dl/extractor/xvideos.py @@ -17,9 +17,9 @@ class XvideosExtractor(Extractor): """Base class for xvideos extractors""" category = "xvideos" - def get_page(self): - response = self.request(self.url, fatal=False) - if response.status_code in (403, 404): + def get_page(self, codes=(403, 404)): + response = self.request(self.url, expect=codes) + if response.status_code in codes: raise exception.NotFoundError(self.subcategory) return response.text