diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index 24197ad9..f7b3bc1d 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -79,9 +79,7 @@ class ArtstationExtractor(Extractor): def get_user_info(self, username): """Return metadata for a specific user""" url = "{}/users/{}/quick.json".format(self.root, username.lower()) - response = self.request(url, expect=(404,)) - if response.status_code == 404: - raise exception.NotFoundError("user") + response = self.request(url, notfound="user") return response.json() def _pagination(self, url, params=None): diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index e883d33a..be4316b8 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -66,8 +66,8 @@ class Extractor(): return config.interpolate( ("extractor", self.category, self.subcategory, key), default) - def request(self, url, method="GET", *, session=None, - encoding=None, expect=(), retries=None, **kwargs): + def request(self, url, method="GET", *, session=None, retries=None, + encoding=None, fatal=True, notfound=None, **kwargs): tries = 1 retries = self._retries if retries is None else retries session = self.session if session is None else session @@ -86,10 +86,13 @@ class Extractor(): raise exception.HttpError(exc) else: code = response.status_code - if 200 <= code < 400 or code in expect: + if 200 <= code < 400 or not fatal and \ + (400 <= code < 429 or 431 <= code < 500): if encoding: response.encoding = encoding return response + if notfound and code == 404: + raise exception.NotFoundError(notfound) if cloudflare.is_challenge(response): self.log.info("Solving Cloudflare challenge") url, domain, cookies = cloudflare.solve_challenge( @@ -98,7 +101,7 @@ class Extractor(): continue msg = "{}: {} for url: {}".format(code, response.reason, url) - if code < 500 and code != 429: + if code < 500 and code != 429 and code != 430: break self.log.debug("%s (%s/%s)", msg, tries, retries+1) diff --git a/gallery_dl/extractor/deviantart.py b/gallery_dl/extractor/deviantart.py index ebab0400..8e53cae6 100644 --- a/gallery_dl/extractor/deviantart.py +++ b/gallery_dl/extractor/deviantart.py @@ -416,7 +416,7 @@ class DeviantartDeviationExtractor(DeviantartExtractor): def deviations(self): url = "{}/{}/{}".format(self.root, self.user, self.path) - response = self._html_request(url, expect=range(400, 500)) + response = self._html_request(url, fatal=False) deviation_id = text.extract(response.text, '//deviation/', '"')[0] if response.status_code >= 400 or not deviation_id: raise exception.NotFoundError("image") @@ -767,7 +767,7 @@ class DeviantartAPI(): def user_profile(self, username): """Get user profile information""" endpoint = "user/profile/" + username - return self._call(endpoint, expect_error=True) + return self._call(endpoint, fatal=False) def authenticate(self, refresh_token): """Authenticate the application by requesting an access token""" @@ -797,7 +797,7 @@ class DeviantartAPI(): _refresh_token_cache.update(refresh_token, data["refresh_token"]) return "Bearer " + data["access_token"] - def _call(self, endpoint, params=None, expect_error=False, public=True): + def _call(self, endpoint, params=None, fatal=True, public=True): """Call an API endpoint""" url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint while True: @@ -806,11 +806,7 @@ class DeviantartAPI(): self.authenticate(None if public else self.refresh_token) response = self.extractor.request( - url, - params=params, - headers=self.headers, - expect=range(400, 500), - ) + url, headers=self.headers, params=params, fatal=False) data = response.json() status = response.status_code @@ -818,7 +814,7 @@ class DeviantartAPI(): if self.delay > self.delay_min: self.delay -= 1 return data - if expect_error: + if not fatal: return None if data.get("error_description") == "User not found.": raise exception.NotFoundError("user or group") diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index d67c58a6..20e07463 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -259,7 +259,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def _gallery_page(self): url = "{}/g/{}/{}/".format( self.root, self.gallery_id, self.gallery_token) - response = self.request(url, expect=range(400, 500)) + response = self.request(url, fatal=False) page = response.text if response.status_code == 404 and "Gallery Not Available" in page: @@ -271,7 +271,7 @@ class ExhentaiGalleryExtractor(ExhentaiExtractor): def _image_page(self): url = "{}/s/{}/{}-{}".format( self.root, self.image_token, self.gallery_id, self.image_num) - page = self.request(url, expect=range(400, 500)).text + page = self.request(url, fatal=False).text if page.startswith(("Invalid page", "Keep trying")): raise exception.NotFoundError("image page") diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 0468c0b2..c5e3d176 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -16,16 +16,15 @@ import json class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" + root = "https://imgur.com" def __init__(self, match): Extractor.__init__(self, match) self.item_id = match.group(1) self.mp4 = self.config("mp4", True) - def _get_data(self, urlpart): - response = self.request("https://imgur.com/" + urlpart, expect=(404,)) - if response.status_code == 404: - raise exception.NotFoundError(self.subcategory) + def _get_data(self, path): + response = self.request(self.root + path, notfound=self.subcategory) data = text.extract(response.text, "image : ", ",\n")[0] return self._clean(json.loads(data)) @@ -102,7 +101,7 @@ class ImgurImageExtractor(ImgurExtractor): ) def items(self): - image = self._get_data(self.item_id) + image = self._get_data("/" + self.item_id) url = self._prepare(image) yield Message.Version, 1 @@ -165,13 +164,13 @@ class ImgurAlbumExtractor(ImgurExtractor): ) def items(self): - album = self._get_data("a/" + self.item_id + "/all") + album = self._get_data("/a/" + self.item_id + "/all") images = album["album_images"]["images"] del album["album_images"] if int(album["num_images"]) > len(images): - url = ("https://imgur.com/ajaxalbums/getimages/" + - self.item_id + "/hit.json") + url = "{}/ajaxalbums/getimages/{}/hit.json".format( + self.root, self.item_id) images = self.request(url).json()["data"]["images"] yield Message.Version, 1 diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index b982baea..08c6c174 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -106,13 +106,8 @@ class NijieExtractor(AsynchronousMixin, Extractor): params = {"id": self.user_id, "p": 1} while True: - response = self.request(url, params=params, expect=(404,)) - if response.status_code == 404: - raise exception.NotFoundError("artist") - - page = response.text - ids = list(text.extract_iter(page, ' illust_id="', '"')) - yield from ids + page = self.request(url, params=params, notfound="artist").text + yield from text.extract_iter(page, 'illust_id="', '"') if '