From 3ecb51272293a1716f9ee09ec5f7229a27d7038b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mike=20F=C3=A4hrmann?= Date: Mon, 18 Sep 2023 23:50:25 +0200 Subject: [PATCH] send Referer headers by default --- docs/configuration.rst | 18 +++++++++++++++++- gallery_dl/extractor/500px.py | 3 --- gallery_dl/extractor/8chan.py | 3 --- gallery_dl/extractor/artstation.py | 2 -- gallery_dl/extractor/behance.py | 5 ++--- gallery_dl/extractor/bunkr.py | 3 +-- gallery_dl/extractor/common.py | 7 +++++++ gallery_dl/extractor/deviantart.py | 4 +--- gallery_dl/extractor/exhentai.py | 1 - gallery_dl/extractor/fantia.py | 5 +---- gallery_dl/extractor/foolfuuka.py | 3 --- gallery_dl/extractor/hiperdex.py | 3 --- gallery_dl/extractor/hotleak.py | 3 --- gallery_dl/extractor/imagefap.py | 3 --- gallery_dl/extractor/imgur.py | 6 +----- gallery_dl/extractor/itaku.py | 1 - gallery_dl/extractor/kemonoparty.py | 1 - gallery_dl/extractor/mangafox.py | 3 --- gallery_dl/extractor/mangakakalot.py | 3 --- gallery_dl/extractor/manganelo.py | 2 -- gallery_dl/extractor/naverwebtoon.py | 1 - gallery_dl/extractor/newgrounds.py | 2 -- gallery_dl/extractor/nijie.py | 1 - gallery_dl/extractor/nozomi.py | 6 +++--- gallery_dl/extractor/patreon.py | 1 - gallery_dl/extractor/pinterest.py | 1 - gallery_dl/extractor/pornpics.py | 3 --- gallery_dl/extractor/reactor.py | 1 - gallery_dl/extractor/redgifs.py | 1 - gallery_dl/extractor/sankaku.py | 1 - gallery_dl/extractor/skeb.py | 7 +++---- gallery_dl/extractor/vipergirls.py | 3 --- gallery_dl/extractor/weibo.py | 1 - 33 files changed, 36 insertions(+), 72 deletions(-) diff --git a/docs/configuration.rst b/docs/configuration.rst index 36d57fb8..ee1567a9 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -565,6 +565,21 @@ Description browser would use HTTP/2. +extractor.*.referer +------------------- +Type + * ``bool`` + * ``string`` +Default + ``true`` +Description + Send `Referer `__ + headers with all outgoing HTTP requests. + + If this is a ``string``, send it as Referer + instead of the extractor's ``root`` domain. + + extractor.*.headers ------------------- Type @@ -576,7 +591,8 @@ Default "User-Agent" : "", "Accept" : "*/*", "Accept-Language": "en-US,en;q=0.5", - "Accept-Encoding": "gzip, deflate" + "Accept-Encoding": "gzip, deflate", + "Referer" : "" } Description diff --git a/gallery_dl/extractor/500px.py b/gallery_dl/extractor/500px.py index a1a9d0d9..41cc0deb 100644 --- a/gallery_dl/extractor/500px.py +++ b/gallery_dl/extractor/500px.py @@ -23,9 +23,6 @@ class _500pxExtractor(Extractor): root = "https://500px.com" cookies_domain = ".500px.com" - def _init(self): - self.session.headers["Referer"] = self.root + "/" - def items(self): data = self.metadata() diff --git a/gallery_dl/extractor/8chan.py b/gallery_dl/extractor/8chan.py index 3ecae13d..fc16f43c 100644 --- a/gallery_dl/extractor/8chan.py +++ b/gallery_dl/extractor/8chan.py @@ -102,9 +102,6 @@ class _8chanBoardExtractor(_8chanExtractor): _8chanExtractor.__init__(self, match) _, self.board, self.page = match.groups() - def _init(self): - self.session.headers["Referer"] = self.root + "/" - def items(self): page = text.parse_int(self.page, 1) url = "{}/{}/{}.json".format(self.root, self.board, page) diff --git a/gallery_dl/extractor/artstation.py b/gallery_dl/extractor/artstation.py index d6676946..b58b3d39 100644 --- a/gallery_dl/extractor/artstation.py +++ b/gallery_dl/extractor/artstation.py @@ -117,7 +117,6 @@ class ArtstationExtractor(Extractor): headers = { "Accept" : "application/json, text/plain, */*", "Origin" : self.root, - "Referer": self.root + "/", } if json: @@ -147,7 +146,6 @@ class ArtstationExtractor(Extractor): headers = { "Accept" : "*/*", "Origin" : self.root, - "Referer": self.root + "/", } return self.request( url, method="POST", headers=headers, json={}, diff --git a/gallery_dl/extractor/behance.py b/gallery_dl/extractor/behance.py index ac547a9e..fc5f9eff 100644 --- a/gallery_dl/extractor/behance.py +++ b/gallery_dl/extractor/behance.py @@ -35,9 +35,8 @@ class BehanceExtractor(Extractor): def _request_graphql(self, endpoint, variables): url = self.root + "/v3/graphql" headers = { - "Origin" : self.root, - "Referer": self.root + "/", - "X-BCP" : self._bcp, + "Origin": self.root, + "X-BCP" : self._bcp, "X-Requested-With": "XMLHttpRequest", } data = { diff --git a/gallery_dl/extractor/bunkr.py b/gallery_dl/extractor/bunkr.py index 1318d0e4..dc48090a 100644 --- a/gallery_dl/extractor/bunkr.py +++ b/gallery_dl/extractor/bunkr.py @@ -42,7 +42,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor): cdn = None files = [] append = files.append - headers = {"Referer": self.root + "/"} pos = page.index('class="grid-images') for url in text.extract_iter(page, '", "") count, pos = text.extract( diff --git a/gallery_dl/extractor/mangakakalot.py b/gallery_dl/extractor/mangakakalot.py index 2b93392d..e1adf21e 100644 --- a/gallery_dl/extractor/mangakakalot.py +++ b/gallery_dl/extractor/mangakakalot.py @@ -31,9 +31,6 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor): self.path = match.group(1) ChapterExtractor.__init__(self, match, self.root + self.path) - def _init(self): - self.session.headers['Referer'] = self.root + "/" - def metadata(self, page): _ , pos = text.extract(page, '', '<') manga , pos = text.extract(page, '', '<', pos) diff --git a/gallery_dl/extractor/manganelo.py b/gallery_dl/extractor/manganelo.py index d2175a02..46019ad8 100644 --- a/gallery_dl/extractor/manganelo.py +++ b/gallery_dl/extractor/manganelo.py @@ -23,8 +23,6 @@ class ManganeloBase(): super().__init__(match, "https://" + domain + path) def _init(self): - self.session.headers['Referer'] = self.root + "/" - if self._match_chapter is None: ManganeloBase._match_chapter = re.compile( r"(?:[Vv]ol\.?\s*(\d+)\s?)?" diff --git a/gallery_dl/extractor/naverwebtoon.py b/gallery_dl/extractor/naverwebtoon.py index c58056ec..72ee5b06 100644 --- a/gallery_dl/extractor/naverwebtoon.py +++ b/gallery_dl/extractor/naverwebtoon.py @@ -85,7 +85,6 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor): url = self.root + "/api/article/list" headers = { "Accept": "application/json, text/plain, */*", - "Referer": self.root + "/", } params = { "titleId": self.title_id, diff --git a/gallery_dl/extractor/newgrounds.py b/gallery_dl/extractor/newgrounds.py index 55f5c285..1bcc915d 100644 --- a/gallery_dl/extractor/newgrounds.py +++ b/gallery_dl/extractor/newgrounds.py @@ -207,7 +207,6 @@ class NewgroundsExtractor(Extractor): headers = { "Accept": "application/json, text/javascript, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", - "Referer": self.root, } sources = self.request(url, headers=headers).json()["sources"] @@ -478,7 +477,6 @@ class NewgroundsSearchExtractor(NewgroundsExtractor): headers = { "Accept": "application/json, text/javascript, */*; q=0.01", "X-Requested-With": "XMLHttpRequest", - "Referer": self.root, } params["inner"] = "1" params["page"] = 1 diff --git a/gallery_dl/extractor/nijie.py b/gallery_dl/extractor/nijie.py index 9dd9d7ab..b902404c 100644 --- a/gallery_dl/extractor/nijie.py +++ b/gallery_dl/extractor/nijie.py @@ -30,7 +30,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor): BaseExtractor.initialize(self) - self.session.headers["Referer"] = self.root + "/" self.user_name = None if self.category == "horne": self._extract_data = self._extract_data_horne diff --git a/gallery_dl/extractor/nozomi.py b/gallery_dl/extractor/nozomi.py index 9bff92ff..8c7ffe52 100644 --- a/gallery_dl/extractor/nozomi.py +++ b/gallery_dl/extractor/nozomi.py @@ -24,11 +24,11 @@ class NozomiExtractor(Extractor): filename_fmt = "{postid} {dataid}.{extension}" archive_fmt = "{dataid}" - def items(self): + def _init(self): + self.session.headers["Origin"] = self.root + def items(self): data = self.metadata() - self.session.headers["Origin"] = self.root - self.session.headers["Referer"] = self.root + "/" for post_id in map(str, self.posts()): url = "https://j.nozomi.la/post/{}/{}/{}.json".format( diff --git a/gallery_dl/extractor/patreon.py b/gallery_dl/extractor/patreon.py index 0c5dfbe2..42957077 100644 --- a/gallery_dl/extractor/patreon.py +++ b/gallery_dl/extractor/patreon.py @@ -103,7 +103,6 @@ class PatreonExtractor(Extractor): def _pagination(self, url): headers = { - "Referer" : self.root + "/", "Content-Type": "application/vnd.api+json", } diff --git a/gallery_dl/extractor/pinterest.py b/gallery_dl/extractor/pinterest.py index 8afa397e..e9f124f1 100644 --- a/gallery_dl/extractor/pinterest.py +++ b/gallery_dl/extractor/pinterest.py @@ -325,7 +325,6 @@ class PinterestAPI(): "Accept" : "application/json, text/javascript, " "*/*, q=0.01", "Accept-Language" : "en-US,en;q=0.5", - "Referer" : self.root + "/", "X-Requested-With" : "XMLHttpRequest", "X-APP-VERSION" : "0c4af40", "X-CSRFToken" : csrf_token, diff --git a/gallery_dl/extractor/pornpics.py b/gallery_dl/extractor/pornpics.py index 1f14da07..4a6f0314 100644 --- a/gallery_dl/extractor/pornpics.py +++ b/gallery_dl/extractor/pornpics.py @@ -24,9 +24,6 @@ class PornpicsExtractor(Extractor): super().__init__(match) self.item = match.group(1) - def _init(self): - self.session.headers["Referer"] = self.root + "/" - def items(self): for gallery in self.galleries(): gallery["_extractor"] = PornpicsGalleryExtractor diff --git a/gallery_dl/extractor/reactor.py b/gallery_dl/extractor/reactor.py index abe993a6..9a6c8a5a 100644 --- a/gallery_dl/extractor/reactor.py +++ b/gallery_dl/extractor/reactor.py @@ -34,7 +34,6 @@ class ReactorExtractor(BaseExtractor): self.category = netloc.rpartition(".")[0] def _init(self): - self.session.headers["Referer"] = self.root self.gif = self.config("gif", False) def items(self): diff --git a/gallery_dl/extractor/redgifs.py b/gallery_dl/extractor/redgifs.py index 52315106..57093091 100644 --- a/gallery_dl/extractor/redgifs.py +++ b/gallery_dl/extractor/redgifs.py @@ -193,7 +193,6 @@ class RedgifsAPI(): def __init__(self, extractor): self.extractor = extractor self.headers = { - "Referer" : extractor.root + "/", "authorization" : None, "content-type" : "application/json", "x-customheader": extractor.root + "/", diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index f9fe7fa3..745a351b 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -180,7 +180,6 @@ class SankakuAPI(): self.extractor = extractor self.headers = { "Accept" : "application/vnd.sankaku.api+json;v=2", - "Referer" : extractor.root + "/", "Platform": "web-app", "Origin" : extractor.root, } diff --git a/gallery_dl/extractor/skeb.py b/gallery_dl/extractor/skeb.py index f10ab622..55a0db00 100644 --- a/gallery_dl/extractor/skeb.py +++ b/gallery_dl/extractor/skeb.py @@ -45,7 +45,7 @@ class SkebExtractor(Extractor): """Return additional metadata""" def _pagination(self, url, params): - headers = {"Referer": self.root, "Authorization": "Bearer null"} + headers = {"Authorization": "Bearer null"} params["offset"] = 0 while True: @@ -69,7 +69,7 @@ class SkebExtractor(Extractor): def _get_post_data(self, user_name, post_num): url = "{}/api/users/{}/works/{}".format( self.root, user_name, post_num) - headers = {"Referer": self.root, "Authorization": "Bearer null"} + headers = {"Authorization": "Bearer null"} resp = self.request(url, headers=headers).json() creator = resp["creator"] post = { @@ -190,7 +190,6 @@ class SkebSearchExtractor(SkebExtractor): } headers = { "Origin": self.root, - "Referer": self.root + "/", "x-algolia-api-key": "9a4ce7d609e71bf29e977925e4c6740c", "x-algolia-application-id": "HB1JT3KRE9", } @@ -243,7 +242,7 @@ class SkebFollowingExtractor(SkebExtractor): url = "{}/api/users/{}/following_creators".format( self.root, self.user_name) params = {"sort": "date", "offset": 0, "limit": 90} - headers = {"Referer": self.root, "Authorization": "Bearer null"} + headers = {"Authorization": "Bearer null"} while True: data = self.request(url, params=params, headers=headers).json() diff --git a/gallery_dl/extractor/vipergirls.py b/gallery_dl/extractor/vipergirls.py index 456a0298..4ee252ef 100644 --- a/gallery_dl/extractor/vipergirls.py +++ b/gallery_dl/extractor/vipergirls.py @@ -26,9 +26,6 @@ class VipergirlsExtractor(Extractor): cookies_domain = ".vipergirls.to" cookies_names = ("vg_userid", "vg_password") - def _init(self): - self.session.headers["Referer"] = self.root + "/" - def items(self): self.login() diff --git a/gallery_dl/extractor/weibo.py b/gallery_dl/extractor/weibo.py index 51999455..168d5a0f 100644 --- a/gallery_dl/extractor/weibo.py +++ b/gallery_dl/extractor/weibo.py @@ -37,7 +37,6 @@ class WeiboExtractor(Extractor): cookies = _cookie_cache() if cookies is not None: self.cookies.update(cookies) - self.session.headers["Referer"] = self.root + "/" def request(self, url, **kwargs): response = Extractor.request(self, url, **kwargs)