send Referer headers by default

pull/4571/head
Mike Fährmann 1 year ago
parent cb4798f07a
commit 3ecb512722

@@ -565,6 +565,21 @@ Description
     browser would use HTTP/2.
 
 
+extractor.*.referer
+-------------------
+Type
+    * ``bool``
+    * ``string``
+Default
+    ``true``
+Description
+    Send `Referer <https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Referer>`__
+    headers with all outgoing HTTP requests.
+
+    If this is a ``string``, send it as Referer
+    instead of the extractor's ``root`` domain.
+
+
 extractor.*.headers
 -------------------
 Type
@@ -576,7 +591,8 @@ Default
             "User-Agent"     : "<extractor.*.user-agent>",
             "Accept"         : "*/*",
             "Accept-Language": "en-US,en;q=0.5",
-            "Accept-Encoding": "gzip, deflate"
+            "Accept-Encoding": "gzip, deflate",
+            "Referer"        : "<extractor.*.referer>"
         }
 
 Description
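
The new option reads like any other ``extractor.*`` setting and can be set globally or per category. A minimal config sketch (the category names and the string value are purely illustrative), in the same JSON format as the documentation's other examples:

    {
        "extractor": {
            "referer": true,
            "site-a": {
                "referer": false
            },
            "site-b": {
                "referer": "https://example.org/"
            }
        }
    }

``false`` disables the header for that category; a ``string`` is sent verbatim instead of the extractor's ``root`` domain plus trailing slash.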

@@ -23,9 +23,6 @@ class _500pxExtractor(Extractor):
     root = "https://500px.com"
     cookies_domain = ".500px.com"
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         data = self.metadata()

@@ -102,9 +102,6 @@ class _8chanBoardExtractor(_8chanExtractor):
         _8chanExtractor.__init__(self, match)
         _, self.board, self.page = match.groups()
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         page = text.parse_int(self.page, 1)
         url = "{}/{}/{}.json".format(self.root, self.board, page)

@@ -117,7 +117,6 @@ class ArtstationExtractor(Extractor):
         headers = {
             "Accept" : "application/json, text/plain, */*",
             "Origin" : self.root,
-            "Referer": self.root + "/",
         }
 
         if json:
@@ -147,7 +146,6 @@ class ArtstationExtractor(Extractor):
         headers = {
             "Accept" : "*/*",
             "Origin" : self.root,
-            "Referer": self.root + "/",
         }
         return self.request(
             url, method="POST", headers=headers, json={},

@@ -35,9 +35,8 @@ class BehanceExtractor(Extractor):
     def _request_graphql(self, endpoint, variables):
         url = self.root + "/v3/graphql"
         headers = {
-            "Origin" : self.root,
-            "Referer": self.root + "/",
-            "X-BCP"  : self._bcp,
+            "Origin": self.root,
+            "X-BCP" : self._bcp,
             "X-Requested-With": "XMLHttpRequest",
         }
         data = {

@@ -42,7 +42,6 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
         cdn = None
         files = []
         append = files.append
-        headers = {"Referer": self.root + "/"}
 
         pos = page.index('class="grid-images')
         for url in text.extract_iter(page, '<a href="', '"', pos):
@@ -63,7 +62,7 @@ class BunkrAlbumExtractor(LolisafeAlbumExtractor):
             else:
                 domain = domain.replace("cdn", "media-files", 1)
             url = urlunsplit((scheme, domain, path, query, fragment))
-            append({"file": url, "_http_headers": headers})
+            append({"file": url})
 
         return files, {
             "album_id" : self.album_id,

@@ -310,6 +310,13 @@ class Extractor():
             else:
                 headers["Accept-Encoding"] = "gzip, deflate"
 
+        custom_referer = self.config("referer", True)
+        if custom_referer:
+            if isinstance(custom_referer, str):
+                headers["Referer"] = custom_referer
+            elif self.root:
+                headers["Referer"] = self.root + "/"
+
         custom_headers = self.config("headers")
         if custom_headers:
             headers.update(custom_headers)
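
The hunk above in the base ``Extractor`` class carries the whole feature: a default Referer is derived from each extractor's ``root`` URL unless ``referer`` is false-y, an explicit string wins over the derived value, and the ``headers`` option still overrides everything because it is applied afterwards. A standalone sketch of that resolution order (the function and its arguments are illustrative stand-ins, not part of the codebase):

    def build_headers(referer_option, root, custom_headers=None):
        # referer_option mirrors self.config("referer", True): bool or str
        headers = {}
        if referer_option:
            if isinstance(referer_option, str):
                # explicit string: sent verbatim
                headers["Referer"] = referer_option
            elif root:
                # True: derive from the extractor's root URL
                headers["Referer"] = root + "/"
        if custom_headers:
            # the "headers" option is applied last, so it overrides
            headers.update(custom_headers)
        return headers

    assert build_headers(True, "https://example.org") == \
        {"Referer": "https://example.org/"}
    assert build_headers("https://other.example/", "https://example.org") == \
        {"Referer": "https://other.example/"}
    assert build_headers(False, "https://example.org") == {}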

@@ -1422,11 +1422,9 @@ class DeviantartEclipseAPI():
 
     def _call(self, endpoint, params):
         url = "https://www.deviantart.com/_napi" + endpoint
-        headers = {"Referer": "https://www.deviantart.com/"}
         params["csrf_token"] = self.csrf_token or self._fetch_csrf_token()
 
-        response = self.request(
-            url, params=params, headers=headers, fatal=None)
+        response = self.request(url, params=params, fatal=None)
 
         if response.status_code == 404:
             raise exception.StopExtraction(

@@ -45,7 +45,6 @@ class ExhentaiExtractor(Extractor):
         if self.version != "ex":
             self.cookies.set("nw", "1", domain=self.cookies_domain)
 
-        self.session.headers["Referer"] = self.root + "/"
         self.original = self.config("original", True)
 
         limits = self.config("limits", False)

@@ -22,7 +22,6 @@ class FantiaExtractor(Extractor):
     def _init(self):
         self.headers = {
             "Accept" : "application/json, text/plain, */*",
-            "Referer": self.root,
             "X-Requested-With": "XMLHttpRequest",
         }
         self._empty_plan = {
@@ -65,11 +64,9 @@ class FantiaExtractor(Extractor):
 
     def _pagination(self, url):
         params = {"page": 1}
-        headers = self.headers.copy()
-        del headers["X-Requested-With"]
 
         while True:
-            page = self.request(url, params=params, headers=headers).text
+            page = self.request(url, params=params).text
             self._csrf_token(page)
 
             post_id = None

@@ -25,9 +25,6 @@ class FoolfuukaExtractor(BaseExtractor):
         if self.category == "b4k":
             self.remote = self._remote_direct
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         yield Message.Directory, self.metadata()
         for post in self.posts():

@@ -22,9 +22,6 @@ class HiperdexBase():
     category = "hiperdex"
     root = "https://hiperdex.com"
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     @memcache(keyarg=1)
     def manga_data(self, manga, page=None):
         if not page:

@@ -21,9 +21,6 @@ class HotleakExtractor(Extractor):
     archive_fmt = "{type}_{creator}_{id}"
     root = "https://hotleak.vip"
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         for post in self.posts():
             yield Message.Directory, post

@@ -23,9 +23,6 @@ class ImagefapExtractor(Extractor):
     archive_fmt = "{gallery_id}_{image_id}"
     request_interval = (2.0, 4.0)
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)

@@ -281,11 +281,7 @@ class ImgurAPI():
         params["client_id"] = self.client_id
         params["page"] = 0
         params["sort"] = "newest"
-        headers = {
-            "Referer": "https://imgur.com/",
-            "Origin": "https://imgur.com",
-        }
-
+        headers = {"Origin": "https://imgur.com"}
 
         while True:
             data = self._call(endpoint, params, headers)["data"]

@@ -85,7 +85,6 @@ class ItakuAPI():
         self.root = extractor.root + "/api"
         self.headers = {
             "Accept": "application/json, text/plain, */*",
-            "Referer": extractor.root + "/",
         }
 
     def galleries_images(self, username, section=None):

@@ -37,7 +37,6 @@ class KemonopartyExtractor(Extractor):
         Extractor.__init__(self, match)
 
     def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
         self._prepare_ddosguard_cookies()
         self._find_inline = re.compile(
             r'src="(?:https?://(?:kemono|coomer)\.(?:party|su))?(/inline/[^"]+'

@@ -27,9 +27,6 @@ class MangafoxChapterExtractor(ChapterExtractor):
         self.urlbase = self.root + base
         ChapterExtractor.__init__(self, match, self.urlbase + "/1.html")
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def metadata(self, page):
         manga, pos = text.extract(page, "<title>", "</title>")
         count, pos = text.extract(

@@ -31,9 +31,6 @@ class MangakakalotChapterExtractor(MangakakalotBase, ChapterExtractor):
         self.path = match.group(1)
         ChapterExtractor.__init__(self, match, self.root + self.path)
 
-    def _init(self):
-        self.session.headers['Referer'] = self.root + "/"
-
     def metadata(self, page):
         _     , pos = text.extract(page, '<span itemprop="title">', '<')
         manga , pos = text.extract(page, '<span itemprop="title">', '<', pos)

@@ -23,8 +23,6 @@ class ManganeloBase():
         super().__init__(match, "https://" + domain + path)
 
     def _init(self):
-        self.session.headers['Referer'] = self.root + "/"
-
         if self._match_chapter is None:
             ManganeloBase._match_chapter = re.compile(
                 r"(?:[Vv]ol\.?\s*(\d+)\s?)?"

@@ -85,7 +85,6 @@ class NaverwebtoonComicExtractor(NaverwebtoonBase, Extractor):
         url = self.root + "/api/article/list"
         headers = {
             "Accept": "application/json, text/plain, */*",
-            "Referer": self.root + "/",
         }
         params = {
             "titleId": self.title_id,

@@ -207,7 +207,6 @@ class NewgroundsExtractor(Extractor):
             headers = {
                 "Accept": "application/json, text/javascript, */*; q=0.01",
                 "X-Requested-With": "XMLHttpRequest",
-                "Referer": self.root,
             }
             sources = self.request(url, headers=headers).json()["sources"]
@@ -478,7 +477,6 @@ class NewgroundsSearchExtractor(NewgroundsExtractor):
         headers = {
             "Accept": "application/json, text/javascript, */*; q=0.01",
             "X-Requested-With": "XMLHttpRequest",
-            "Referer": self.root,
         }
         params["inner"] = "1"
         params["page"] = 1

@@ -30,7 +30,6 @@ class NijieExtractor(AsynchronousMixin, BaseExtractor):
         BaseExtractor.initialize(self)
 
-        self.session.headers["Referer"] = self.root + "/"
         self.user_name = None
 
         if self.category == "horne":
             self._extract_data = self._extract_data_horne

@@ -24,11 +24,11 @@ class NozomiExtractor(Extractor):
     filename_fmt = "{postid} {dataid}.{extension}"
     archive_fmt = "{dataid}"
 
-    def items(self):
+    def _init(self):
+        self.session.headers["Origin"] = self.root
+
+    def items(self):
         data = self.metadata()
-        self.session.headers["Origin"] = self.root
-        self.session.headers["Referer"] = self.root + "/"
 
         for post_id in map(str, self.posts()):
             url = "https://j.nozomi.la/post/{}/{}/{}.json".format(

@@ -103,7 +103,6 @@ class PatreonExtractor(Extractor):
 
     def _pagination(self, url):
         headers = {
-            "Referer"     : self.root + "/",
             "Content-Type": "application/vnd.api+json",
         }

@@ -325,7 +325,6 @@ class PinterestAPI():
             "Accept"          : "application/json, text/javascript, "
                                 "*/*, q=0.01",
             "Accept-Language" : "en-US,en;q=0.5",
-            "Referer"         : self.root + "/",
             "X-Requested-With": "XMLHttpRequest",
             "X-APP-VERSION"   : "0c4af40",
             "X-CSRFToken"     : csrf_token,

@@ -24,9 +24,6 @@ class PornpicsExtractor(Extractor):
         super().__init__(match)
         self.item = match.group(1)
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         for gallery in self.galleries():
             gallery["_extractor"] = PornpicsGalleryExtractor

@@ -34,7 +34,6 @@ class ReactorExtractor(BaseExtractor):
             self.category = netloc.rpartition(".")[0]
 
     def _init(self):
-        self.session.headers["Referer"] = self.root
         self.gif = self.config("gif", False)
 
     def items(self):

@@ -193,7 +193,6 @@ class RedgifsAPI():
     def __init__(self, extractor):
         self.extractor = extractor
         self.headers = {
-            "Referer"       : extractor.root + "/",
             "authorization" : None,
             "content-type"  : "application/json",
             "x-customheader": extractor.root + "/",

@@ -180,7 +180,6 @@ class SankakuAPI():
         self.extractor = extractor
         self.headers = {
             "Accept"  : "application/vnd.sankaku.api+json;v=2",
-            "Referer" : extractor.root + "/",
             "Platform": "web-app",
             "Origin"  : extractor.root,
         }

@@ -45,7 +45,7 @@ class SkebExtractor(Extractor):
         """Return additional metadata"""
 
     def _pagination(self, url, params):
-        headers = {"Referer": self.root, "Authorization": "Bearer null"}
+        headers = {"Authorization": "Bearer null"}
         params["offset"] = 0
 
         while True:
@@ -69,7 +69,7 @@ class SkebExtractor(Extractor):
     def _get_post_data(self, user_name, post_num):
         url = "{}/api/users/{}/works/{}".format(
             self.root, user_name, post_num)
-        headers = {"Referer": self.root, "Authorization": "Bearer null"}
+        headers = {"Authorization": "Bearer null"}
         resp = self.request(url, headers=headers).json()
         creator = resp["creator"]
         post = {
@@ -190,7 +190,6 @@ class SkebSearchExtractor(SkebExtractor):
         }
         headers = {
             "Origin": self.root,
-            "Referer": self.root + "/",
             "x-algolia-api-key": "9a4ce7d609e71bf29e977925e4c6740c",
             "x-algolia-application-id": "HB1JT3KRE9",
         }
@@ -243,7 +242,7 @@ class SkebFollowingExtractor(SkebExtractor):
         url = "{}/api/users/{}/following_creators".format(
             self.root, self.user_name)
         params = {"sort": "date", "offset": 0, "limit": 90}
-        headers = {"Referer": self.root, "Authorization": "Bearer null"}
+        headers = {"Authorization": "Bearer null"}
 
         while True:
             data = self.request(url, params=params, headers=headers).json()

@@ -26,9 +26,6 @@ class VipergirlsExtractor(Extractor):
     cookies_domain = ".vipergirls.to"
     cookies_names = ("vg_userid", "vg_password")
 
-    def _init(self):
-        self.session.headers["Referer"] = self.root + "/"
-
     def items(self):
         self.login()

@@ -37,7 +37,6 @@ class WeiboExtractor(Extractor):
         cookies = _cookie_cache()
         if cookies is not None:
             self.cookies.update(cookies)
-        self.session.headers["Referer"] = self.root + "/"
 
     def request(self, url, **kwargs):
         response = Extractor.request(self, url, **kwargs)
