diff --git a/docs/configuration.rst b/docs/configuration.rst index 9aed9675..107a8fa6 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -289,6 +289,19 @@ Description Source to read additional cookies from. =========== ===== +extractor.*.user-agent +---------------------- +=========== ===== +Type ``string`` +Default ``"Mozilla/5.0 (X11; Linux x86_64; rv:54.0) Gecko/20100101 Firefox/54.0"`` +Description User-Agent header value to be used for HTTP requests. + + Note that this option has no effect on `pixiv` and + `readcomiconline` extractors, as these need specific values to + function correctly. +=========== ===== + + Extractor-specific Options ========================== diff --git a/gallery_dl/extractor/3dbooru.py b/gallery_dl/extractor/3dbooru.py index 3d084b0d..74c55346 100644 --- a/gallery_dl/extractor/3dbooru.py +++ b/gallery_dl/extractor/3dbooru.py @@ -17,7 +17,6 @@ class ThreedeebooruExtractor(booru.JSONBooruExtractor): api_url = "http://behoimi.org/post/index.json" headers = { "Referer": "http://behoimi.org/post/show/", - "User-Agent": "Mozilla/5.0", "Accept-Encoding": "identity", } diff --git a/gallery_dl/extractor/chan.py b/gallery_dl/extractor/chan.py index 4380d034..65e942d8 100644 --- a/gallery_dl/extractor/chan.py +++ b/gallery_dl/extractor/chan.py @@ -75,7 +75,6 @@ class FoolfuukaThreadExtractor(SharedConfigExtractor): def __init__(self, match): SharedConfigExtractor.__init__(self) self.board, self.thread = match.groups() - self.session.headers["User-Agent"] = "Mozilla 5.0" if self.referer: self.session.headers["Referer"] = self.root diff --git a/gallery_dl/extractor/common.py b/gallery_dl/extractor/common.py index d1cda0cb..8d00be97 100644 --- a/gallery_dl/extractor/common.py +++ b/gallery_dl/extractor/common.py @@ -34,6 +34,7 @@ class Extractor(): self.session = requests.Session() self.log = logging.getLogger(self.category) self._set_cookies(self.config("cookies")) + self._set_headers() def __iter__(self): return self.items() @@ -96,6 +97,13 @@ class Extractor(): return username, password + def _set_headers(self): + """Set additional headers for the 'session' object""" + self.session.headers["Accept-Language"] = "en-US,en;q=0.5" + self.session.headers["User-Agent"] = self.config( + "user-agent", ("Mozilla/5.0 (X11; Linux x86_64; rv:54.0) " + "Gecko/20100101 Firefox/54.0")) + def _set_cookies(self, cookies): """Populate the cookiejar with 'cookies'""" if cookies: diff --git a/gallery_dl/extractor/exhentai.py b/gallery_dl/extractor/exhentai.py index a03c6945..9c79385c 100644 --- a/gallery_dl/extractor/exhentai.py +++ b/gallery_dl/extractor/exhentai.py @@ -50,11 +50,7 @@ class ExhentaiGalleryExtractor(Extractor): self.wait_max = self.config("wait-max", 6) if self.wait_max < self.wait_min: self.wait_max = self.wait_min - self.session.headers.update({ - "User-Agent": "Mozilla/5.0", - "Accept-Language": "en-US,en;q=0.5", - "Referer": self.root + "/", - }) + self.session.headers["Referer"] = self.root + "/" def items(self): self.login() diff --git a/gallery_dl/extractor/readcomiconline.py b/gallery_dl/extractor/readcomiconline.py index 0b44ddf2..d1b987f9 100644 --- a/gallery_dl/extractor/readcomiconline.py +++ b/gallery_dl/extractor/readcomiconline.py @@ -20,6 +20,10 @@ class ReadcomiconlineExtractor(kissmanga.KissmangaExtractor): filename_fmt = "{comic}_{issue:>03}_{page:>03}.{extension}" root = "http://readcomiconline.to" + def __init__(self, match): + kissmanga.KissmangaExtractor.__init__(self, match) + self.session.headers["User-Agent"] = "Wget/1.19.2 (linux-gnu)" + class ReadcomiconlineComicExtractor(ReadcomiconlineExtractor, kissmanga.KissmangaMangaExtractor): diff --git a/gallery_dl/extractor/sankaku.py b/gallery_dl/extractor/sankaku.py index ff67b595..17f079b2 100644 --- a/gallery_dl/extractor/sankaku.py +++ b/gallery_dl/extractor/sankaku.py @@ -41,9 +41,6 @@ class SankakuTagExtractor(Extractor): self.wait_max = self.config("wait-max", 4) if self.wait_max < self.wait_min: self.wait_max = self.wait_min - self.session.headers["User-Agent"] = ( - "Mozilla/5.0 Gecko/20100101 Firefox/40.0" - ) def skip(self, num): pages = min(num // 20, 49) diff --git a/gallery_dl/extractor/senmanga.py b/gallery_dl/extractor/senmanga.py index d895824b..a4876f73 100644 --- a/gallery_dl/extractor/senmanga.py +++ b/gallery_dl/extractor/senmanga.py @@ -38,7 +38,6 @@ class SenmangaChapterExtractor(Extractor): self.chapter_url = "{}/{}/".format(self.root, part) self.img_url = "{}/viewer/{}/".format(self.root, part) self.session.headers["Referer"] = self.chapter_url - self.session.headers["User-Agent"] = "Mozilla 5.0" def items(self): data = self.get_job_metadata() diff --git a/gallery_dl/extractor/twitter.py b/gallery_dl/extractor/twitter.py index ff02070d..0d13077a 100644 --- a/gallery_dl/extractor/twitter.py +++ b/gallery_dl/extractor/twitter.py @@ -37,10 +37,6 @@ class TwitterTweetExtractor(Extractor): self.path, self.user, self.tid = match.groups() def items(self): - self.session.headers["User-Agent"] = ( - "Mozilla/5.0 (X11; Linux x86_64; rv:48.0) " - "Gecko/20100101 Firefox/48.0" - ) page = self.request("https://twitter.com/" + self.path).text data = self.get_job_metadata() imgs = self.get_image_urls(page)