diff --git a/docs/supportedsites.rst b/docs/supportedsites.rst index 925185c7..ce238e6f 100644 --- a/docs/supportedsites.rst +++ b/docs/supportedsites.rst @@ -52,7 +52,7 @@ ImageFap https://imagefap.com/ Images from Users, Gall ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional imgbox https://imgbox.com/ Galleries, individual Images imgth https://imgth.com/ Galleries -imgur https://imgur.com/ Albums, Galleries, individual Images +imgur https://imgur.com/ |imgur-C| Instagram https://www.instagram.com/ |instagram-C| Optional Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga Joyreactor http://joyreactor.cc/ |joyreactor-C| @@ -137,6 +137,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images .. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh .. |flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results .. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps +.. |imgur-C| replace:: Images from Users, Albums, Favorites, Galleries, individual Images .. |instagram-C| replace:: Images from Users, Channels, individual Images, Stories, Tag-Searches .. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches .. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images diff --git a/gallery_dl/extractor/imgur.py b/gallery_dl/extractor/imgur.py index 0cae5a78..2c68dee2 100644 --- a/gallery_dl/extractor/imgur.py +++ b/gallery_dl/extractor/imgur.py @@ -10,13 +10,18 @@ from .common import Extractor, Message from .. import text, exception +import itertools import json +BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" + + class ImgurExtractor(Extractor): """Base class for imgur extractors""" category = "imgur" root = "https://imgur.com" + api_root = "https://api.imgur.com" def __init__(self, match): Extractor.__init__(self, match) @@ -43,14 +48,40 @@ class ImgurExtractor(Extractor): image["extension"] = image["ext"][1:] return url + def _items_apiv3(self, urlfmt): + album_ex = ImgurAlbumExtractor + image_ex = ImgurImageExtractor + + params = { + "IMGURPLATFORM" : "web", + "album_previews": "0", + "client_id" : "546c25a59c58ad7", + } + headers = { + "Origin" : self.root, + "Referer": self.root + "/", + } + + yield Message.Version, 1 + + for num in itertools.count(0): + url = urlfmt.format(num) + data = self.request(url, params=params, headers=headers).json() + + for item in data["data"]: + item["_extractor"] = album_ex if item["is_album"] else image_ex + yield Message.Queue, item["link"], item + + if len(data["data"]) < 60: + return + class ImgurImageExtractor(ImgurExtractor): """Extractor for individual images on imgur.com""" subcategory = "image" filename_fmt = "{category}_{hash}{title:?_//}.{extension}" archive_fmt = "{hash}" - pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com" - r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?") + pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" test = ( ("https://imgur.com/21yMxCS", { "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", @@ -111,8 +142,7 @@ class ImgurAlbumExtractor(ImgurExtractor): directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" archive_fmt = "{album[hash]}_{hash}" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/(?:a|t/unmuted)/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" test = ( ("https://imgur.com/a/TcBmP", { "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", @@ -181,8 +211,7 @@ class ImgurAlbumExtractor(ImgurExtractor): class ImgurGalleryExtractor(ImgurExtractor): """Extractor for imgur galleries""" subcategory = "gallery" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/gallery/(\w{7}|\w{5})") + pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})" test = ( ("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380) "pattern": "https://imgur.com/zf2fIms", @@ -210,35 +239,34 @@ class ImgurGalleryExtractor(ImgurExtractor): class ImgurUserExtractor(ImgurExtractor): """Extractor for all images posted by a user""" subcategory = "user" - pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com" - r"/user/([^/?&#]+)(?:/submitted|/posts)?/?") + pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$" test = ( ("https://imgur.com/user/Miguenzo", { - + "range": "1-100", + "count": 100, + "pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+" }), - ("https://imgur.com/user/Miguenzo/submitted"), - ("https://imgur.com/user/Miguenzo/submitted/newest"), ("https://imgur.com/user/Miguenzo/posts"), + ("https://imgur.com/user/Miguenzo/submitted"), ) def items(self): - num = 0 - base = "{}/user/{}/submitted".format(self.root, self.key) - data = {"_extractor": ImgurGalleryExtractor} - headers = { - "Referer": base, - "X-Requested-With": "XMLHttpRequest", - } + urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( + self.api_root, self.key) + return self._items_apiv3(urlfmt) - while True: - cnt = 0 - url = "{}/page/{}?scrolling".format(base, num) - page = self.request(url, headers=headers).text - for path in text.extract_iter(page, '