[imgur] use API with "public" client_id (#446)

Using the API endpoints makes it possible to access NSFW content
without logging in.
pull/465/head
Mike Fährmann 5 years ago
parent b23c822b23
commit 8f38a35b91
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -52,7 +52,7 @@ ImageFap https://imagefap.com/ Galleries, individual I
ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional ImgBB https://imgbb.com/ Albums, individual Images, User Profiles Optional
imgbox https://imgbox.com/ Galleries, individual Images imgbox https://imgbox.com/ Galleries, individual Images
imgth https://imgth.com/ Galleries imgth https://imgth.com/ Galleries
imgur https://imgur.com/ |imgur-C| Optional imgur https://imgur.com/ |imgur-C|
Instagram https://www.instagram.com/ |instagram-C| Optional Instagram https://www.instagram.com/ |instagram-C| Optional
Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga
Joyreactor http://joyreactor.com/ Posts, Search Results, Tag-Searches, User Profiles Joyreactor http://joyreactor.com/ Posts, Search Results, Tag-Searches, User Profiles

@ -10,9 +10,6 @@
from .common import Extractor, Message from .common import Extractor, Message
from .. import text, exception from .. import text, exception
from ..cache import cache
import itertools
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com" BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com"
@ -22,125 +19,75 @@ class ImgurExtractor(Extractor):
"""Base class for imgur extractors""" """Base class for imgur extractors"""
category = "imgur" category = "imgur"
root = "https://imgur.com" root = "https://imgur.com"
api_root = "https://api.imgur.com"
def __init__(self, match): def __init__(self, match):
Extractor.__init__(self, match) Extractor.__init__(self, match)
self.api = ImgurAPI(self)
self.key = match.group(1) self.key = match.group(1)
self.mp4 = self.config("mp4", True) self.mp4 = self.config("mp4", True)
def login(self): def _prepare(self, image):
username, password = self._get_auth_info()
if username:
self._update_cookies(self._login_impl(username, password))
@cache(maxage=180*24*3600, keyarg=1)
def _login_impl(self, username, password):
self.log.info("Logging in as %s", username)
url = "{}/signin?invokedBy=Regular%20Sign%20In".format(self.root)
headers = {"Referer": url}
data = {
"username": username,
"password": password,
"remember": "remember",
"submit" : "",
}
response = self.request(url, method="POST", headers=headers, data=data)
if not response.history:
error = text.extract(response.text, 'class="error">', '<')[0] or ""
raise exception.AuthenticationError(error.strip())
return self.session.cookies
def _extract_data(self, path):
page = self.request(self.root + path, notfound=self.subcategory).text
data = text.extract(page, "image : ", ",\n")[0]
if not data:
if ">Sign in required<" in page:
self.log.error("'Sign in required'")
else:
self.log.error("Unable to extract JSON data")
raise exception.StopExtraction()
data = json.loads(data)
try: try:
del data["adConfig"] del image["ad_url"]
del data["isAd"] del image["ad_type"]
del image["ad_config"]
except KeyError: except KeyError:
pass pass
return data
def _prepare(self, image): url = image["mp4"] if image["animated"] and self.mp4 else image["link"]
image["ext"] = image["ext"].partition("?")[0] image["date"] = text.parse_timestamp(image["datetime"])
if image["ext"] == ".gif" and ( text.nameext_from_url(url, image)
(self.mp4 and image["prefer_video"]) or self.mp4 == "always"):
image["ext"] = ".mp4"
url = "https://i.imgur.com/" + image["hash"] + image["ext"]
image["extension"] = image["ext"][1:]
return url return url
def _items_apiv3(self, urlfmt): def _items_queue(self, items):
self.login()
album_ex = ImgurAlbumExtractor album_ex = ImgurAlbumExtractor
image_ex = ImgurImageExtractor image_ex = ImgurImageExtractor
params = {
"IMGURPLATFORM" : "web",
"album_previews": "0",
"client_id" : "546c25a59c58ad7",
}
headers = {
"Origin" : self.root,
"Referer": self.root + "/",
}
yield Message.Version, 1 yield Message.Version, 1
for item in items:
for num in itertools.count(0): item["_extractor"] = album_ex if item["is_album"] else image_ex
url = urlfmt.format(num) yield Message.Queue, item["link"], item
data = self.request(url, params=params, headers=headers).json()
for item in data["data"]:
item["_extractor"] = album_ex if item["is_album"] else image_ex
yield Message.Queue, item["link"], item
if len(data["data"]) < 60:
return
class ImgurImageExtractor(ImgurExtractor): class ImgurImageExtractor(ImgurExtractor):
"""Extractor for individual images on imgur.com""" """Extractor for individual images on imgur.com"""
subcategory = "image" subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}" filename_fmt = "{category}_{id}{title:?_//}.{extension}"
archive_fmt = "{hash}" archive_fmt = "{id}"
pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?" pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"
test = ( test = (
("https://imgur.com/21yMxCS", { ("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140", "content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": { "keyword": {
"animated": False, "account_id" : None,
"datetime": "2016-11-10 14:24:35", "account_url" : None,
"description": str, "animated" : False,
"ext": ".png", "bandwidth" : int,
"extension": "png", "date" : "type:datetime",
"hash": "21yMxCS", "datetime" : 1478787875,
"height": "32", "description" : None,
"is_moderated": False, "edited" : "0",
"is_safe": False, "extension" : "png",
"is_viral": 0, "favorite" : False,
"looping": False, "filename" : "21yMxCS",
"mimetype": "image/png", "has_sound" : False,
"name": None, "height" : 32,
"prefer_video": False, "id" : "21yMxCS",
"size": 182, "in_gallery" : False,
"source": "", "in_most_viral": False,
"title": "Test", "is_ad" : False,
"video_host": None, "link" : "https://i.imgur.com/21yMxCS.png",
"video_source": None, "nsfw" : False,
"width": "64", "section" : None,
"size" : 182,
"tags" : [],
"title" : "Test",
"type" : "image/png",
"views" : int,
"vote" : None,
"width" : 64,
}, },
}), }),
("http://imgur.com/0gybAXR", { # gifv/mp4 video ("http://imgur.com/0gybAXR", { # gifv/mp4 video
@ -148,10 +95,10 @@ class ImgurImageExtractor(ImgurExtractor):
"content": "a3c080e43f58f55243ab830569ba02309d59abfc", "content": "a3c080e43f58f55243ab830569ba02309d59abfc",
}), }),
("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1' ("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1'
"url": "73f361b50753ab25da64160aa50bc5d139480d45", "url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e",
}), }),
("https://imgur.com/zzzzzzz", { # not found ("https://imgur.com/zzzzzzz", { # not found
"exception": exception.NotFoundError, "exception": exception.HttpError,
}), }),
("https://www.imgur.com/21yMxCS"), # www ("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile ("https://m.imgur.com/21yMxCS"), # mobile
@ -163,8 +110,7 @@ class ImgurImageExtractor(ImgurExtractor):
) )
def items(self): def items(self):
self.login() image = self.api.image(self.key)
image = self._extract_data("/" + self.key)
url = self._prepare(image) url = self._prepare(image)
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, image yield Message.Directory, image
@ -174,42 +120,67 @@ class ImgurImageExtractor(ImgurExtractor):
class ImgurAlbumExtractor(ImgurExtractor): class ImgurAlbumExtractor(ImgurExtractor):
"""Extractor for imgur albums""" """Extractor for imgur albums"""
subcategory = "album" subcategory = "album"
directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}") directory_fmt = ("{category}", "{album[id]}{album[title]:? - //}")
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}" filename_fmt = "{category}_{album[id]}_{num:>03}_{id}.{extension}"
archive_fmt = "{album[hash]}_{hash}" archive_fmt = "{album[id]}_{id}"
pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})" pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})"
test = ( test = (
("https://imgur.com/a/TcBmP", { ("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": { "keyword": {
"album": { "album": {
"album_cover": "693j2Kr", "account_id" : None,
"album_description": None, "account_url" : None,
"cover": "693j2Kr", "cover" : "693j2Kr",
"datetime": "2015-10-09 10:37:50", "cover_edited": None,
"description": None, "cover_height": 1400,
"hash": "TcBmP", "cover_width" : 951,
"id": "TcBmP", "date" : "type:datetime",
"is_album": True, "datetime" : 1444387070,
"num_images": "19", "description" : None,
"title": "138", "favorite" : False,
"title_clean": "TcBmP", "id" : "TcBmP",
"views": str, "images_count": 19,
"in_gallery" : False,
"is_ad" : False,
"is_album" : True,
"layout" : "blog",
"link" : "https://imgur.com/a/TcBmP",
"nsfw" : False,
"privacy" : "hidden",
"section" : None,
"title" : "138",
"views" : int,
}, },
"animated": bool, "account_id" : None,
"datetime": str, "account_url": None,
"extension": str, "animated" : bool,
"hash": str, "bandwidth" : int,
"height": int, "date" : "type:datetime",
"num": int, "datetime" : int,
"prefer_video": bool, "description": None,
"size": int, "edited" : "0",
"title": str, "favorite" : False,
"width": int, "has_sound" : False,
"height" : int,
"id" : str,
"in_gallery" : False,
"is_ad" : False,
"link" : r"re:https://i\.imgur\.com/\w+\.jpg",
"nsfw" : None,
"num" : int,
"section" : None,
"size" : int,
"tags" : list,
"title" : None,
"type" : "image/jpeg",
"views" : int,
"vote" : None,
"width" : int,
}, },
}), }),
("https://imgur.com/a/eD9CT", { # large album ("https://imgur.com/a/eD9CT", { # large album
"url": "4ee94de31ff26be416271bc0b1ea27b9349c9937", "url": "de748c181a04d18bef1de9d4f4866ef0a06d632b",
}), }),
("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash ("https://imgur.com/a/RhJXhVT/all", { # 7 character album hash
"url": "695ef0c950023362a0163ee5041796300db76674", "url": "695ef0c950023362a0163ee5041796300db76674",
@ -218,22 +189,22 @@ class ImgurAlbumExtractor(ImgurExtractor):
"url": "86b4747f8147cec7602f0214e267309af73a8655", "url": "86b4747f8147cec7602f0214e267309af73a8655",
}), }),
("https://imgur.com/a/TcBmQ", { ("https://imgur.com/a/TcBmQ", {
"exception": exception.NotFoundError, "exception": exception.HttpError,
}), }),
("https://www.imgur.com/a/TcBmP"), # www ("https://www.imgur.com/a/TcBmP"), # www
("https://m.imgur.com/a/TcBmP"), # mobile ("https://m.imgur.com/a/TcBmP"), # mobile
) )
def items(self): def items(self):
self.login() album = self.api.album(self.key)
album = self._extract_data("/a/" + self.key) album["date"] = text.parse_timestamp(album["datetime"])
images = album["album_images"]["images"] images = album["images"]
del album["album_images"]
if int(album["num_images"]) > len(images): try:
url = "{}/ajaxalbums/getimages/{}/hit.json".format( del album["images"]
self.root, self.key) del album["ad_config"]
images = self.request(url).json()["data"]["images"] except KeyError:
pass
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, {"album": album, "count": len(images)} yield Message.Directory, {"album": album, "count": len(images)}
@ -258,16 +229,13 @@ class ImgurGalleryExtractor(ImgurExtractor):
) )
def items(self): def items(self):
self.login()
url = self.root + "/a/" + self.key url = self.root + "/a/" + self.key
with self.request(url, method="HEAD", fatal=False) as response: with self.request(url, method="HEAD", fatal=False) as response:
code = response.status_code if response.status_code < 400:
extr = ImgurAlbumExtractor
if code < 400: else:
extr = ImgurAlbumExtractor extr = ImgurImageExtractor
else: url = self.root + "/" + self.key
extr = ImgurImageExtractor
url = self.root + "/" + self.key
yield Message.Version, 1 yield Message.Version, 1
yield Message.Queue, url, {"_extractor": extr} yield Message.Queue, url, {"_extractor": extr}
@ -288,9 +256,7 @@ class ImgurUserExtractor(ImgurExtractor):
) )
def items(self): def items(self):
urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format( return self._items_queue(self.api.account_submissions(self.key))
self.api_root, self.key)
return self._items_apiv3(urlfmt)
class ImgurFavoriteExtractor(ImgurExtractor): class ImgurFavoriteExtractor(ImgurExtractor):
@ -304,6 +270,43 @@ class ImgurFavoriteExtractor(ImgurExtractor):
}) })
def items(self): def items(self):
urlfmt = "{}/3/account/{}/gallery_favorites/{{}}/newest".format( return self._items_queue(self.api.account_favorites(self.key))
self.api_root, self.key)
return self._items_apiv3(urlfmt)
class ImgurAPI():
    """Minimal client for version 3 of the imgur REST API.

    All HTTP traffic goes through the ``request()`` method of the
    extractor object this client was created for, and every request
    carries a ``Client-ID`` authorization header.
    """
    # Common prefix of every endpoint URL built by _call()
    API_ROOT = "https://api.imgur.com/3/"
    # Client ID used when the "client-id" option is unset or empty
    DEFAULT_CLIENT_ID = "546c25a59c58ad7"

    def __init__(self, extractor):
        """Bind the client to 'extractor' and prepare its request headers.

        The client ID is read from the extractor's "client-id" option.
        An empty or null value falls back to DEFAULT_CLIENT_ID instead
        of raising a TypeError during string concatenation.
        """
        self.extractor = extractor
        client_id = extractor.config("client-id", self.DEFAULT_CLIENT_ID)
        if not client_id:
            client_id = self.DEFAULT_CLIENT_ID
        self.headers = {
            # format() also copes with an ID given as a number
            "Authorization": "Client-ID {}".format(client_id),
        }

    def account_favorites(self, account):
        """Return a generator over the gallery favorites of 'account'"""
        endpoint = "account/{}/gallery_favorites".format(account)
        return self._pagination(endpoint)

    def account_submissions(self, account):
        """Return a generator over the submissions of 'account'"""
        endpoint = "account/{}/submissions".format(account)
        return self._pagination(endpoint)

    def album(self, album_hash):
        """Return the "data" object of the album endpoint"""
        return self._call("album/" + album_hash)

    def image(self, image_hash):
        """Return the "data" object of the image endpoint"""
        return self._call("image/" + image_hash)

    def _call(self, endpoint):
        """Request 'endpoint' and return the "data" member of its JSON"""
        response = self.extractor.request(
            self.API_ROOT + endpoint, headers=self.headers)
        return response.json()["data"]

    def _pagination(self, endpoint):
        """Yield the items of every page of 'endpoint'.

        Pages are requested as '<endpoint>/<num>' starting at 0;
        iteration stops at the first page with an empty result.
        """
        num = 0
        while True:
            data = self._call("{}/{}".format(endpoint, num))
            if not data:
                return
            yield from data
            num += 1

@ -118,7 +118,6 @@ AUTH_MAP = {
"idolcomplex": "Optional", "idolcomplex": "Optional",
"imgbb" : "Optional", "imgbb" : "Optional",
"instagram" : "Optional", "instagram" : "Optional",
"imgur" : "Optional",
"mangoxo" : "Optional", "mangoxo" : "Optional",
"nijie" : "Required", "nijie" : "Required",
"pixiv" : "Required", "pixiv" : "Required",

Loading…
Cancel
Save