[imgur] update

- fix image/album detection for galleries
- use new API endpoints for image/album data
pull/997/head
Mike Fährmann 4 years ago
parent b5243297ff
commit 799ca07fc8
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -27,19 +27,17 @@ class ImgurExtractor(Extractor):
self.mp4 = self.config("mp4", True) self.mp4 = self.config("mp4", True)
def _prepare(self, image): def _prepare(self, image):
try: image.update(image["metadata"])
del image["ad_url"] del image["metadata"]
del image["ad_type"]
del image["ad_config"]
except KeyError:
pass
if image["animated"] and self.mp4 and "mp4" in image: if image["ext"] == "jpeg":
url = image["mp4"] image["ext"] = "jpg"
else: elif image["is_animated"] and self.mp4 and image["ext"] == "gif":
url = image["link"] image["ext"] = "mp4"
image["date"] = text.parse_timestamp(image["datetime"]) image["url"] = url = "https://i.imgur.com/{}.{}".format(
image["id"], image["ext"])
image["date"] = text.parse_datetime(image["created_at"])
text.nameext_from_url(url, image) text.nameext_from_url(url, image)
return url return url
@ -65,33 +63,38 @@ class ImgurImageExtractor(ImgurExtractor):
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2", "url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
"content": "0c8768055e4e20e7c7259608b67799171b691140", "content": "0c8768055e4e20e7c7259608b67799171b691140",
"keyword": { "keyword": {
"account_id" : None, "account_id" : 0,
"account_url" : None, "comment_count" : int,
"animated" : False, "cover_id" : "21yMxCS",
"bandwidth" : int, "date" : "dt:2016-11-10 14:24:35",
"date" : "dt:2016-11-10 14:24:35", "description" : "",
"datetime" : 1478787875, "downvote_count": int,
"description" : None, "duration" : 0,
"edited" : "0", "ext" : "png",
"extension" : "png", "favorite" : False,
"favorite" : False, "favorite_count": 0,
"filename" : "21yMxCS", "has_sound" : False,
"has_sound" : False, "height" : 32,
"height" : 32, "id" : "21yMxCS",
"id" : "21yMxCS", "image_count" : 1,
"in_gallery" : False, "in_most_viral" : False,
"in_most_viral": False, "is_ad" : False,
"is_ad" : False, "is_album" : False,
"link" : "https://i.imgur.com/21yMxCS.png", "is_animated" : False,
"nsfw" : False, "is_looping" : False,
"section" : None, "is_mature" : False,
"size" : 182, "is_pending" : False,
"tags" : [], "mime_type" : "image/png",
"title" : "Test", "name" : "test-テスト",
"type" : "image/png", "point_count" : int,
"views" : int, "privacy" : "",
"vote" : None, "score" : int,
"width" : 64, "size" : 182,
"title" : "Test",
"upvote_count" : int,
"url" : "https://i.imgur.com/21yMxCS.png",
"view_count" : int,
"width" : 64,
}, },
}), }),
("http://imgur.com/0gybAXR", { # gifv/mp4 video ("http://imgur.com/0gybAXR", { # gifv/mp4 video
@ -101,30 +104,32 @@ class ImgurImageExtractor(ImgurExtractor):
("https://imgur.com/XFfsmuC", { # missing title in API response (#467) ("https://imgur.com/XFfsmuC", { # missing title in API response (#467)
"keyword": {"title": "Tears are a natural response to irritants"}, "keyword": {"title": "Tears are a natural response to irritants"},
}), }),
("https://imgur.com/HjoXJAd", { # url ends with '.jpg?1'
"url": "ec2cf11a2bfb4939feff374781a6e6f3e9af8e8e",
}),
("https://imgur.com/1Nily2P", { # animated png ("https://imgur.com/1Nily2P", { # animated png
"pattern": "https://i.imgur.com/1Nily2P.png", "pattern": "https://i.imgur.com/1Nily2P.png",
}), }),
("https://imgur.com/zzzzzzz", { # not found ("https://imgur.com/zzzzzzz", { # not found
"exception": exception.HttpError, "exception": exception.HttpError,
}), }),
("https://www.imgur.com/21yMxCS"), # www ("https://www.imgur.com/21yMxCS"), # www
("https://m.imgur.com/21yMxCS"), # mobile ("https://m.imgur.com/21yMxCS"), # mobile
("https://imgur.com/zxaY6"), # 5 character key ("https://imgur.com/zxaY6"), # 5 character key
("https://i.imgur.com/21yMxCS.png"), # direct link ("https://i.imgur.com/21yMxCS.png"), # direct link
("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail ("https://i.imgur.com/21yMxCSh.png"), # direct link thumbnail
("https://i.imgur.com/zxaY6.gif"), # direct link (short) ("https://i.imgur.com/zxaY6.gif"), # direct link (short)
("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb) ("https://i.imgur.com/zxaY6s.gif"), # direct link (short; thumb)
) )
def items(self): def items(self):
image = self.api.image(self.key) image = self.api.image(self.key)
if not image["title"]:
page = self.request(self.root + "/" + self.key, fatal=False).text try:
title = text.extract(page, "<title>", "<")[0] or "" del image["ad_url"]
image["title"] = text.unescape(title.rpartition(" - ")[0].strip()) del image["ad_type"]
except KeyError:
pass
image.update(image["media"][0])
del image["media"]
url = self._prepare(image) url = self._prepare(image)
yield Message.Version, 1 yield Message.Version, 1
yield Message.Directory, image yield Message.Directory, image
@ -143,53 +148,49 @@ class ImgurAlbumExtractor(ImgurExtractor):
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563", "url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
"keyword": { "keyword": {
"album": { "album": {
"account_id" : None, "account_id" : 0,
"account_url" : None, "comment_count" : int,
"cover" : "693j2Kr", "cover_id" : "693j2Kr",
"cover_edited": None, "date" : "dt:2015-10-09 10:37:50",
"cover_height": 1400, "description" : "",
"cover_width" : 951, "downvote_count": 0,
"date" : "dt:2015-10-09 10:37:50", "favorite" : False,
"datetime" : 1444387070, "favorite_count": 0,
"description" : None, "id" : "TcBmP",
"favorite" : False, "image_count" : 19,
"id" : "TcBmP", "in_most_viral" : False,
"images_count": 19, "is_ad" : False,
"in_gallery" : False, "is_album" : True,
"is_ad" : False, "is_mature" : False,
"is_album" : True, "is_pending" : False,
"layout" : "blog", "privacy" : "private",
"link" : "https://imgur.com/a/TcBmP", "score" : int,
"nsfw" : bool, "title" : "138",
"privacy" : "hidden", "topic" : "",
"section" : None, "topic_id" : 0,
"title" : "138", "upvote_count" : int,
"views" : int, "url" : "https://imgur.com/a/TcBmP",
"view_count" : int,
"virality" : int,
}, },
"account_id" : None, "account_id" : 0,
"account_url": None, "count" : 19,
"animated" : bool,
"bandwidth" : int,
"date" : "type:datetime", "date" : "type:datetime",
"datetime" : int, "description": "",
"description": None, "ext" : "jpg",
"edited" : "0",
"favorite" : False,
"has_sound" : False, "has_sound" : False,
"height" : int, "height" : int,
"id" : str, "id" : str,
"in_gallery" : False, "is_animated": False,
"is_ad" : False, "is_looping" : False,
"link" : r"re:https://i\.imgur\.com/\w+\.jpg", "mime_type" : "image/jpeg",
"nsfw" : None, "name" : str,
"num" : int, "num" : int,
"section" : None,
"size" : int, "size" : int,
"tags" : list, "title" : str,
"title" : None, "type" : "image",
"type" : "image/jpeg", "updated_at" : None,
"views" : int, "url" : str,
"vote" : None,
"width" : int, "width" : int,
}, },
}), }),
@ -208,13 +209,15 @@ class ImgurAlbumExtractor(ImgurExtractor):
def items(self): def items(self):
album = self.api.album(self.key) album = self.api.album(self.key)
album["date"] = text.parse_timestamp(album["datetime"]) album["date"] = text.parse_datetime(album["created_at"])
images = album["images"]
images = album["media"]
del album["media"]
count = len(images) count = len(images)
try: try:
del album["images"] del album["ad_url"]
del album["ad_config"] del album["ad_type"]
except KeyError: except KeyError:
pass pass
@ -239,22 +242,17 @@ class ImgurGalleryExtractor(ImgurExtractor):
("https://imgur.com/gallery/eD9CT", { ("https://imgur.com/gallery/eD9CT", {
"pattern": "https://imgur.com/a/eD9CT", "pattern": "https://imgur.com/a/eD9CT",
}), }),
("https://imgur.com/t/unmuted/26sEhNr", { # unmuted URL ("https://imgur.com/t/unmuted/26sEhNr"),
"pattern": "https://imgur.com/26sEhNr",
}),
("https://imgur.com/t/cat/qSB8NbN"), ("https://imgur.com/t/cat/qSB8NbN"),
) )
def items(self): def items(self):
url = self.root + "/a/" + self.key if self.api.gallery(self.key)["is_album"]:
with self.request(url, method="HEAD", fatal=False) as response: url = "{}/a/{}".format(self.root, self.key)
if response.status_code < 400: extr = ImgurAlbumExtractor
extr = ImgurAlbumExtractor else:
else: url = "{}/{}".format(self.root, self.key)
extr = ImgurImageExtractor extr = ImgurImageExtractor
url = self.root + "/" + self.key
yield Message.Version, 1
yield Message.Queue, url, {"_extractor": extr} yield Message.Queue, url, {"_extractor": extr}
@ -346,38 +344,46 @@ class ImgurAPI():
} }
def account_favorites(self, account): def account_favorites(self, account):
endpoint = "account/{}/gallery_favorites".format(account) endpoint = "/3/account/{}/gallery_favorites".format(account)
return self._pagination(endpoint) return self._pagination(endpoint)
def gallery_search(self, query): def gallery_search(self, query):
endpoint = "gallery/search" endpoint = "/3/gallery/search"
params = {"q": query} params = {"q": query}
return self._pagination(endpoint, params) return self._pagination(endpoint, params)
def account_submissions(self, account): def account_submissions(self, account):
endpoint = "account/{}/submissions".format(account) endpoint = "/3/account/{}/submissions".format(account)
return self._pagination(endpoint) return self._pagination(endpoint)
def gallery_subreddit(self, subreddit): def gallery_subreddit(self, subreddit):
endpoint = "gallery/r/{}".format(subreddit) endpoint = "/3/gallery/r/{}".format(subreddit)
return self._pagination(endpoint) return self._pagination(endpoint)
def gallery_tag(self, tag): def gallery_tag(self, tag):
endpoint = "gallery/t/{}".format(tag) endpoint = "/3/gallery/t/{}".format(tag)
return self._pagination(endpoint, key="items") return self._pagination(endpoint, key="items")
def image(self, image_hash):
endpoint = "/post/v1/media/" + image_hash
params = {"include": "media,tags,account"}
return self._call(endpoint, params)
def album(self, album_hash): def album(self, album_hash):
return self._call("album/" + album_hash) endpoint = "/post/v1/albums/" + album_hash
params = {"include": "media,tags,account"}
return self._call(endpoint, params)
def image(self, image_hash): def gallery(self, gallery_hash):
return self._call("image/" + image_hash) endpoint = "/post/v1/posts/" + gallery_hash
return self._call(endpoint)
def _call(self, endpoint, params=None): def _call(self, endpoint, params=None):
try: try:
return self.extractor.request( return self.extractor.request(
"https://api.imgur.com/3/" + endpoint, "https://api.imgur.com" + endpoint,
params=params, headers=self.headers, params=params, headers=self.headers,
).json()["data"] ).json()
except exception.HttpError as exc: except exception.HttpError as exc:
if exc.status != 403 or b"capacity" not in exc.response.content: if exc.status != 403 or b"capacity" not in exc.response.content:
raise raise
@ -388,7 +394,7 @@ class ImgurAPI():
num = 0 num = 0
while True: while True:
data = self._call("{}/{}".format(endpoint, num), params) data = self._call("{}/{}".format(endpoint, num), params)["data"]
if key: if key:
data = data[key] data = data[key]
if not data: if not data:

Loading…
Cancel
Save