[imgur] add 'favorite' extractor (closes #420)

… and use a newer site-internal API endpoint for user posts
deviantart-rewrite
Mike Fährmann 5 years ago
parent ee5e20221f
commit 4330133114
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -52,7 +52,7 @@ ImageFap https://imagefap.com/ Images from Users, Gall
ImgBB https://imgbb.com/ Images from Users, Albums, individual Images Optional
imgbox https://imgbox.com/ Galleries, individual Images
imgth https://imgth.com/ Galleries
imgur https://imgur.com/ Albums, Galleries, individual Images
imgur https://imgur.com/ |imgur-C|
Instagram https://www.instagram.com/ |instagram-C| Optional
Jaimini's Box https://jaiminisbox.com/reader/ Chapters, Manga
Joyreactor http://joyreactor.cc/ |joyreactor-C|
@ -137,6 +137,7 @@ Turboimagehost https://www.turboimagehost.com/ individual Images
.. |deviantart-C| replace:: Collections, Deviations, Favorites, Folders, Galleries, Journals, Popular Images, Scraps, Sta.sh
.. |flickr-C| replace:: Images from Users, Albums, Favorites, Galleries, Groups, individual Images, Search Results
.. |hentaifoundry-C| replace:: Images from Users, Favorites, individual Images, Popular Images, Recent Images, Scraps
.. |imgur-C| replace:: Images from Users, Albums, Favorites, Galleries, individual Images
.. |instagram-C| replace:: Images from Users, Channels, individual Images, Stories, Tag-Searches
.. |joyreactor-C| replace:: Images from Users, Posts, Search Results, Tag-Searches
.. |nijie-C| replace:: Images from Users, Doujin, Favorites, individual Images

@ -10,13 +10,18 @@
from .common import Extractor, Message
from .. import text, exception
import itertools
import json
BASE_PATTERN = r"(?:https?://)?(?:www\.|[im]\.)?imgur\.com"
class ImgurExtractor(Extractor):
"""Base class for imgur extractors"""
category = "imgur"
root = "https://imgur.com"
api_root = "https://api.imgur.com"
def __init__(self, match):
Extractor.__init__(self, match)
@ -43,14 +48,40 @@ class ImgurExtractor(Extractor):
image["extension"] = image["ext"][1:]
return url
def _items_apiv3(self, urlfmt):
    """Yield queue messages for every entry of a paginated API listing.

    'urlfmt' is a format string with a single placeholder that receives
    the page number; pages are requested one after another, starting
    at 0. Each returned entry is tagged with the album or image extractor
    class (chosen by its 'is_album' value) and queued by its 'link'.
    """
    query = {
        "IMGURPLATFORM" : "web",
        "album_previews": "0",
        "client_id"     : "546c25a59c58ad7",
    }
    request_headers = {
        "Origin" : self.root,
        "Referer": self.root + "/",
    }
    # resolve both target extractor classes once, before any request
    for_albums = ImgurAlbumExtractor
    for_images = ImgurImageExtractor

    yield Message.Version, 1

    page = 0
    while True:
        response = self.request(
            urlfmt.format(page), params=query, headers=request_headers)
        payload = response.json()

        for entry in payload["data"]:
            if entry["is_album"]:
                entry["_extractor"] = for_albums
            else:
                entry["_extractor"] = for_images
            yield Message.Queue, entry["link"], entry

        # a page with fewer than 60 entries ends the listing
        # (60 is presumably the API's page size -- TODO confirm)
        if len(payload["data"]) < 60:
            return
        page += 1
class ImgurImageExtractor(ImgurExtractor):
"""Extractor for individual images on imgur.com"""
subcategory = "image"
filename_fmt = "{category}_{hash}{title:?_//}.{extension}"
archive_fmt = "{hash}"
pattern = (r"(?:https?://)?(?:www\.|[im]\.|)?imgur\.com"
r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?")
pattern = BASE_PATTERN + r"/(?!gallery)(\w{7}|\w{5})[sbtmlh]?\.?"
test = (
("https://imgur.com/21yMxCS", {
"url": "6f2dcfb86815bdd72808c313e5f715610bc7b9b2",
@ -111,8 +142,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
directory_fmt = ("{category}", "{album[hash]}{album[title]:? - //}")
filename_fmt = "{category}_{album[hash]}_{num:>03}_{hash}.{extension}"
archive_fmt = "{album[hash]}_{hash}"
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/(?:a|t/unmuted)/(\w{7}|\w{5})")
pattern = BASE_PATTERN + r"/(?:a|t/unmuted)/(\w{7}|\w{5})"
test = (
("https://imgur.com/a/TcBmP", {
"url": "ce3552f550a5b5316bd9c7ae02e21e39f30c0563",
@ -181,8 +211,7 @@ class ImgurAlbumExtractor(ImgurExtractor):
class ImgurGalleryExtractor(ImgurExtractor):
"""Extractor for imgur galleries"""
subcategory = "gallery"
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/gallery/(\w{7}|\w{5})")
pattern = BASE_PATTERN + r"/gallery/(\w{7}|\w{5})"
test = (
("https://imgur.com/gallery/zf2fIms", { # non-album gallery (#380)
"pattern": "https://imgur.com/zf2fIms",
@ -210,35 +239,34 @@ class ImgurGalleryExtractor(ImgurExtractor):
class ImgurUserExtractor(ImgurExtractor):
"""Extractor for all images posted by a user"""
subcategory = "user"
pattern = (r"(?:https?://)?(?:www\.|m\.)?imgur\.com"
r"/user/([^/?&#]+)(?:/submitted|/posts)?/?")
pattern = BASE_PATTERN + r"/user/([^/?&#]+)(?:/posts|/submitted)?/?$"
test = (
("https://imgur.com/user/Miguenzo", {
"range": "1-100",
"count": 100,
"pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+"
}),
("https://imgur.com/user/Miguenzo/submitted"),
("https://imgur.com/user/Miguenzo/submitted/newest"),
("https://imgur.com/user/Miguenzo/posts"),
("https://imgur.com/user/Miguenzo/submitted"),
)
def items(self):
num = 0
base = "{}/user/{}/submitted".format(self.root, self.key)
data = {"_extractor": ImgurGalleryExtractor}
headers = {
"Referer": base,
"X-Requested-With": "XMLHttpRequest",
}
urlfmt = "{}/3/account/{}/submissions/{{}}/newest".format(
self.api_root, self.key)
return self._items_apiv3(urlfmt)
while True:
cnt = 0
url = "{}/page/{}?scrolling".format(base, num)
page = self.request(url, headers=headers).text
for path in text.extract_iter(page, '<a href="', '"'):
cnt += 1
yield Message.Queue, self.root + path, data
class ImgurFavoriteExtractor(ImgurExtractor):
"""Extractor for a user's favorites"""
subcategory = "favorite"
pattern = BASE_PATTERN + r"/user/([^/?&#]+)/favorites"
test = ("https://imgur.com/user/Miguenzo/favorites", {
"range": "1-100",
"count": 100,
"pattern": r"https?://(i.imgur.com|imgur.com/a)/[\w.]+"
})
if cnt < 60:
return
num += 1
def items(self):
    """Delegate to _items_apiv3() with the gallery_favorites page URL.

    The URL template is filled with 'self.api_root' and 'self.key'
    (presumably the user name captured by 'pattern' -- TODO confirm);
    the remaining '{}' placeholder is left for the page number.
    """
    template = "{}/3/account/{}/gallery_favorites/{{}}/newest"
    page_urlfmt = template.format(self.api_root, self.key)
    return self._items_apiv3(page_urlfmt)

Loading…
Cancel
Save