[deviantart] always download original images

Deviation-objects returned by the DeviantArt API don't always contain
the URL and metadata of the original image ([1]). Getting this
information requires an additional API call [2]; whether that call is
needed is indicated by the 'is_downloadable' and 'download_filesize'
metadata within a deviation-object.

[1] https://myria-moon.deviantart.com/art/Aime-Moi-part-en-vadrouille-261986576
[2] https://www.deviantart.com/developers/http/v1/20160316/deviation_download/bed6982b88949bdb08b52cd6763fcafd
pull/54/head
Mike Fährmann 7 years ago
parent 8e6a767109
commit 75d3a1f72f
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -287,6 +287,19 @@ Description Select the directory structure created by the Gallery- and
=========== =====
extractor.deviantart.original
-----------------------------
=========== =====
Type ``bool``
Default ``true``
Description Request full-sized original images if available.
Some of DeviantArt's images require an additional API call to get
their actual original version, which is hosted on
Amazon Web Services (AWS) servers.
=========== =====
extractor.deviantart.mature
---------------------------
=========== =====

@ -27,6 +27,8 @@ class DeviantartExtractor(Extractor):
Extractor.__init__(self)
self.api = DeviantartAPI(self)
self.offset = 0
self.flat = self.config("flat", True)
self.original = self.config("original", True)
if match:
self.user = match.group(1)
@ -53,7 +55,12 @@ class DeviantartExtractor(Extractor):
yield Message.Directory, deviation
if "content" in deviation:
yield self.commit(deviation, deviation["content"])
content = deviation["content"]
if (self.original and deviation["is_downloadable"] and
content["filesize"] != deviation["download_filesize"]):
content.update(
self.api.deviation_download(deviation["deviationid"]))
yield self.commit(deviation, content)
if "videos" in deviation:
video = max(deviation["videos"],
@ -144,10 +151,6 @@ class DeviantartExtractor(Extractor):
deviation["extension"] = "htm"
return Message.Url, html, deviation
# Read-only accessor: looks up the "flat" configuration option on every
# access and falls back to True when the option is not set.
@property
def flat(self):
return self.config("flat", True)
@staticmethod
def _find_folder(folders, name):
pattern = r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$"
@ -200,6 +203,7 @@ class DeviantartFolderExtractor(DeviantartExtractor):
("http://majestic-da.deviantart.com/gallery/63419606/CHIBI-KAWAII", {
"url": "2ea2a3df9591c26568b09291acb453fb87ce9920",
"keyword": "160b891599aa4ba1c799cd9e1696bcb1ddefeef4",
"options": (("original", False),),
}),
]
@ -242,6 +246,11 @@ class DeviantartDeviationExtractor(DeviantartExtractor):
("http://sta.sh/abcdefghijkl", {
"exception": exception.NotFoundError,
}),
(("https://myria-moon.deviantart.com/art/"
"Aime-Moi-part-en-vadrouille-261986576"), {
"pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
}),
]
def __init__(self, match):
@ -292,6 +301,7 @@ class DeviantartCollectionExtractor(DeviantartExtractor):
test = [("http://rosuuri.deviantart.com/favourites/58951174/Useful", {
"url": "f43b202011483e06998db1891e4b62381fabd64a",
"keyword": "629eb627747b3f0ae35541d0725cc345b3ac5aca",
"options": (("original", False),),
})]
def __init__(self, match):
@ -333,6 +343,7 @@ class DeviantartAPI():
def __init__(self, extractor, client_id="5388",
client_secret="76b08c69cfb27f26d6161f9ab6d061a1"):
self.session = extractor.session
self.headers = {}
self.log = extractor.log
self.client_id = extractor.config("client-id", client_id)
self.client_secret = extractor.config("client-secret", client_secret)
@ -374,6 +385,12 @@ class DeviantartAPI():
params = {"deviationid": deviation_id}
return self._call(endpoint, params)
def deviation_download(self, deviation_id):
    """Request the original-file download data for one deviation.

    Calls the ``deviation/download/<deviation_id>`` endpoint, passing the
    API object's ``mature`` setting as the ``mature_content`` query
    parameter, and returns whatever ``self._call`` produces for that
    request (presumably a dict describing the original file; the caller
    merges it into a deviation's content mapping).
    """
    return self._call(
        "deviation/download/" + deviation_id,
        {"mature_content": self.mature},
    )
def gallery(self, username, folder_id="", offset=0):
"""Yield all Deviation-objects contained in a gallery folder"""
endpoint = "gallery/" + folder_id
@ -407,7 +424,7 @@ class DeviantartAPI():
access_token = self._authenticate_impl(
self.client_id, self.client_secret
)
self.session.headers["Authorization"] = access_token
self.headers["Authorization"] = access_token
@cache(maxage=3590, keyarg=1)
def _authenticate_impl(self, client_id, client_secret):
@ -432,7 +449,8 @@ class DeviantartAPI():
time.sleep(self.delay)
self.authenticate()
response = self.session.get(url, params=params)
response = self.session.get(
url, headers=self.headers, params=params)
if response.status_code == 200:
break

@ -27,17 +27,23 @@ class TestExtractors(unittest.TestCase):
config.clear()
def _run_test(self, extr, url, result):
content = "content" in result if result else False
if result:
if "options" in result:
for key, value in result["options"]:
config.set(key.split("."), value)
content = "content" in result
else:
content = False
tjob = job.TestJob(url, content=content)
self.assertEqual(extr, tjob.extractor.__class__)
if not result:
return
if "options" in result:
for key, value in result["options"]:
config.set(key, value)
if "exception" in result:
self.assertRaises(result["exception"], tjob.run)
return
tjob.run()
if "url" in result:
self.assertEqual(result["url"], tjob.hash_url.hexdigest())

Loading…
Cancel
Save