'
header = HEADER_CUSTOM_TEMPLATE.format(
title=title, url=url, date=str(date),
)
else:
needle = '
'
catlist = deviation["category_path"].split("/")
categories = " / ".join(
('
{}'
'').format(self.root, cpath, cat.capitalize())
for cat, cpath in zip(
catlist,
itertools.accumulate(catlist, lambda t, c: t + "/" + c)
)
)
username = deviation["author"]["username"]
urlname = deviation.get("username") or username.lower()
header = HEADER_TEMPLATE.format(
title=title,
url=url,
userurl="{}/{}/".format(self.root, urlname),
username=username,
date=date,
categories=categories,
)
html = JOURNAL_TEMPLATE_HTML.format(
title=title,
html=html.replace(needle, header, 1),
shadow=shadow,
css=css,
cls=cls,
)
deviation["extension"] = "htm"
return Message.Url, html, deviation
@staticmethod
def _commit_journal_text(deviation, journal):
date = datetime.datetime.utcfromtimestamp(deviation["published_time"])
content = "\n".join(
text.unescape(text.remove_html(txt))
for txt in journal["html"].rpartition("")
)
txt = JOURNAL_TEMPLATE_TEXT.format(
title=deviation["title"],
username=deviation["author"]["username"],
date=date,
content=content,
)
deviation["extension"] = "txt"
return Message.Url, txt, deviation
@staticmethod
def _find_folder(folders, name):
pattern = r"[^\w]*" + name.replace("-", r"[^\w]+") + r"[^\w]*$"
for folder in folders:
if re.match(pattern, folder["name"]):
return folder
raise exception.NotFoundError("folder")
def _folder_urls(self, folders, category):
url = "{}/{}/{}/0/".format(self.root, self.user, category)
return [(url + folder["name"], folder) for folder in folders]
def _update_content(self, deviation, content):
data = self.api.deviation_download(deviation["deviationid"])
if self.original == "images":
url = data["src"].partition("?")[0]
mtype = mimetypes.guess_type(url, False)[0]
if not mtype or not mtype.startswith("image/"):
return
content.update(data)
class DeviantartGalleryExtractor(DeviantartExtractor):
"""Extractor for all deviations from an artist's gallery"""
subcategory = "gallery"
archive_fmt = "g_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"(?:/(?:gallery/?(?:\?catpath=/)?)?)?$"
test = (
("https://www.deviantart.com/shimoda7/gallery/", {
"pattern": r"https://(s3.amazonaws.com/origin-(img|orig)"
r".deviantart.net/|images-wixmp-\w+.wixmp.com/)",
"count": ">= 30",
"keyword": {
"allows_comments": bool,
"author": {
"type": "regular",
"usericon": str,
"userid": "9AE51FC7-0278-806C-3FFF-F4961ABF9E2B",
"username": "shimoda7",
},
"category_path": str,
"content": {
"filesize": int,
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"da_category": str,
"deviationid": str,
"?download_filesize": int,
"extension": str,
"index": str,
"is_deleted": bool,
"is_downloadable": bool,
"is_favourited": bool,
"is_mature": bool,
"preview": {
"height": int,
"src": str,
"transparency": bool,
"width": int,
},
"published_time": int,
"stats": {
"comments": int,
"favourites": int,
},
"target": dict,
"thumbs": list,
"title": str,
"url": r"re:https://www.deviantart.com/shimoda7/art/[^/]+-\d+",
"username": "shimoda7",
},
}),
("https://www.deviantart.com/yakuzafc", {
"pattern": r"https://www.deviantart.com/yakuzafc/gallery/0/",
"count": ">= 15",
}),
("https://www.deviantart.com/shimoda8/gallery/", {
"exception": exception.NotFoundError,
}),
("https://www.deviantart.com/shimoda7/gallery/?catpath=/"),
("https://shimoda7.deviantart.com/gallery/"),
("https://yakuzafc.deviantart.com/"),
("https://shimoda7.deviantart.com/gallery/?catpath=/"),
)
def deviations(self):
if self.flat and not self.group:
return self.api.gallery_all(self.user, self.offset)
else:
folders = self.api.gallery_folders(self.user)
return self._folder_urls(folders, "gallery")
class DeviantartFolderExtractor(DeviantartExtractor):
"""Extractor for deviations inside an artist's gallery folder"""
subcategory = "folder"
directory_fmt = ("{category}", "{folder[owner]}", "{folder[title]}")
archive_fmt = "F_{folder[uuid]}_{index}.{extension}"
pattern = BASE_PATTERN + r"/gallery/(\d+)/([^/?]+)"
test = (
("https://www.deviantart.com/shimoda7/gallery/722019/Miscellaneous", {
"count": 5,
"options": (("original", False),),
}),
("https://www.deviantart.com/yakuzafc/gallery/37412168/Crafts", {
"count": ">= 4",
"options": (("original", False),),
}),
("https://shimoda7.deviantart.com/gallery/722019/Miscellaneous"),
("https://yakuzafc.deviantart.com/gallery/37412168/Crafts"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
_, _, fid, self.fname = match.groups()
self.folder = {"owner": self.user, "index": fid}
def deviations(self):
folders = self.api.gallery_folders(self.user)
folder = self._find_folder(folders, self.fname)
self.folder["title"] = folder["name"]
self.folder["uuid"] = folder["folderid"]
return self.api.gallery(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["folder"] = self.folder
class DeviantartDeviationExtractor(DeviantartExtractor):
"""Extractor for single deviations"""
subcategory = "deviation"
archive_fmt = "{index}.{extension}"
pattern = BASE_PATTERN + r"/((?:art|journal)/[^/?]+-\d+)"
test = (
(("https://www.deviantart.com/shimoda7/art/"
"For-the-sake-of-a-memory-10073852"), {
"content": "6a7c74dc823ebbd457bdd9b3c2838a6ee728091e",
}),
("https://www.deviantart.com/zzz/art/zzz-1234567890", {
"exception": exception.NotFoundError,
}),
(("https://www.deviantart.com/myria-moon/art/"
"Aime-Moi-part-en-vadrouille-261986576"), {
"pattern": (r"https?://s3\.amazonaws\.com/origin-orig\."
r"deviantart\.net/a383/f/2013/135/e/7/[^.]+\.jpg\?"),
}),
("https://shimoda7.deviantart.com"
"/art/For-the-sake-of-a-memory-10073852"),
("https://myria-moon.deviantart.com"
"/art/Aime-Moi-part-en-vadrouille-261986576"),
("https://zzz.deviantart.com/art/zzz-1234567890"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.path = match.group(3)
def deviations(self):
url = "{}/{}/{}".format(self.root, self.user, self.path)
response = self.request(url, expect=range(400, 500))
deviation_id = text.extract(response.text, '//deviation/', '"')[0]
if response.status_code >= 400 or not deviation_id:
raise exception.NotFoundError("image")
return (self.api.deviation(deviation_id),)
class DeviantartStashExtractor(DeviantartExtractor):
"""Extractor for sta.sh-ed deviations"""
subcategory = "stash"
archive_fmt = "{index}.{extension}"
pattern = r"(?:https?://)?sta\.sh/([a-z0-9]+)"
test = (
("https://sta.sh/022c83odnaxc", {
"pattern": r"https://s3.amazonaws.com/origin-orig.deviantart.net",
"count": 1,
}),
("https://sta.sh/21jf51j7pzl2", {
"pattern": pattern,
"count": 4,
}),
("https://sta.sh/abcdefghijkl", {
"exception": exception.HttpError,
}),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.stash_id = match.group(1)
def deviations(self):
url = "https://sta.sh/" + self.stash_id
page = self.request(url).text
deviation_id = text.extract(page, '//deviation/', '"')[0]
if deviation_id:
yield self.api.deviation(deviation_id)
else:
data = {"_extractor": DeviantartStashExtractor}
page = text.extract(
page, '
= 20",
"options": (("original", False),),
}),
("https://pencilshadings.deviantart.com"
"/favourites/70595441/3D-Favorites"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
_, _, cid, self.cname = match.groups()
self.collection = {"owner": self.user, "index": cid}
def deviations(self):
folders = self.api.collections_folders(self.user)
folder = self._find_folder(folders, self.cname)
self.collection["title"] = folder["name"]
self.collection["uuid"] = folder["folderid"]
return self.api.collections(self.user, folder["folderid"], self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["collection"] = self.collection
class DeviantartJournalExtractor(DeviantartExtractor):
"""Extractor for an artist's journals"""
subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$"
test = (
("https://www.deviantart.com/angrywhitewanker/journal/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}),
("https://www.deviantart.com/angrywhitewanker/journal/", {
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
"options": (("journals", "text"),),
}),
("https://www.deviantart.com/angrywhitewanker/journal/", {
"count": 0,
"options": (("journals", "none"),),
}),
("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://angrywhitewanker.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"),
)
def deviations(self):
return self.api.browse_user_journals(self.user, self.offset)
class DeviantartPopularExtractor(DeviantartExtractor):
"""Extractor for popular deviations"""
subcategory = "popular"
directory_fmt = ("{category}", "Popular",
"{popular[range]}", "{popular[search]}")
archive_fmt = "P_{popular[range]}_{popular[search]}_{index}.{extension}"
pattern = (r"(?:https?://)?www\.deviantart\.com"
r"((?:/\w+)*)/(?:popular-([^/?]+))/?(?:\?([^#]*))?")
test = (
("https://www.deviantart.com/popular-24-hours/?q=tree+house", {
"options": (("original", False),),
}),
("https://www.deviantart.com/artisan/popular-all-time/?q=tree"),
)
def __init__(self, match):
DeviantartExtractor.__init__(self, match)
self.search_term = self.time_range = self.category_path = None
self.user = ""
path, trange, query = match.groups()
if path:
self.category_path = path.lstrip("/")
if trange:
self.time_range = trange.replace("-", "").replace("hours", "hr")
if query:
self.search_term = text.parse_query(query).get("q")
self.popular = {
"search": self.search_term or "",
"range": trange or "24-hours",
"path": self.category_path,
}
def deviations(self):
return self.api.browse_popular(
self.search_term, self.time_range, self.category_path, self.offset)
def prepare(self, deviation):
DeviantartExtractor.prepare(self, deviation)
deviation["popular"] = self.popular
class DeviantartAPI():
"""Minimal interface for the deviantart API"""
CLIENT_ID = "5388"
CLIENT_SECRET = "76b08c69cfb27f26d6161f9ab6d061a1"
def __init__(self, extractor):
self.extractor = extractor
self.log = extractor.log
self.headers = {}
delay = extractor.config("wait-min", 0)
self.delay = math.ceil(math.log2(delay)) if delay >= 1 else -1
self.delay_min = max(2, self.delay)
self.mature = extractor.config("mature", "true")
if not isinstance(self.mature, str):
self.mature = "true" if self.mature else "false"
self.refresh_token = extractor.config("refresh-token")
self.client_id = extractor.config("client-id", self.CLIENT_ID)
self.client_secret = extractor.config(
"client-secret", self.CLIENT_SECRET)
def browse_popular(self, query=None, timerange=None,
category_path=None, offset=0):
"""Yield popular deviations"""
endpoint = "browse/popular"
params = {"q": query, "offset": offset, "limit": 120,
"timerange": timerange, "category_path": category_path,
"mature_content": self.mature}
return self._pagination(endpoint, params)
def browse_user_journals(self, username, offset=0):
"""Yield all journal entries of a specific user"""
endpoint = "browse/user/journals"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature, "featured": "false"}
return self._pagination(endpoint, params)
def collections(self, username, folder_id, offset=0):
"""Yield all Deviation-objects contained in a collection folder"""
endpoint = "collections/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature}
return self._pagination(endpoint, params)
@memcache(keyarg=1)
def collections_folders(self, username, offset=0):
"""Yield all collection folders of a specific user"""
endpoint = "collections/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
def deviation(self, deviation_id):
"""Query and return info about a single Deviation"""
endpoint = "deviation/" + deviation_id
return self._call(endpoint)
def deviation_content(self, deviation_id):
"""Get extended content of a single Deviation"""
endpoint = "deviation/content"
params = {"deviationid": deviation_id}
return self._call(endpoint, params)
def deviation_download(self, deviation_id):
"""Get the original file download (if allowed)"""
endpoint = "deviation/download/" + deviation_id
params = {"mature_content": self.mature}
return self._call(endpoint, params)
def gallery(self, username, folder_id="", offset=0):
"""Yield all Deviation-objects contained in a gallery folder"""
endpoint = "gallery/" + folder_id
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature, "mode": "newest"}
return self._pagination(endpoint, params)
def gallery_all(self, username, offset=0):
"""Yield all Deviation-objects of a specific user"""
endpoint = "gallery/all"
params = {"username": username, "offset": offset, "limit": 24,
"mature_content": self.mature}
return self._pagination(endpoint, params)
@memcache(keyarg=1)
def gallery_folders(self, username, offset=0):
"""Yield all gallery folders of a specific user"""
endpoint = "gallery/folders"
params = {"username": username, "offset": offset, "limit": 50,
"mature_content": self.mature}
return self._pagination_list(endpoint, params)
@memcache(keyarg=1)
def user_profile(self, username):
"""Get user profile information"""
endpoint = "user/profile/" + username
return self._call(endpoint, expect_error=True)
def authenticate(self, refresh_token):
"""Authenticate the application by requesting an access token"""
self.headers["Authorization"] = self._authenticate_impl(refresh_token)
@cache(maxage=3590, keyarg=1)
def _authenticate_impl(self, refresh_token):
"""Actual authenticate implementation"""
url = "https://www.deviantart.com/oauth2/token"
if refresh_token:
self.log.info("Refreshing private access token")
data = {"grant_type": "refresh_token",
"refresh_token": _refresh_token_cache(refresh_token)}
else:
self.log.info("Requesting public access token")
data = {"grant_type": "client_credentials"}
auth = (self.client_id, self.client_secret)
response = self.extractor.request(
url, method="POST", data=data, auth=auth)
data = response.json()
if response.status_code != 200:
raise exception.AuthenticationError('"{} ({})"'.format(
data.get("error_description"), data.get("error")))
if refresh_token:
_refresh_token_cache.update(refresh_token, data["refresh_token"])
return "Bearer " + data["access_token"]
def _call(self, endpoint, params=None, expect_error=False, public=True):
"""Call an API endpoint"""
url = "https://www.deviantart.com/api/v1/oauth2/" + endpoint
while True:
if self.delay >= 0:
time.sleep(2 ** self.delay)
self.authenticate(None if public else self.refresh_token)
response = self.extractor.request(
url,
params=params,
headers=self.headers,
expect=range(400, 500),
)
data = response.json()
status = response.status_code
if 200 <= status < 400:
if self.delay > self.delay_min:
self.delay -= 1
return data
elif expect_error:
return None
elif data.get("error_description") == "User not found.":
raise exception.NotFoundError("user or group")
self.log.debug(response.text)
msg = "API responded with {} {}".format(
status, response.reason)
if status == 429:
self.delay += 1
self.log.warning("%s. Using %ds delay.", msg, 2 ** self.delay)
else:
self.log.error(msg)
return data
def _pagination(self, endpoint, params):
public = True
while True:
data = self._call(endpoint, params, public=public)
if "results" not in data:
self.log.error("Unexpected API response: %s", data)
return
if (public and self.refresh_token and
len(data["results"]) < params["limit"]):
self.log.debug("Switching to private access token")
public = False
continue
yield from data["results"]
if not data["has_more"]:
return
params["offset"] = data["next_offset"]
def _pagination_list(self, endpoint, params):
result = []
result.extend(self._pagination(endpoint, params))
return result
@cache(maxage=365*24*60*60, keyarg=0)
def _refresh_token_cache(original_token, new_token=None):
return new_token or original_token
SHADOW_TEMPLATE = """
"""
HEADER_TEMPLATE = """
"""
HEADER_CUSTOM_TEMPLATE = """
Journal Entry:
{date}
"""
JOURNAL_TEMPLATE_HTML = """text:
{title}
"""
JOURNAL_TEMPLATE_TEXT = """text:{title}
by {username}, {date}
{content}
"""