[deviantart] fix journal creation (#400)

deviantart-rewrite
Mike Fährmann 5 years ago
parent c6c5cb1898
commit dedea3b4db
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -158,12 +158,16 @@ class DeviantartExtractor(Extractor):
def _commit_journal_html(self, deviation, journal): def _commit_journal_html(self, deviation, journal):
title = text.escape(deviation["title"]) title = text.escape(deviation["title"])
url = deviation["url"] url = deviation["url"]
thumbs = deviation["thumbs"] thumbs = deviation.get("thumbs") or deviation.get("files")
html = journal["html"] html = journal["html"]
shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else "" shadow = SHADOW_TEMPLATE.format_map(thumbs[0]) if thumbs else ""
if "css" in journal: if "css" in journal:
css, cls = journal["css"], "withskin" css, cls = journal["css"], "withskin"
elif html.startswith("<style"):
css, _, html = html.partition("</style>")
css = css.partition(">")[2]
cls = "withskin"
else: else:
css, cls = "", "journal-green" css, cls = "", "journal-green"
@ -194,22 +198,25 @@ class DeviantartExtractor(Extractor):
categories=categories, categories=categories,
) )
if needle in html:
html = html.replace(needle, header, 1)
else:
html = JOURNAL_TEMPLATE_HTML_EXTRA.format(header, html)
html = JOURNAL_TEMPLATE_HTML.format( html = JOURNAL_TEMPLATE_HTML.format(
title=title, title=title, html=html, shadow=shadow, css=css, cls=cls)
html=html.replace(needle, header, 1),
shadow=shadow,
css=css,
cls=cls,
)
deviation["extension"] = "htm" deviation["extension"] = "htm"
return Message.Url, html, deviation return Message.Url, html, deviation
@staticmethod @staticmethod
def _commit_journal_text(deviation, journal): def _commit_journal_text(deviation, journal):
html = journal["html"]
if html.startswith("<style"):
html = html.partition("</style>")[2]
content = "\n".join( content = "\n".join(
text.unescape(text.remove_html(txt)) text.unescape(text.remove_html(txt))
for txt in journal["html"].rpartition("<script")[0].split("<br />") for txt in html.rpartition("<script")[0].split("<br />")
) )
txt = JOURNAL_TEMPLATE_TEXT.format( txt = JOURNAL_TEMPLATE_TEXT.format(
title=deviation["title"], title=deviation["title"],
@ -305,7 +312,7 @@ class DeviantartGalleryExtractor(DeviantartExtractor):
}), }),
# 'folders' option (#276) # 'folders' option (#276)
("https://www.deviantart.com/justatest235723", { ("https://www.deviantart.com/justatest235723", {
"count": 2, "count": 3,
"options": (("metadata", 1), ("folders", 1), ("original", 0)), "options": (("metadata", 1), ("folders", 1), ("original", 0)),
"keyword": { "keyword": {
"description": str, "description": str,
@ -484,19 +491,21 @@ class DeviantartJournalExtractor(DeviantartExtractor):
subcategory = "journal" subcategory = "journal"
directory_fmt = ("{category}", "{username}", "Journal") directory_fmt = ("{category}", "{username}", "Journal")
archive_fmt = "j_{username}_{index}.{extension}" archive_fmt = "j_{username}_{index}.{extension}"
pattern = BASE_PATTERN + r"/(?:journal|blog)/?(?:\?catpath=/)?$" pattern = BASE_PATTERN + r"/(?:posts(?:/journals)?|journal)/?(?:\?.*)?$"
test = ( test = (
("https://www.deviantart.com/angrywhitewanker/journal/", { ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44", "url": "38db2a0d3a587a7e0f9dba7ff7d274610ebefe44",
}), }),
("https://www.deviantart.com/angrywhitewanker/journal/", { ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e", "url": "b2a8e74d275664b1a4acee0fca0a6fd33298571e",
"options": (("journals", "text"),), "options": (("journals", "text"),),
}), }),
("https://www.deviantart.com/angrywhitewanker/journal/", { ("https://www.deviantart.com/angrywhitewanker/posts/journals/", {
"count": 0, "count": 0,
"options": (("journals", "none"),), "options": (("journals", "none"),),
}), }),
("https://www.deviantart.com/shimoda7/posts/"),
("https://www.deviantart.com/shimoda7/journal/"),
("https://www.deviantart.com/shimoda7/journal/?catpath=/"), ("https://www.deviantart.com/shimoda7/journal/?catpath=/"),
("https://shimoda7.deviantart.com/journal/"), ("https://shimoda7.deviantart.com/journal/"),
("https://shimoda7.deviantart.com/journal/?catpath=/"), ("https://shimoda7.deviantart.com/journal/?catpath=/"),
@ -549,22 +558,8 @@ class DeviantartPopularExtractor(DeviantartExtractor):
deviation["popular"] = self.popular deviation["popular"] = self.popular
class DeviantartExtractorV2(Extractor): class DeviantartExtractorV2(DeviantartExtractor):
"""Base class for deviantart extractors using the NAPI""" """Base class for deviantart extractors using the NAPI"""
category = "deviantart"
directory_fmt = ("{category}", "{author[username]!l}")
filename_fmt = "{category}_{index}_{title}.{extension}"
root = "https://www.deviantart.com"
def __init__(self, match=None):
Extractor.__init__(self, match)
self.offset = 0
self.extra = self.config("extra", False)
self.quality = self.config("quality", "100")
self.user = match.group(1) or match.group(2)
if self.quality:
self.quality = "q_{}".format(self.quality)
def items(self): def items(self):
url = ( url = (
@ -625,7 +620,11 @@ class DeviantartExtractorV2(Extractor):
# extract download target # extract download target
target = files[-1] target = files[-1]
if target["type"] == "gif": if deviation["isJournal"] and self.commit_journal:
journal = deviation["textContent"]
journal["html"] = journal["html"]["markup"]
target["src"] = self.commit_journal(deviation, journal)[1]
elif target["type"] == "gif":
pass pass
elif target["type"] == "video": elif target["type"] == "video":
# select largest video # select largest video
@ -642,9 +641,7 @@ class DeviantartExtractorV2(Extractor):
target = extended["download"] target = extended["download"]
target["src"] = target["url"] target["src"] = target["url"]
del target["url"] del target["url"]
elif target["src"].startswith("https://images-wixmp-"):
# url rewrites
if target["src"].startswith("https://images-wixmp-"):
if deviation["index"] <= 790677560: if deviation["index"] <= 790677560:
# https://github.com/r888888888/danbooru/issues/4069 # https://github.com/r888888888/danbooru/issues/4069
target["src"] = re.sub( target["src"] = re.sub(
@ -662,6 +659,7 @@ class DeviantartExtractorV2(Extractor):
sub("_", deviation["author"]["username"].lower()), "-d", sub("_", deviation["author"]["username"].lower()), "-d",
util.bencode(deviation["index"], alphabet), util.bencode(deviation["index"], alphabet),
)) ))
if "extension" not in deviation:
deviation["extension"] = target["extension"] = ( deviation["extension"] = target["extension"] = (
text.ext_from_url(target["src"]) text.ext_from_url(target["src"])
) )
@ -1122,6 +1120,27 @@ roses/cssmin/desktop.css?1491362542749" >
</html> </html>
""" """
# Fallback wrapper markup for journal HTML.
# Filled with str.format() using two positional '{}' fields: the first
# receives the generated journal header, the second the journal's own HTML.
# It is applied when the journal HTML does not contain the marker string
# that the header would otherwise replace in-place, so the header and the
# body still end up inside a box structure before being inserted into
# JOURNAL_TEMPLATE_HTML.
# NOTE(review): the class names (gr-box, gr-top, gr-body, ...) presumably
# mirror the structure expected by the stylesheet embedded in
# JOURNAL_TEMPLATE_HTML -- confirm against that template.
# The trailing backslashes on the first two lines suppress the newline that
# would otherwise be part of the string at those positions.
JOURNAL_TEMPLATE_HTML_EXTRA = """\
<div id="devskin0"><div class="negate-box-margin" style="">\
<div usr class="gr-box gr-genericbox"
><i usr class="gr1"><i></i></i
><i usr class="gr2"><i></i></i
><i usr class="gr3"><i></i></i
><div usr class="gr-top">
<i usr class="tri"></i>
{}
</div>
</div><div usr class="gr-body"><div usr class="gr">
<div class="grf-indent">
<div class="text">
{} </div>
</div>
</div></div>
<i usr class="gr3 gb"></i>
<i usr class="gr2 gb"></i>
<i usr class="gr1 gb gb1"></i> </div>
</div></div>"""
JOURNAL_TEMPLATE_TEXT = """text:{title} JOURNAL_TEMPLATE_TEXT = """text:{title}
by {username}, {date} by {username}, {date}

Loading…
Cancel
Save