[newgrounds] improve 'art-image' extraction (#4642)

- download files in original resolution
- replace .webp with extension of first file
pull/4667/head
Mike Fährmann 11 months ago
parent 833dce141f
commit c4c4e4d2f4
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -56,13 +56,26 @@ class NewgroundsExtractor(Extractor):
yield Message.Directory, post
yield Message.Url, url, text.nameext_from_url(url, post)
ext = post["extension"]
for num, url in enumerate(text.extract_iter(
post["_images"] + post["_comment"],
'data-smartload-src="', '"'), 1):
post["num"] = num
post["_index"] = "{}_{:>02}".format(post["index"], num)
url = text.ensure_http_scheme(url)
yield Message.Url, url, text.nameext_from_url(url, post)
text.nameext_from_url(url, post)
if "_fallback" in post:
del post["_fallback"]
if "/comments/" not in url:
url = url.replace("/medium_views/", "/images/", 1)
if post["extension"] == "webp":
post["_fallback"] = (url,)
post["extension"] = ext
url = url.replace(".webp", "." + ext)
yield Message.Url, url, post
else:
self.log.warning(
"Unable to get download URL for '%s'", post_url)

@ -55,15 +55,30 @@ __tests__ = (
{
"#url" : "https://www.newgrounds.com/art/view/zedrinbot/lewd-animation-tutorial",
"#comment" : "extra files in 'art-image-row' elements (#4642)",
"#comment" : "extra files in 'art-image-row' elements - WebP to GIF (#4642)",
"#category": ("", "newgrounds", "image"),
"#class" : newgrounds.NewgroundsImageExtractor,
"#urls" : (
"https://art.ngfiles.com/images/5091000/5091275_45067_zedrinbot_untitled-5091275.0a9d27ed2bc265a7e89478ed6ad6f86f.gif?f1696187399",
"https://art.ngfiles.com/medium_views/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.webp?f1696187437",
"https://art.ngfiles.com/medium_views/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.webp?f1696187435",
"https://art.ngfiles.com/medium_views/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.webp?f1696187438",
"https://art.ngfiles.com/medium_views/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.webp?f1696187439",
"https://art.ngfiles.com/images/5091000/5091275_45071_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187437",
"https://art.ngfiles.com/images/5091000/5091275_45070_zedrinbot_untitled-5091275.0d7334746374465bd448908b88d1f810.gif?f1696187435",
"https://art.ngfiles.com/images/5091000/5091275_45072_zedrinbot_untitled-5091275.6fdc62eaef43528fb1c9bda624d30a3d.gif?f1696187438",
"https://art.ngfiles.com/images/5091000/5091275_45073_zedrinbot_untitled-5091275.20aa05c1cd22fd058e8c68ce58f5a302.gif?f1696187439",
),
},
{
"#url" : "https://www.newgrounds.com/art/view/zedrinbot/nazrin-tanlines",
"#comment" : "extra files in 'art-image-row' elements - native PNG files (#4642)",
"#category": ("", "newgrounds", "image"),
"#class" : newgrounds.NewgroundsImageExtractor,
"#urls" : (
"https://art.ngfiles.com/images/5009000/5009916_14628_zedrinbot_nazrin-tanlines.265f7b6beec5855a349e2646e90cbc01.png?f1695698131",
"https://art.ngfiles.com/images/5009000/5009916_14632_zedrinbot_nazrin-tanlines.40bd62fbf5875806cda6b004b348114a.png?f1695698148",
"https://art.ngfiles.com/images/5009000/5009916_14634_zedrinbot_nazrin-tanlines.40bd62fbf5875806cda6b004b348114a.png?f1695698148",
"https://art.ngfiles.com/images/5009000/5009916_14633_zedrinbot_nazrin-tanlines.40bd62fbf5875806cda6b004b348114a.png?f1695698148",
"https://art.ngfiles.com/images/5009000/5009916_14635_zedrinbot_nazrin-tanlines.6a7aa4fd63e5f8077ad29314568246cc.png?f1695698149",
"https://art.ngfiles.com/images/5009000/5009916_14636_zedrinbot_nazrin-tanlines.6a7aa4fd63e5f8077ad29314568246cc.png?f1695698149",
),
},
@ -73,7 +88,7 @@ __tests__ = (
"#category": ("", "newgrounds", "image"),
"#class" : newgrounds.NewgroundsImageExtractor,
"#options" : {"username": None},
"#count" : 1,
"#count" : 0,
},
{

Loading…
Cancel
Save