[nozomi] update file URLs (#3925)

Static images are now only available in WebP format over the 'w'
subdomain. GIFs also got their own 'g' subdomain.
pull/3935/head
Mike Fährmann 1 year ago
parent 2dfd4a3de2
commit 359e31e462
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -1,6 +1,6 @@
# -*- coding: utf-8 -*-
# Copyright 2019-2022 Mike Fährmann
# Copyright 2019-2023 Mike Fährmann
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 2 as
@@ -63,10 +63,20 @@ class NozomiExtractor(Extractor):
yield Message.Directory, post
for post["num"], image in enumerate(images, 1):
post["filename"] = post["dataid"] = did = image["dataid"]
post["extension"] = ext = image["type"]
post["is_video"] = video = bool(image.get("is_video"))
ext = image["type"]
if video:
subdomain = "v"
elif ext == "gif":
subdomain = "g"
else:
subdomain = "w"
ext = "webp"
post["extension"] = ext
post["url"] = url = "https://{}.nozomi.la/{}/{}/{}.{}".format(
"v" if video else "i", did[-1], did[-3:-1], did, ext)
subdomain, did[-1], did[-3:-1], did, ext)
yield Message.Url, url, post
def posts(self):
@@ -97,15 +107,17 @@ class NozomiPostExtractor(NozomiExtractor):
pattern = r"(?:https?://)?nozomi\.la/post/(\d+)"
test = (
("https://nozomi.la/post/3649262.html", {
"url": "f4522adfc8159355fd0476de28761b5be0f02068",
"content": "cd20d2c5149871a0b80a1b0ce356526278964999",
"url": "e5525e717aec712843be8b88592d6406ae9e60ba",
"pattern": r"https://w\.nozomi\.la/2/15/aaa9f7c632cde1e1a5baaff3fb"
r"6a6d857ec73df7fdc5cf5a358caf604bf73152\.webp",
"content": "6d62c4a7fea50c0a89d499603c4e7a2b4b9bffa8",
"keyword": {
"artist" : ["hammer (sunset beach)"],
"character": ["patchouli knowledge"],
"copyright": ["touhou"],
"dataid" : "re:aaa9f7c632cde1e1a5baaff3fb6a6d857ec73df7fdc5",
"date" : "dt:2016-07-26 02:32:03",
"extension": "jpg",
"extension": "webp",
"filename" : str,
"height" : 768,
"is_video" : False,
@@ -118,14 +130,26 @@ class NozomiPostExtractor(NozomiExtractor):
}),
# multiple images per post
("https://nozomi.la/post/25588032.html", {
"url": "6aa3b7db385abcc9d374bdffd19187bccbf8f228",
"keyword": "2a2998af93c6438863c4077bd386b613b8bc2957",
"url": "fb956ccedcf2cf509739d26e2609e910244aa56c",
"keyword": "516ca5cbd0d2a46a8ce26679d6e08de5ac42184b",
"count": 7,
}),
# empty 'date' (#1163)
("https://nozomi.la/post/130309.html", {
"keyword": {"date": None},
})
}),
# gif
("https://nozomi.la/post/1647.html", {
"pattern": r"https://g\.nozomi\.la/a/f0/d1b06469e00d72e4f6346209c1"
r"49db459d76b58a074416c260ed93cc31fa9f0a\.gif",
"content": "952efb78252bbc9fb56df2e8fafb68d5e6364181",
}),
# video
("https://nozomi.la/post/2269847.html", {
"pattern": r"https://v\.nozomi\.la/d/0e/ff88398862669783691b31519f"
r"2bea3a35c24b6e62e3ba2d89b4409e41c660ed\.webm",
"content": "57065e6c16da7b1c7098a63b36fb0c6c6f1b9bca",
}),
)
def __init__(self, match):
@@ -160,7 +184,7 @@ class NozomiTagExtractor(NozomiExtractor):
archive_fmt = "t_{search_tags}_{dataid}"
pattern = r"(?:https?://)?nozomi\.la/tag/([^/?#]+)-(\d+)\."
test = ("https://nozomi.la/tag/3:1_aspect_ratio-1.html", {
"pattern": r"^https://[iv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
"pattern": r"^https://[wgv]\.nozomi\.la/\w/\w\w/\w+\.\w+$",
"count": ">= 25",
"range": "1-25",
})

Loading…
Cancel
Save