[naver] simplify code + add test

pull/5126/head
Mike Fährmann 7 months ago
parent f64fb8f239
commit a8d3efbb99
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message
from .. import text
from urllib.parse import unquote
class NaverBase():
@ -63,16 +62,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data
def images(self, page):
return [
(unquote(url, encoding="EUC-KR")
.replace("://post", "://blog", 1)
.partition("?")[0], None)
if "\ufffd" in unquote(url)
else
(url.replace("://post", "://blog", 1)
.partition("?")[0], None)
for url in text.extract_iter(page, 'data-lazy-src="', '"')
]
results = []
for url in text.extract_iter(page, 'data-lazy-src="', '"'):
url = url.replace("://post", "://blog", 1).partition("?")[0]
if "\ufffd" in text.unquote(url):
url = text.unquote(url, encoding="EUC-KR")
results.append((url, None))
return results
class NaverBlogExtractor(NaverBase, Extractor):

@ -24,6 +24,33 @@ __tests__ = (
"#sha1_metadata": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
},
{
"#url" : "https://blog.naver.com/PostView.nhn?blogId=rlfqjxm0&logNo=70161391809",
"#comment" : "filenames in EUC-KR encoding (#5126)",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#urls": (
"https://blogfiles.pstatic.net/20130305_23/ping9303_1362411028002Dpz9z_PNG/1_사본.png",
"https://blogfiles.pstatic.net/20130305_46/rlfqjxm0_1362473322580x33zi_PNG/오마갓합작.png",
),
"blog": {
"id" : "rlfqjxm0",
"num" : 43030507,
"user": "에나",
},
"post": {
"date" : "dt:2013-03-05 17:48:00",
"description": " ◈     PROMOTER :핑수 ˚ 아담 EDITOR핑수   넵:이크:핑수...",
"num" : 70161391809,
"title" : "[공유] { 합작}  OH, MY GOD! ~ 아 또 무슨 종말을 한다 그래~"
},
"count" : 2,
"num" : range(1, 2),
"filename" : r"re:1_사본|오마갓합작",
"extension": "png",
},
{
"#url" : "https://blog.naver.com/gukjung",
"#category": ("", "naver", "blog"),

Loading…
Cancel
Save