[naver] simplify code + add test

pull/5126/head
Mike Fährmann 7 months ago
parent f64fb8f239
commit a8d3efbb99
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -10,7 +10,6 @@
from .common import GalleryExtractor, Extractor, Message from .common import GalleryExtractor, Extractor, Message
from .. import text from .. import text
from urllib.parse import unquote
class NaverBase(): class NaverBase():
@@ -63,16 +62,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
return data return data
def images(self, page): def images(self, page):
return [ results = []
(unquote(url, encoding="EUC-KR") for url in text.extract_iter(page, 'data-lazy-src="', '"'):
.replace("://post", "://blog", 1) url = url.replace("://post", "://blog", 1).partition("?")[0]
.partition("?")[0], None) if "\ufffd" in text.unquote(url):
if "\ufffd" in unquote(url) url = text.unquote(url, encoding="EUC-KR")
else results.append((url, None))
(url.replace("://post", "://blog", 1) return results
.partition("?")[0], None)
for url in text.extract_iter(page, 'data-lazy-src="', '"')
]
class NaverBlogExtractor(NaverBase, Extractor): class NaverBlogExtractor(NaverBase, Extractor):

@@ -24,6 +24,33 @@ __tests__ = (
"#sha1_metadata": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e", "#sha1_metadata": "a6e23d19afbee86b37d6e7ad934650c379d2cb1e",
}, },
{
"#url" : "https://blog.naver.com/PostView.nhn?blogId=rlfqjxm0&logNo=70161391809",
"#comment" : "filenames in EUC-KR encoding (#5126)",
"#category": ("", "naver", "post"),
"#class" : naver.NaverPostExtractor,
"#urls": (
"https://blogfiles.pstatic.net/20130305_23/ping9303_1362411028002Dpz9z_PNG/1_사본.png",
"https://blogfiles.pstatic.net/20130305_46/rlfqjxm0_1362473322580x33zi_PNG/오마갓합작.png",
),
"blog": {
"id" : "rlfqjxm0",
"num" : 43030507,
"user": "에나",
},
"post": {
"date" : "dt:2013-03-05 17:48:00",
"description": " ◈     PROMOTER :핑수 ˚ 아담 EDITOR핑수   넵:이크:핑수...",
"num" : 70161391809,
"title" : "[공유] { 합작}  OH, MY GOD! ~ 아 또 무슨 종말을 한다 그래~"
},
"count" : 2,
"num" : range(1, 2),
"filename" : r"re:1_사본|오마갓합작",
"extension": "png",
},
{ {
"#url" : "https://blog.naver.com/gukjung", "#url" : "https://blog.naver.com/gukjung",
"#category": ("", "naver", "blog"), "#category": ("", "naver", "blog"),

Loading…
Cancel
Save