From f64fb8f239744c912ad4b15c3accf9e3bc6b7018 Mon Sep 17 00:00:00 2001
From: Johann Hong <57867081+986569200-johann-Hong@users.noreply.github.com>
Date: Mon, 29 Jan 2024 00:23:09 +0900
Subject: [PATCH] [naver] Fix EUC-KR encoding issue in old image URLs

Around October 2010, the image server URL format and file name
encoding changed from EUC-KR to UTF-8.
Modified to detect the old URL format and decode such image URLs as EUC-KR.

- (lint with flake8) Customize conditions
  Wrap lines to fit within 79 characters

- (lint with flake8) Customize conditions (2nd try)
  - One import per line
  - Indent on consecutive lines

- (lint with flake8) Customize conditions (3rd try)
  - E128 continuation line under-indented for visual indent
  - E123 closing bracket does not match indentation of opening bracket's line

- Update naver.py
  Check encoding for all image URLs
---
 gallery_dl/extractor/naver.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/gallery_dl/extractor/naver.py b/gallery_dl/extractor/naver.py
index 55faf9e7..25801c7e 100644
--- a/gallery_dl/extractor/naver.py
+++ b/gallery_dl/extractor/naver.py
@@ -10,6 +10,7 @@
 
 from .common import GalleryExtractor, Extractor, Message
 from .. import text
+from urllib.parse import unquote
 
 
 class NaverBase():
@@ -63,7 +64,13 @@ class NaverPostExtractor(NaverBase, GalleryExtractor):
 
     def images(self, page):
         return [
-            (url.replace("://post", "://blog", 1).partition("?")[0], None)
+            (unquote(url, encoding="EUC-KR")
+             .replace("://post", "://blog", 1)
+             .partition("?")[0], None)
+            if "\ufffd" in unquote(url)
+            else
+            (url.replace("://post", "://blog", 1)
+             .partition("?")[0], None)
             for url in text.extract_iter(page, 'data-lazy-src="', '"')
         ]