[zerochan] fix 'Invalid control character' errors (#5892)

pull/5951/head
Mike Fährmann 2 months ago
parent aa6d00613f
commit 8a6e208605
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -12,6 +12,7 @@ from .booru import BooruExtractor
 from ..cache import cache
 from .. import text, util, exception
 import collections
+import re
 BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -92,7 +93,14 @@ class ZerochanExtractor(BooruExtractor):
     def _parse_entry_api(self, entry_id):
         url = "{}/{}?json".format(self.root, entry_id)
-        item = self.request(url).json()
+        text = self.request(url).text
+        try:
+            item = util.json_loads(text)
+        except ValueError as exc:
+            if " control character " not in str(exc):
+                raise
+            text = re.sub(r"[\x00-\x1f\x7f]", "", text)
+            item = util.json_loads(text)
         data = {
             "id" : item["id"],

@@ -177,4 +177,15 @@ __tests__ = (
     "width" : 750,
 },
+{
+    "#url" : "https://www.zerochan.net/1395035",
+    "#comment" : "Invalid control character '\r' in 'source' field (#5892)",
+    "#category": ("booru", "zerochan", "image"),
+    "#class" : zerochan.ZerochanImageExtractor,
+    "#auth" : True,
+    "#options" : {"metadata": True},
+    "source": "http://www.youtube.com/watch?v=0vodqkGPxt8",
+},
 )

Loading…
Cancel
Save