[zerochan] fix 'Invalid control character' errors (#5892)

pull/5951/head
Mike Fährmann 2 months ago
parent aa6d00613f
commit 8a6e208605
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@@ -12,6 +12,7 @@ from .booru import BooruExtractor
from ..cache import cache
from .. import text, util, exception
import collections
import re
BASE_PATTERN = r"(?:https?://)?(?:www\.)?zerochan\.net"
@@ -92,7 +93,14 @@ class ZerochanExtractor(BooruExtractor):
def _parse_entry_api(self, entry_id):
url = "{}/{}?json".format(self.root, entry_id)
-item = self.request(url).json()
+text = self.request(url).text
+try:
+item = util.json_loads(text)
+except ValueError as exc:
+if " control character " not in str(exc):
+raise
+text = re.sub(r"[\x00-\x1f\x7f]", "", text)
+item = util.json_loads(text)
data = {
"id" : item["id"],

@@ -177,4 +177,15 @@ __tests__ = (
"width" : 750,
},
{
"#url" : "https://www.zerochan.net/1395035",
"#comment" : "Invalid control character '\r' in 'source' field (#5892)",
"#category": ("booru", "zerochan", "image"),
"#class" : zerochan.ZerochanImageExtractor,
"#auth" : True,
"#options" : {"metadata": True},
"source": "http://www.youtube.com/watch?v=0vodqkGPxt8",
},
)

Loading…
Cancel
Save