[downloader:http] refactor file signature checks

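use per-extension check functions (lambdas) instead of startswith(),
so that formats sharing a prefix can be told apart by bytes at a later
offset (e.g. "webp" and "wav" both start with "RIFF" and differ only in
the "WEBP"/"WAVE" tag at bytes 8-12)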
pull/3155/head
Mike Fährmann 2 years ago
parent 86790da2d5
commit f687e64513
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -189,7 +189,7 @@ class HttpDownloader(DownloaderBase):
# check filename extension against file header # check filename extension against file header
if adjust_extension and not offset and \ if adjust_extension and not offset and \
pathfmt.extension in FILE_SIGNATURES: pathfmt.extension in SIGNATURE_CHECKS:
try: try:
file_header = next( file_header = next(
content if response.raw.chunked content if response.raw.chunked
@ -220,7 +220,7 @@ class HttpDownloader(DownloaderBase):
offset += len(file_header) offset += len(file_header)
elif offset: elif offset:
if adjust_extension and \ if adjust_extension and \
pathfmt.extension in FILE_SIGNATURES: pathfmt.extension in SIGNATURE_CHECKS:
self._adjust_extension(pathfmt, fp.read(16)) self._adjust_extension(pathfmt, fp.read(16))
fp.seek(offset) fp.seek(offset)
@ -308,10 +308,9 @@ class HttpDownloader(DownloaderBase):
@staticmethod @staticmethod
def _adjust_extension(pathfmt, file_header): def _adjust_extension(pathfmt, file_header):
"""Check filename extension against file header""" """Check filename extension against file header"""
sig = FILE_SIGNATURES[pathfmt.extension] if not SIGNATURE_CHECKS[pathfmt.extension](file_header):
if not file_header.startswith(sig): for ext, check in SIGNATURE_CHECKS.items():
for ext, sig in FILE_SIGNATURES.items(): if check(file_header):
if file_header.startswith(sig):
pathfmt.set_extension(ext) pathfmt.set_extension(ext)
return True return True
return False return False
@ -362,27 +361,30 @@ MIME_TYPES = {
} }
# https://en.wikipedia.org/wiki/List_of_file_signatures # https://en.wikipedia.org/wiki/List_of_file_signatures
FILE_SIGNATURES = { SIGNATURE_CHECKS = {
"jpg" : b"\xFF\xD8\xFF", "jpg" : lambda s: s[0:3] == b"\xFF\xD8\xFF",
"png" : b"\x89PNG\r\n\x1A\n", "png" : lambda s: s[0:8] == b"\x89PNG\r\n\x1A\n",
"gif" : (b"GIF87a", b"GIF89a"), "gif" : lambda s: s[0:6] in (b"GIF87a", b"GIF89a"),
"bmp" : b"BM", "bmp" : lambda s: s[0:2] == b"BM",
"webp": b"RIFF", "webp": lambda s: (s[0:4] == b"RIFF" and
"svg" : b"<?xml", s[8:12] == b"WEBP"),
"ico" : b"\x00\x00\x01\x00", "svg" : lambda s: s[0:5] == b"<?xml",
"cur" : b"\x00\x00\x02\x00", "ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
"psd" : b"8BPS", "cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
"webm": b"\x1A\x45\xDF\xA3", "psd" : lambda s: s[0:4] == b"8BPS",
"ogg" : b"OggS", "webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
"wav" : b"RIFF", "ogg" : lambda s: s[0:4] == b"OggS",
"mp3" : (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2", b"ID3"), "wav" : lambda s: (s[0:4] == b"RIFF" and
"zip" : (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"), s[8:12] == b"WAVE"),
"rar" : b"\x52\x61\x72\x21\x1A\x07", "mp3" : lambda s: (s[0:3] == b"ID3" or
"7z" : b"\x37\x7A\xBC\xAF\x27\x1C", s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
"pdf" : b"%PDF-", "zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
"swf" : (b"CWS", b"FWS"), "rar" : lambda s: s[0:6] == b"\x52\x61\x72\x21\x1A\x07",
"7z" : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
"pdf" : lambda s: s[0:5] == b"%PDF-",
"swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
# check 'bin' files against all other file signatures # check 'bin' files against all other file signatures
"bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00", "bin" : lambda s: False,
} }
__downloader__ = HttpDownloader __downloader__ = HttpDownloader

Loading…
Cancel
Save