[downloader:http] refactor file signature checks

use functions/lambdas instead of startswith()
pull/3155/head
Mike Fährmann 2 years ago
parent 86790da2d5
commit f687e64513
No known key found for this signature in database
GPG Key ID: 5680CA389D365A88

@ -189,7 +189,7 @@ class HttpDownloader(DownloaderBase):
# check filename extension against file header
if adjust_extension and not offset and \
pathfmt.extension in FILE_SIGNATURES:
pathfmt.extension in SIGNATURE_CHECKS:
try:
file_header = next(
content if response.raw.chunked
@ -220,7 +220,7 @@ class HttpDownloader(DownloaderBase):
offset += len(file_header)
elif offset:
if adjust_extension and \
pathfmt.extension in FILE_SIGNATURES:
pathfmt.extension in SIGNATURE_CHECKS:
self._adjust_extension(pathfmt, fp.read(16))
fp.seek(offset)
@ -308,10 +308,9 @@ class HttpDownloader(DownloaderBase):
@staticmethod
def _adjust_extension(pathfmt, file_header):
    """Check filename extension against file header

    Looks up the check function registered for the current
    'pathfmt.extension' in SIGNATURE_CHECKS and applies it to
    'file_header' (the leading bytes of the file). If the header does
    not match, every entry of SIGNATURE_CHECKS is tried in order and
    the first extension whose check accepts the header is applied with
    'pathfmt.set_extension()'.

    Returns True if the extension was changed, False otherwise.

    Raises KeyError when 'pathfmt.extension' has no entry in
    SIGNATURE_CHECKS; the call sites shown in this diff test for
    membership before calling.
    """
    if not SIGNATURE_CHECKS[pathfmt.extension](file_header):
        # header contradicts the current extension:
        # use the first known signature that does match
        for ext, check in SIGNATURE_CHECKS.items():
            if check(file_header):
                pathfmt.set_extension(ext)
                return True
    return False
@ -362,27 +361,30 @@ MIME_TYPES = {
}
# https://en.wikipedia.org/wiki/List_of_file_signatures

# Pre-refactor table: filename extension -> bytes prefix (or tuple of
# prefixes) intended for 'file_header.startswith()'.
# A plain prefix cannot tell "webp" from "wav": both map to b"RIFF".
FILE_SIGNATURES = {
    "jpg" : b"\xFF\xD8\xFF",
    "png" : b"\x89PNG\r\n\x1A\n",
    "gif" : (b"GIF87a", b"GIF89a"),
    "bmp" : b"BM",
    "webp": b"RIFF",
    "svg" : b"<?xml",
    "ico" : b"\x00\x00\x01\x00",
    "cur" : b"\x00\x00\x02\x00",
    "psd" : b"8BPS",
    "webm": b"\x1A\x45\xDF\xA3",
    "ogg" : b"OggS",
    "wav" : b"RIFF",
    "mp3" : (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2", b"ID3"),
    "zip" : (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
    "rar" : b"\x52\x61\x72\x21\x1A\x07",
    "7z"  : b"\x37\x7A\xBC\xAF\x27\x1C",
    "pdf" : b"%PDF-",
    "swf" : (b"CWS", b"FWS"),
    # check 'bin' files against all other file signatures
    "bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00",
}

# Post-refactor table: filename extension -> predicate that receives
# the leading bytes of a file and returns True when they match.
# Every predicate only slices its argument, so a header that is too
# short (or empty) yields False instead of raising, and none of them
# looks past the first 12 bytes.
SIGNATURE_CHECKS = {
    "jpg" : lambda s: s[0:3] == b"\xFF\xD8\xFF",
    "png" : lambda s: s[0:8] == b"\x89PNG\r\n\x1A\n",
    "gif" : lambda s: s[0:6] in (b"GIF87a", b"GIF89a"),
    "bmp" : lambda s: s[0:2] == b"BM",
    # "webp" and "wav" share the b"RIFF" prefix;
    # bytes 8-11 are what distinguishes them
    "webp": lambda s: (s[0:4] == b"RIFF" and
                       s[8:12] == b"WEBP"),
    "svg" : lambda s: s[0:5] == b"<?xml",
    "ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
    "cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
    "psd" : lambda s: s[0:4] == b"8BPS",
    "webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
    "ogg" : lambda s: s[0:4] == b"OggS",
    "wav" : lambda s: (s[0:4] == b"RIFF" and
                       s[8:12] == b"WAVE"),
    "mp3" : lambda s: (s[0:3] == b"ID3" or
                       s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
    "zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
    "rar" : lambda s: s[0:6] == b"\x52\x61\x72\x21\x1A\x07",
    "7z"  : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
    "pdf" : lambda s: s[0:5] == b"%PDF-",
    "swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
    # check 'bin' files against all other file signatures:
    # this predicate never matches, so a "bin" file is always
    # compared against every other entry
    "bin" : lambda s: False,
}
__downloader__ = HttpDownloader

Loading…
Cancel
Save