|
|
@ -189,7 +189,7 @@ class HttpDownloader(DownloaderBase):
|
|
|
|
|
|
|
|
|
|
|
|
# check filename extension against file header
|
|
|
|
# check filename extension against file header
|
|
|
|
if adjust_extension and not offset and \
|
|
|
|
if adjust_extension and not offset and \
|
|
|
|
pathfmt.extension in FILE_SIGNATURES:
|
|
|
|
pathfmt.extension in SIGNATURE_CHECKS:
|
|
|
|
try:
|
|
|
|
try:
|
|
|
|
file_header = next(
|
|
|
|
file_header = next(
|
|
|
|
content if response.raw.chunked
|
|
|
|
content if response.raw.chunked
|
|
|
@ -220,7 +220,7 @@ class HttpDownloader(DownloaderBase):
|
|
|
|
offset += len(file_header)
|
|
|
|
offset += len(file_header)
|
|
|
|
elif offset:
|
|
|
|
elif offset:
|
|
|
|
if adjust_extension and \
|
|
|
|
if adjust_extension and \
|
|
|
|
pathfmt.extension in FILE_SIGNATURES:
|
|
|
|
pathfmt.extension in SIGNATURE_CHECKS:
|
|
|
|
self._adjust_extension(pathfmt, fp.read(16))
|
|
|
|
self._adjust_extension(pathfmt, fp.read(16))
|
|
|
|
fp.seek(offset)
|
|
|
|
fp.seek(offset)
|
|
|
|
|
|
|
|
|
|
|
@ -308,10 +308,9 @@ class HttpDownloader(DownloaderBase):
|
|
|
|
@staticmethod
|
|
|
|
@staticmethod
|
|
|
|
def _adjust_extension(pathfmt, file_header):
|
|
|
|
def _adjust_extension(pathfmt, file_header):
|
|
|
|
"""Check filename extension against file header"""
|
|
|
|
"""Check filename extension against file header"""
|
|
|
|
sig = FILE_SIGNATURES[pathfmt.extension]
|
|
|
|
if not SIGNATURE_CHECKS[pathfmt.extension](file_header):
|
|
|
|
if not file_header.startswith(sig):
|
|
|
|
for ext, check in SIGNATURE_CHECKS.items():
|
|
|
|
for ext, sig in FILE_SIGNATURES.items():
|
|
|
|
if check(file_header):
|
|
|
|
if file_header.startswith(sig):
|
|
|
|
|
|
|
|
pathfmt.set_extension(ext)
|
|
|
|
pathfmt.set_extension(ext)
|
|
|
|
return True
|
|
|
|
return True
|
|
|
|
return False
|
|
|
|
return False
|
|
|
@ -362,27 +361,30 @@ MIME_TYPES = {
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
# https://en.wikipedia.org/wiki/List_of_file_signatures
|
|
|
|
# https://en.wikipedia.org/wiki/List_of_file_signatures
|
|
|
|
FILE_SIGNATURES = {
|
|
|
|
SIGNATURE_CHECKS = {
|
|
|
|
"jpg" : b"\xFF\xD8\xFF",
|
|
|
|
"jpg" : lambda s: s[0:3] == b"\xFF\xD8\xFF",
|
|
|
|
"png" : b"\x89PNG\r\n\x1A\n",
|
|
|
|
"png" : lambda s: s[0:8] == b"\x89PNG\r\n\x1A\n",
|
|
|
|
"gif" : (b"GIF87a", b"GIF89a"),
|
|
|
|
"gif" : lambda s: s[0:6] in (b"GIF87a", b"GIF89a"),
|
|
|
|
"bmp" : b"BM",
|
|
|
|
"bmp" : lambda s: s[0:2] == b"BM",
|
|
|
|
"webp": b"RIFF",
|
|
|
|
"webp": lambda s: (s[0:4] == b"RIFF" and
|
|
|
|
"svg" : b"<?xml",
|
|
|
|
s[8:12] == b"WEBP"),
|
|
|
|
"ico" : b"\x00\x00\x01\x00",
|
|
|
|
"svg" : lambda s: s[0:5] == b"<?xml",
|
|
|
|
"cur" : b"\x00\x00\x02\x00",
|
|
|
|
"ico" : lambda s: s[0:4] == b"\x00\x00\x01\x00",
|
|
|
|
"psd" : b"8BPS",
|
|
|
|
"cur" : lambda s: s[0:4] == b"\x00\x00\x02\x00",
|
|
|
|
"webm": b"\x1A\x45\xDF\xA3",
|
|
|
|
"psd" : lambda s: s[0:4] == b"8BPS",
|
|
|
|
"ogg" : b"OggS",
|
|
|
|
"webm": lambda s: s[0:4] == b"\x1A\x45\xDF\xA3",
|
|
|
|
"wav" : b"RIFF",
|
|
|
|
"ogg" : lambda s: s[0:4] == b"OggS",
|
|
|
|
"mp3" : (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2", b"ID3"),
|
|
|
|
"wav" : lambda s: (s[0:4] == b"RIFF" and
|
|
|
|
"zip" : (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
|
|
|
|
s[8:12] == b"WAVE"),
|
|
|
|
"rar" : b"\x52\x61\x72\x21\x1A\x07",
|
|
|
|
"mp3" : lambda s: (s[0:3] == b"ID3" or
|
|
|
|
"7z" : b"\x37\x7A\xBC\xAF\x27\x1C",
|
|
|
|
s[0:2] in (b"\xFF\xFB", b"\xFF\xF3", b"\xFF\xF2")),
|
|
|
|
"pdf" : b"%PDF-",
|
|
|
|
"zip" : lambda s: s[0:4] in (b"PK\x03\x04", b"PK\x05\x06", b"PK\x07\x08"),
|
|
|
|
"swf" : (b"CWS", b"FWS"),
|
|
|
|
"rar" : lambda s: s[0:6] == b"\x52\x61\x72\x21\x1A\x07",
|
|
|
|
|
|
|
|
"7z" : lambda s: s[0:6] == b"\x37\x7A\xBC\xAF\x27\x1C",
|
|
|
|
|
|
|
|
"pdf" : lambda s: s[0:5] == b"%PDF-",
|
|
|
|
|
|
|
|
"swf" : lambda s: s[0:3] in (b"CWS", b"FWS"),
|
|
|
|
# check 'bin' files against all other file signatures
|
|
|
|
# check 'bin' files against all other file signatures
|
|
|
|
"bin" : b"\x00\x00\x00\x00\x00\x00\x00\x00",
|
|
|
|
"bin" : lambda s: False,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
__downloader__ = HttpDownloader
|
|
|
|
__downloader__ = HttpDownloader
|
|
|
|