Merge pull request #84 from xangelix/sse-c

This commit is contained in:
reivilibre 2022-11-08 12:55:08 +00:00 committed by GitHub
commit 52be2ef613
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 69 additions and 16 deletions

View file

@ -28,6 +28,12 @@ media_storage_providers:
access_key_id: <S3_ACCESS_KEY_ID>
secret_access_key: <S3_SECRET_ACCESS_KEY>
# Server-side encryption with customer-provided keys (SSE-C)
#sse_customer_key: <S3_SSEC_KEY>
# Algorithm to use for SSE-C; this is almost certainly AES256.
# Default is AES256.
#sse_customer_algo: <S3_SSEC_ALGO>
# The object storage class used when uploading files to the bucket.
# Default is STANDARD.
#storage_class: "STANDARD_IA"

View file

@ -62,7 +62,10 @@ class S3StorageProviderBackend(StorageProvider):
def __init__(self, hs, config):
self.cache_directory = hs.config.media.media_store_path
self.bucket = config["bucket"]
self.storage_class = config["storage_class"]
# A dictionary of extra arguments for uploading files; it is also passed to
# the download task, which reads its SSE-C entries.
# See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS
# for a list of possible keys.
self.extra_args = config["extra_args"]
self.api_kwargs = {}
if "region_name" in config:
@ -118,11 +121,12 @@ class S3StorageProviderBackend(StorageProvider):
def _store_file():
with LoggingContext(parent_context=parent_logcontext):
self._get_s3_client().upload_file(
Filename=os.path.join(self.cache_directory, path),
Bucket=self.bucket,
Key=path,
ExtraArgs={"StorageClass": self.storage_class},
ExtraArgs=self.extra_args,
)
return make_deferred_yieldable(
@ -136,7 +140,9 @@ class S3StorageProviderBackend(StorageProvider):
d = defer.Deferred()
def _get_file():
s3_download_task(self._get_s3_client(), self.bucket, path, d, logcontext)
s3_download_task(
self._get_s3_client(), self.bucket, path, self.extra_args, d, logcontext
)
self._s3_pool.callInThread(_get_file)
return make_deferred_yieldable(d)
@ -158,7 +164,7 @@ class S3StorageProviderBackend(StorageProvider):
result = {
"bucket": bucket,
"storage_class": storage_class,
"extra_args": {"StorageClass": storage_class},
}
if "region_name" in config:
@ -173,10 +179,16 @@ class S3StorageProviderBackend(StorageProvider):
if "secret_access_key" in config:
result["secret_access_key"] = config["secret_access_key"]
if "sse_customer_key" in config:
result["extra_args"]["SSECustomerKey"] = config["sse_customer_key"]
result["extra_args"]["SSECustomerAlgorithm"] = config.get(
"sse_customer_algo", "AES256"
)
return result
def s3_download_task(s3_client, bucket, key, deferred, parent_logcontext):
def s3_download_task(s3_client, bucket, key, extra_args, deferred, parent_logcontext):
"""Attempts to download a file from S3.
Args:
@ -193,7 +205,16 @@ def s3_download_task(s3_client, bucket, key, deferred, parent_logcontext):
logger.info("Fetching %s from S3", key)
try:
resp = s3_client.get_object(Bucket=bucket, Key=key)
if extra_args["SSECustomerKey"] and extra_args["SSECustomerAlgorithm"]:
resp = s3_client.get_object(
Bucket=bucket,
Key=key,
SSECustomerKey=extra_args["SSECustomerKey"],
SSECustomerAlgorithm=extra_args["SSECustomerAlgorithm"],
)
else:
resp = s3_client.get_object(Bucket=bucket, Key=key)
except botocore.exceptions.ClientError as e:
if e.response["Error"]["Code"] in ("404", "NoSuchKey",):
logger.info("Media %s not found in S3", key)

View file

@ -167,11 +167,19 @@ def get_local_files(base_path, origin, filesystem_id, m_type):
return local_files
def check_file_in_s3(s3, bucket, key):
def check_file_in_s3(s3, bucket, key, extra_args):
"""Check the file exists in S3 (though it could be different)
"""
try:
s3.head_object(Bucket=bucket, Key=key)
if extra_args["SSECustomerKey"] and extra_args["SSECustomerAlgorithm"]:
s3.head_object(
Bucket=bucket,
Key=key,
SSECustomerKey=extra_args["SSECustomerKey"],
SSECustomerAlgorithm=extra_args["SSECustomerAlgorithm"],
)
else:
s3.head_object(Bucket=bucket, Key=key)
except botocore.exceptions.ClientError as e:
if int(e.response["Error"]["Code"]) == 404:
return False
@ -294,7 +302,7 @@ def run_check_delete(sqlite_conn, base_path):
print("Updated", len(deleted), "as deleted")
def run_upload(s3, bucket, sqlite_conn, base_path, should_delete, storage_class):
def run_upload(s3, bucket, sqlite_conn, base_path, extra_args, should_delete):
"""Entry point for upload command
"""
total = get_not_deleted_count(sqlite_conn)
@ -327,13 +335,10 @@ def run_upload(s3, bucket, sqlite_conn, base_path, should_delete, storage_class)
for rel_file_path in local_files:
local_path = os.path.join(base_path, rel_file_path)
if not check_file_in_s3(s3, bucket, rel_file_path):
if not check_file_in_s3(s3, bucket, rel_file_path, extra_args):
try:
s3.upload_file(
local_path,
bucket,
rel_file_path,
ExtraArgs={"StorageClass": storage_class},
local_path, bucket, rel_file_path, ExtraArgs=extra_args,
)
except Exception as e:
print("Failed to upload file %s: %s", local_path, e)
@ -481,6 +486,7 @@ def main():
"base_path", help="Base path of the media store directory"
)
upload_parser.add_argument("bucket", help="S3 bucket to upload to")
upload_parser.add_argument(
"--storage-class",
help="S3 storage class to use",
@ -495,6 +501,16 @@ def main():
default="STANDARD",
)
upload_parser.add_argument(
"--sse-customer-key", help="SSE-C key to use",
)
upload_parser.add_argument(
"--sse-customer-algo",
help="Algorithm for SSE-C, only used if sse-customer-key is also specified",
default="AES256",
)
upload_parser.add_argument(
"--delete",
action="store_const",
@ -537,13 +553,22 @@ def main():
if args.cmd == "upload":
sqlite_conn = get_sqlite_conn(parser)
s3 = boto3.client("s3", endpoint_url=args.endpoint_url)
extra_args = {"StorageClass": args.storage_class}
if args.sse_customer_key:
extra_args["SSECustomerKey"] = args.sse_customer_key
if args.sse_customer_algo:
extra_args["SSECustomerAlgorithm"] = args.sse_customer_algo
else:
extra_args["SSECustomerAlgorithm"] = "AES256"
run_upload(
s3,
args.bucket,
sqlite_conn,
args.base_path,
extra_args,
should_delete=args.delete,
storage_class=args.storage_class,
)
return

View file

@ -13,7 +13,8 @@ ignore = W503,E203,E731
# note that flake8 inherits the "ignore" settings from "pep8" (because it uses
# pep8 to do those checks), but not the "max-line-length" setting
max-line-length = 90
ignore=W503,E203,E731
# E501: line length is enforced by black; no need to duplicate in flake8
ignore=W503,E203,E731,E501
[isort]
line_length = 89