diff --git a/README.md b/README.md
index fc6444f..50a833a 100644
--- a/README.md
+++ b/README.md
@@ -28,6 +28,12 @@ media_storage_providers:
     access_key_id:
     secret_access_key:
 
+    # Server-side encryption with customer-provided keys (SSE-C)
+    #sse_customer_key:
+    # The algorithm used with your SSE-C key; almost certainly AES256.
+    # Default is AES256.
+    #sse_customer_algo:
+
     # The object storage class used when uploading files to the bucket.
     # Default is STANDARD.
     #storage_class: "STANDARD_IA"
diff --git a/s3_storage_provider.py b/s3_storage_provider.py
index 68137ac..547f29b 100644
--- a/s3_storage_provider.py
+++ b/s3_storage_provider.py
@@ -62,7 +62,10 @@ class S3StorageProviderBackend(StorageProvider):
     def __init__(self, hs, config):
         self.cache_directory = hs.config.media.media_store_path
         self.bucket = config["bucket"]
-        self.storage_class = config["storage_class"]
+        # A dictionary of extra arguments for uploading files.
+        # See https://boto3.amazonaws.com/v1/documentation/api/latest/reference/customizations/s3.html#boto3.s3.transfer.S3Transfer.ALLOWED_UPLOAD_ARGS
+        # for a list of possible keys.
+        self.extra_args = config["extra_args"]
         self.api_kwargs = {}
 
         if "region_name" in config:
@@ -118,11 +121,12 @@ class S3StorageProviderBackend(StorageProvider):
 
         def _store_file():
             with LoggingContext(parent_context=parent_logcontext):
+
                 self._get_s3_client().upload_file(
                     Filename=os.path.join(self.cache_directory, path),
                     Bucket=self.bucket,
                     Key=path,
-                    ExtraArgs={"StorageClass": self.storage_class},
+                    ExtraArgs=self.extra_args,
                 )
 
         return make_deferred_yieldable(
@@ -136,7 +140,9 @@ class S3StorageProviderBackend(StorageProvider):
         d = defer.Deferred()
 
         def _get_file():
-            s3_download_task(self._get_s3_client(), self.bucket, path, d, logcontext)
+            s3_download_task(
+                self._get_s3_client(), self.bucket, path, self.extra_args, d, logcontext
+            )
 
         self._s3_pool.callInThread(_get_file)
         return make_deferred_yieldable(d)
@@ -158,7 +164,7 @@ class S3StorageProviderBackend(StorageProvider):
 
         result = {
             "bucket": bucket,
-            "storage_class": storage_class,
+            "extra_args": {"StorageClass": storage_class},
         }
 
         if "region_name" in config:
@@ -173,10 +179,16 @@ class S3StorageProviderBackend(StorageProvider):
         if "secret_access_key" in config:
             result["secret_access_key"] = config["secret_access_key"]
 
+        if "sse_customer_key" in config:
+            result["extra_args"]["SSECustomerKey"] = config["sse_customer_key"]
+            result["extra_args"]["SSECustomerAlgorithm"] = config.get(
+                "sse_customer_algo", "AES256"
+            )
+
         return result
 
 
-def s3_download_task(s3_client, bucket, key, deferred, parent_logcontext):
+def s3_download_task(s3_client, bucket, key, extra_args, deferred, parent_logcontext):
     """Attempts to download a file from S3.
 
     Args:
@@ -193,7 +205,16 @@ def s3_download_task(s3_client, bucket, key, deferred, parent_logcontext):
     logger.info("Fetching %s from S3", key)
 
     try:
-        resp = s3_client.get_object(Bucket=bucket, Key=key)
+        if extra_args.get("SSECustomerKey") and extra_args.get("SSECustomerAlgorithm"):
+            resp = s3_client.get_object(
+                Bucket=bucket,
+                Key=key,
+                SSECustomerKey=extra_args["SSECustomerKey"],
+                SSECustomerAlgorithm=extra_args["SSECustomerAlgorithm"],
+            )
+        else:
+            resp = s3_client.get_object(Bucket=bucket, Key=key)
+
     except botocore.exceptions.ClientError as e:
         if e.response["Error"]["Code"] in ("404", "NoSuchKey",):
             logger.info("Media %s not found in S3", key)
diff --git a/scripts/s3_media_upload b/scripts/s3_media_upload
index 6be77e7..07e83be 100755
--- a/scripts/s3_media_upload
+++ b/scripts/s3_media_upload
@@ -167,11 +167,19 @@ def get_local_files(base_path, origin, filesystem_id, m_type):
     return local_files
 
 
-def check_file_in_s3(s3, bucket, key):
+def check_file_in_s3(s3, bucket, key, extra_args):
     """Check the file exists in S3 (though it could be different)
     """
     try:
-        s3.head_object(Bucket=bucket, Key=key)
+        if extra_args.get("SSECustomerKey") and extra_args.get("SSECustomerAlgorithm"):
+            s3.head_object(
+                Bucket=bucket,
+                Key=key,
+                SSECustomerKey=extra_args["SSECustomerKey"],
+                SSECustomerAlgorithm=extra_args["SSECustomerAlgorithm"],
+            )
+        else:
+            s3.head_object(Bucket=bucket, Key=key)
     except botocore.exceptions.ClientError as e:
         if int(e.response["Error"]["Code"]) == 404:
             return False
@@ -294,7 +302,7 @@ def run_check_delete(sqlite_conn, base_path):
     print("Updated", len(deleted), "as deleted")
 
 
-def run_upload(s3, bucket, sqlite_conn, base_path, should_delete, storage_class):
+def run_upload(s3, bucket, sqlite_conn, base_path, extra_args, should_delete):
     """Entry point for upload command
     """
     total = get_not_deleted_count(sqlite_conn)
@@ -327,13 +335,10 @@ def run_upload(s3, bucket, sqlite_conn, base_path, should_delete, storage_class):
     for rel_file_path in local_files:
         local_path = os.path.join(base_path, rel_file_path)
 
-        if not check_file_in_s3(s3, bucket, rel_file_path):
+        if not check_file_in_s3(s3, bucket, rel_file_path, extra_args):
             try:
                 s3.upload_file(
-                    local_path,
-                    bucket,
-                    rel_file_path,
-                    ExtraArgs={"StorageClass": storage_class},
+                    local_path, bucket, rel_file_path, ExtraArgs=extra_args,
                 )
             except Exception as e:
                 print("Failed to upload file %s: %s", local_path, e)
@@ -481,6 +486,7 @@ def main():
         "base_path", help="Base path of the media store directory"
     )
     upload_parser.add_argument("bucket", help="S3 bucket to upload to")
+
     upload_parser.add_argument(
         "--storage-class",
         help="S3 storage class to use",
@@ -495,6 +501,16 @@ def main():
         default="STANDARD",
     )
 
+    upload_parser.add_argument(
+        "--sse-customer-key", help="SSE-C key to use",
+    )
+
+    upload_parser.add_argument(
+        "--sse-customer-algo",
+        help="Algorithm for SSE-C; only used if --sse-customer-key is also specified",
+        default="AES256",
+    )
+
     upload_parser.add_argument(
         "--delete",
         action="store_const",
@@ -537,13 +553,22 @@ def main():
     if args.cmd == "upload":
         sqlite_conn = get_sqlite_conn(parser)
         s3 = boto3.client("s3", endpoint_url=args.endpoint_url)
+
+        extra_args = {"StorageClass": args.storage_class}
+        if args.sse_customer_key:
+            extra_args["SSECustomerKey"] = args.sse_customer_key
+            if args.sse_customer_algo:
+                extra_args["SSECustomerAlgorithm"] = args.sse_customer_algo
+            else:
+                extra_args["SSECustomerAlgorithm"] = "AES256"
+
         run_upload(
             s3,
             args.bucket,
             sqlite_conn,
             args.base_path,
+            extra_args,
             should_delete=args.delete,
-            storage_class=args.storage_class,
         )
         return
 
diff --git a/setup.cfg b/setup.cfg
index e590400..fa32a06 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -13,7 +13,8 @@ ignore = W503,E203,E731
 # note that flake8 inherits the "ignore" settings from "pep8" (because it uses
 # pep8 to do those checks), but not the "max-line-length" setting
 max-line-length = 90
-ignore=W503,E203,E731
+# E501: line length is enforced by black; no need to duplicate in flake8
+ignore=W503,E203,E731,E501
 
 [isort]
 line_length = 89
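
For reviewers, a minimal sketch (not part of the diff) of how the extra_args dictionary built above drives SSE-C round-trips in boto3. The bucket name, object key, local file path, and the randomly generated key are placeholders, and it assumes valid AWS credentials:

    import os

    import boto3

    bucket = "my-synapse-media"  # placeholder bucket name
    media_key = "local_content/abc123"  # placeholder object key
    sse_key = os.urandom(32)  # 256-bit customer key; boto3 base64-encodes it for us

    extra_args = {
        "StorageClass": "STANDARD_IA",
        "SSECustomerKey": sse_key,
        "SSECustomerAlgorithm": "AES256",
    }

    s3 = boto3.client("s3")

    # Upload: S3Transfer forwards these ExtraArgs to the underlying PutObject
    # (or multipart upload) calls.
    s3.upload_file("/tmp/example", bucket, media_key, ExtraArgs=extra_args)

    # Download: GetObject (and HeadObject) must repeat the same key and
    # algorithm; otherwise S3 returns a 400 error instead of the object.
    resp = s3.get_object(
        Bucket=bucket,
        Key=media_key,
        SSECustomerKey=extra_args["SSECustomerKey"],
        SSECustomerAlgorithm=extra_args["SSECustomerAlgorithm"],
    )
    print(len(resp["Body"].read()), "bytes round-tripped")

This is why check_file_in_s3 and s3_download_task now take extra_args: with SSE-C, S3 refuses HeadObject/GetObject requests that omit the key used at upload time.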