如何使用 boto3 Python API 将大文件从一个 AWS S3 存储桶复制到另一个 S3 存储桶?如果使用 client.copy(),则会抛出错误 "An error occurred (InvalidArgument) when calling the UploadPartCopy operation: Range specified is not valid for source object of size:"。下面改用分段复制(multipart copy)手动实现:
def get_session_client():
    """Create a boto3 session and an S3 client bound to it.

    The session is built with no arguments; pass ``profile_name`` to
    ``boto3.session.Session`` here if a named profile should be used
    instead.

    Returns:
        A ``(session, client)`` tuple, where ``client`` is the S3 client
        created from ``session``.
    """
    boto_session = boto3.session.Session()
    return boto_session, boto_session.client("s3")
# S3 multipart limits: every part except the last must be at least 5 MiB,
# and a single multipart upload may contain at most 10,000 parts.
_DEFAULT_PART_SIZE = 5 * 1024 * 1024
_MAX_PART_COUNT = 10000


def copy_with_multipart(local_s3_client, src_bucket, target_bucket, key, object_size,
                        part_size=_DEFAULT_PART_SIZE):
    """Copy one S3 object into another bucket using a multipart copy.

    The object is copied range by range with ``upload_part_copy`` and the
    parts are then assembled with ``complete_multipart_upload``. The target
    object keeps the same key as the source. If any step fails after the
    multipart upload has been created, the upload is aborted so that no
    orphaned parts are left in the target bucket, and the original error
    is re-raised.

    Args:
        local_s3_client: S3 client object providing the boto3 multipart
            methods used below.
        src_bucket: Name of the bucket to copy from.
        target_bucket: Name of the bucket to copy to.
        key: Object key, used for both the source and the target.
        object_size: Size of the source object in bytes (int). The copy
            ranges are computed from this value, so it must match the
            object's actual size.
        part_size: Requested part size in bytes (int). It is raised
            automatically when the object would otherwise need more than
            10,000 parts.

    Raises:
        ValueError: If ``part_size`` is not positive.
        Exception: Any error raised by the S3 client is logged and re-raised.
    """
    if part_size <= 0:
        raise ValueError(f"part_size must be a positive number of bytes, got {part_size}")

    current_thread_name = get_current_thread_name()
    copy_source = {'Bucket': src_bucket, 'Key': key}

    if object_size == 0:
        # A multipart upload cannot be completed with an empty part list,
        # so an empty object is copied with a single plain copy request.
        try:
            local_s3_client.copy_object(
                Bucket=target_bucket,
                CopySource=copy_source,
                Key=key
            )
        except Exception:
            error_log(f"{current_thread_name} Error while CREATING COPY_OBJECT for key {key}")
            raise
        info_log(f"{current_thread_name} {key} COPY_OBJECT COMPLETED SUCCESSFULLY (empty object)")
        return

    # Ceiling division: the smallest part size that keeps the part count
    # within the limit. Only takes effect for very large objects.
    part_size = max(part_size, -(-object_size // _MAX_PART_COUNT))

    try:
        initiate_multipart = local_s3_client.create_multipart_upload(
            Bucket=target_bucket,
            Key=key
        )
        upload_id = initiate_multipart['UploadId']
    except Exception:
        error_log(f"{current_thread_name} Error while CREATING CREATE_MULTIPART_UPLOAD for key {key}")
        raise

    try:
        parts_etags = []
        for part_num, byte_position in enumerate(range(0, object_size, part_size), start=1):
            # The last part might be smaller than part_size, so clamp the
            # end of the range to the final byte of the object.
            last_byte = min(byte_position + part_size, object_size) - 1
            copy_source_range = f"bytes={byte_position}-{last_byte}"
            try:
                info_log(f"{current_thread_name} Creating upload_part_copy source_range: {copy_source_range}")
                response = local_s3_client.upload_part_copy(
                    Bucket=target_bucket,
                    CopySource=copy_source,
                    CopySourceRange=copy_source_range,
                    Key=key,
                    PartNumber=part_num,
                    UploadId=upload_id
                )
            except Exception:
                error_log(f"{current_thread_name} Error while CREATING UPLOAD_PART_COPY for key {key}")
                raise
            parts_etags.append({"ETag": response["CopyPartResult"]["ETag"], "PartNumber": part_num})

        try:
            response = local_s3_client.complete_multipart_upload(
                Bucket=target_bucket,
                Key=key,
                MultipartUpload={
                    'Parts': parts_etags
                },
                UploadId=upload_id
            )
            info_log(f"{current_thread_name} {key} COMPLETE_MULTIPART_UPLOAD COMPLETED SUCCESSFULLY, response={response} !!!!")
        except Exception:
            error_log(f"{current_thread_name} Error while CREATING COMPLETE_MULTIPART_UPLOAD for key {key}")
            raise
    except Exception:
        # Best-effort cleanup: parts of an unfinished upload stay stored in
        # the bucket until the upload is aborted. A failure to abort is
        # logged but must not hide the original error.
        try:
            local_s3_client.abort_multipart_upload(
                Bucket=target_bucket,
                Key=key,
                UploadId=upload_id
            )
        except Exception:
            error_log(f"{current_thread_name} Error while ABORTING multipart upload {upload_id} for key {key}")
        raise
调用上述分段复制方法的示例:
# Only the S3 client is needed for the copy; the session half of the
# returned pair is discarded.
_, local_s3_client = get_session_client()
copy_with_multipart(
    local_s3_client=local_s3_client,
    src_bucket=src_bucket_name,
    target_bucket=target_bucket_name,
    key=key,
    object_size=src_object_size,
)