From 9c5135ce6a10c8318e25a587620c8cde4563f348 Mon Sep 17 00:00:00 2001
From: George Claghorn
Date: Thu, 16 May 2019 10:58:33 -0400
Subject: S3: permit uploading files larger than 5 GB

Use multipart uploads for files larger than 100 MB. Dynamically calculate
part size based on total object size and maximum part count.
---
 activestorage/CHANGELOG.md                     | 21 ++++++++++++++
 .../lib/active_storage/service/s3_service.rb   | 32 +++++++++++++++++++---
 activestorage/test/service/s3_service_test.rb  | 22 +++++++++++++--
 3 files changed, 69 insertions(+), 6 deletions(-)

diff --git a/activestorage/CHANGELOG.md b/activestorage/CHANGELOG.md
index 957591ec0a..956567e08a 100644
--- a/activestorage/CHANGELOG.md
+++ b/activestorage/CHANGELOG.md
@@ -1,3 +1,24 @@
+*   The S3 service now permits uploading files larger than 5 gigabytes.
+    When uploading a file greater than 100 megabytes in size, the service
+    transparently switches to [multipart uploads](https://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html)
+    using a part size computed from the file's total size and S3's part count limit.
+
+    No application changes are necessary to take advantage of this feature. You
+    can customize the default 100 MB multipart upload threshold in your S3
+    service's configuration:
+
+    ```yaml
+    production:
+      service: s3
+      access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %>
+      secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %>
+      region: us-east-1
+      bucket: my-bucket
+      upload:
+        multipart_threshold: <%= 250.megabytes %>
+    ```
+
+    *George Claghorn*
+
 Please check [6-0-stable](https://github.com/rails/rails/blob/6-0-stable/activestorage/CHANGELOG.md) for previous changes.

diff --git a/activestorage/lib/active_storage/service/s3_service.rb b/activestorage/lib/active_storage/service/s3_service.rb
index c7e4ec96a2..e4bd57048a 100644
--- a/activestorage/lib/active_storage/service/s3_service.rb
+++ b/activestorage/lib/active_storage/service/s3_service.rb
@@ -1,5 +1,7 @@
 # frozen_string_literal: true

+gem "aws-sdk-s3", "~> 1.14"
+
 require "aws-sdk-s3"
 require "active_support/core_ext/numeric/bytes"

@@ -7,20 +9,24 @@ module ActiveStorage
   # Wraps the Amazon Simple Storage Service (S3) as an Active Storage service.
   # See ActiveStorage::Service for the generic API documentation that applies to all services.
   class Service::S3Service < Service
-    attr_reader :client, :bucket, :upload_options
+    attr_reader :client, :bucket
+    attr_reader :multipart_upload_threshold, :upload_options

     def initialize(bucket:, upload: {}, **options)
       @client = Aws::S3::Resource.new(**options)
       @bucket = @client.bucket(bucket)

+      @multipart_upload_threshold = upload.fetch(:multipart_threshold, 100.megabytes)
       @upload_options = upload
     end

     def upload(key, io, checksum: nil, content_type: nil, **)
       instrument :upload, key: key, checksum: checksum do
-        object_for(key).put(upload_options.merge(body: io, content_md5: checksum, content_type: content_type))
-      rescue Aws::S3::Errors::BadDigest
-        raise ActiveStorage::IntegrityError
+        if io.size < multipart_upload_threshold
+          upload_with_single_part key, io, checksum: checksum, content_type: content_type
+        else
+          upload_with_multipart key, io, content_type: content_type
+        end
       end
     end

@@ -94,6 +100,24 @@ module ActiveStorage
     end

     private
+      MAXIMUM_UPLOAD_PARTS_COUNT = 10000
+      MINIMUM_UPLOAD_PART_SIZE = 5.megabytes
+
+      def upload_with_single_part(key, io, checksum: nil, content_type: nil)
+        object_for(key).put(body: io, content_md5: checksum, content_type: content_type, **upload_options)
+      rescue Aws::S3::Errors::BadDigest
+        raise ActiveStorage::IntegrityError
+      end
+
+      def upload_with_multipart(key, io, content_type: nil)
+        part_size = [ io.size.fdiv(MAXIMUM_UPLOAD_PARTS_COUNT).ceil, MINIMUM_UPLOAD_PART_SIZE ].max
+
+        object_for(key).upload_stream(content_type: content_type, part_size: part_size, **upload_options) do |out|
+          IO.copy_stream(io, out)
+        end
+      end
+
+
       def object_for(key)
         bucket.object(key)
       end

diff --git a/activestorage/test/service/s3_service_test.rb b/activestorage/test/service/s3_service_test.rb
index 74c0aa0405..b9120770e6 100644
--- a/activestorage/test/service/s3_service_test.rb
+++ b/activestorage/test/service/s3_service_test.rb
@@ -46,8 +45,7 @@
     end

     test "uploading with server-side encryption" do
-      config = SERVICE_CONFIGURATIONS.deep_merge(s3: { upload: { server_side_encryption: "AES256" } })
-      service = ActiveStorage::Service.configure(:s3, config)
+      service = build_service(upload: { server_side_encryption: "AES256" })

       begin
         key = SecureRandom.base58(24)
@@ -77,6 +76,25 @@
       ensure
         @service.delete key
       end
+
+    test "uploading a large object in multiple parts" do
+      service = build_service(upload: { multipart_threshold: 5.megabytes })
+
+      begin
+        key  = SecureRandom.base58(24)
+        data = SecureRandom.bytes(8.megabytes)
+
+        service.upload key, StringIO.new(data), checksum: Digest::MD5.base64digest(data)
+        assert data == service.download(key)
+      ensure
+        service.delete key
+      end
+    end
+
+    private
+      def build_service(configuration)
+        ActiveStorage::Service.configure :s3, SERVICE_CONFIGURATIONS.deep_merge(s3: configuration)
+      end
   end
 else
   puts "Skipping S3 Service tests because no S3 configuration was supplied"
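
For readers skimming the patch, here is a minimal standalone sketch of the part-size arithmetic that the new `upload_with_multipart` method performs. It reuses the constants from the patch; the `part_size_for` helper name and the example sizes are illustrative only, and it assumes the `activesupport` gem is available for the byte helpers:

```ruby
require "active_support/core_ext/numeric/bytes"

MAXIMUM_UPLOAD_PARTS_COUNT = 10000        # S3 allows at most 10,000 parts per multipart upload
MINIMUM_UPLOAD_PART_SIZE   = 5.megabytes  # S3 rejects parts smaller than 5 MB (except the last one)

# Spread the object across at most 10,000 parts, but never request parts
# smaller than the 5 MB minimum.
def part_size_for(object_size)
  [ object_size.fdiv(MAXIMUM_UPLOAD_PARTS_COUNT).ceil, MINIMUM_UPLOAD_PART_SIZE ].max
end

part_size_for(8.gigabytes)   # => 5242880  (5 MB floor applies; ~1,639 parts)
part_size_for(120.gigabytes) # => 12884902 (~12.3 MB per part; fits in 10,000 parts)
```

In practice this means files at or above the configurable 100 MB threshold upload in 5 MB parts until the object size passes roughly 50 GB, after which the part size grows so the upload still fits within S3's 10,000-part limit.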