author     George Claghorn <george@basecamp.com>  2019-05-16 10:58:33 -0400
committer  GitHub <noreply@github.com>            2019-05-16 10:58:33 -0400
commit     9c5135ce6a10c8318e25a587620c8cde4563f348 (patch)
tree       11e6ed9800b9bf401e02515607d1861092e31c78
parent     c8396e030f4b1edd48340975696dcaf6aa8291c9 (diff)
S3: permit uploading files larger than 5 GB
Use multipart uploads for files larger than 100 MB. Dynamically calculate part size based on total object size and maximum part count.
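
To make the part-size rule concrete, here is a worked example (an illustrative sketch, not code from the commit). S3 allows at most 10,000 parts per multipart upload and requires each part except the last to be at least 5 MB, so the part size is the larger of those two constraints:

```ruby
# part_size = max(ceil(total_size / 10_000), 5 MB): stay within S3's
# 10,000-part ceiling without dropping below its 5 MB minimum part size.
total_size = 50 * 1024**3                                    # 50 GB object
part_size  = [total_size.fdiv(10_000).ceil, 5 * 1024**2].max
part_size                                                    # => 5_368_710 bytes (~5.1 MB)
```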
-rw-r--r--  activestorage/CHANGELOG.md                               | 21
-rw-r--r--  activestorage/lib/active_storage/service/s3_service.rb   | 32
-rw-r--r--  activestorage/test/service/s3_service_test.rb            | 22
3 files changed, 69 insertions(+), 6 deletions(-)
diff --git a/activestorage/CHANGELOG.md b/activestorage/CHANGELOG.md
index 957591ec0a..956567e08a 100644
--- a/activestorage/CHANGELOG.md
+++ b/activestorage/CHANGELOG.md
@@ -1,3 +1,24 @@
+* The S3 service now permits uploading files larger than 5 gigabytes.
+ When uploading a file greater than 100 megabytes in size, the service
+ transparently switches to [multipart uploads](https://docs.aws.amazon.com/AmazonS3/latest/dev/mpuoverview.html)
+ using a part size computed from the file's total size and S3's part count limit.
+
+ No application changes are necessary to take advantage of this feature. You
+ can customize the default 100 MB multipart upload threshold in your S3
+ service's configuration:
+
+ ```yaml
+ production:
+ service: s3
+ access_key_id: <%= Rails.application.credentials.dig(:aws, :access_key_id) %>
+ secret_access_key: <%= Rails.application.credentials.dig(:aws, :secret_access_key) %>
+ region: us-east-1
+ bucket: my-bucket
+ upload:
+ multipart_threshold: <%= 250.megabytes %>
+ ```
+
+ *George Claghorn*
Please check [6-0-stable](https://github.com/rails/rails/blob/6-0-stable/activestorage/CHANGELOG.md) for previous changes.
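
As the changelog says, applications need no changes: a large attachment simply takes the multipart path. A minimal usage sketch (the model and attachment names are illustrative, assuming a `has_one_attached :video` declaration):

```ruby
# Files of 100 MB or more now stream to S3 in parts instead of
# failing S3's 5 GB single-PUT limit.
user.video.attach(
  io: File.open("/tmp/screencast.mp4", "rb"),
  filename: "screencast.mp4",
  content_type: "video/mp4"
)
```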
diff --git a/activestorage/lib/active_storage/service/s3_service.rb b/activestorage/lib/active_storage/service/s3_service.rb
index c7e4ec96a2..e4bd57048a 100644
--- a/activestorage/lib/active_storage/service/s3_service.rb
+++ b/activestorage/lib/active_storage/service/s3_service.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+gem "aws-sdk-s3", "~> 1.14"
+
require "aws-sdk-s3"
require "active_support/core_ext/numeric/bytes"
@@ -7,20 +9,24 @@ module ActiveStorage
# Wraps the Amazon Simple Storage Service (S3) as an Active Storage service.
# See ActiveStorage::Service for the generic API documentation that applies to all services.
class Service::S3Service < Service
- attr_reader :client, :bucket, :upload_options
+ attr_reader :client, :bucket
+ attr_reader :multipart_upload_threshold, :upload_options
def initialize(bucket:, upload: {}, **options)
@client = Aws::S3::Resource.new(**options)
@bucket = @client.bucket(bucket)
+ @multipart_upload_threshold = upload.fetch(:multipart_threshold, 100.megabytes)
@upload_options = upload
end
def upload(key, io, checksum: nil, content_type: nil, **)
instrument :upload, key: key, checksum: checksum do
- object_for(key).put(upload_options.merge(body: io, content_md5: checksum, content_type: content_type))
- rescue Aws::S3::Errors::BadDigest
- raise ActiveStorage::IntegrityError
+ if io.size < multipart_upload_threshold
+ upload_with_single_part key, io, checksum: checksum, content_type: content_type
+ else
+ upload_with_multipart key, io, content_type: content_type
+ end
end
end
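
Note the asymmetry in the branch above: the single-part path still sends `content_md5` so S3 verifies the checksum server-side, while the multipart path drops it, since a multipart ETag is not an MD5 of the whole object. Calling the service directly looks like this (a sketch; the key and file names are illustrative):

```ruby
require "digest"

# Sketch: ActiveStorage::Blob.service returns the configured service in a
# Rails app. io.size picks the path, so File, Tempfile, and StringIO all work.
service = ActiveStorage::Blob.service

File.open("video.mp4", "rb") do |file|
  service.upload(
    "some-object-key",
    file,
    checksum: Digest::MD5.file("video.mp4").base64digest, # verified only on the single-part path
    content_type: "video/mp4"
  )
end
```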
@@ -94,6 +100,24 @@ module ActiveStorage
end
private
+ MAXIMUM_UPLOAD_PARTS_COUNT = 10000
+ MINIMUM_UPLOAD_PART_SIZE = 5.megabytes
+
+ def upload_with_single_part(key, io, checksum: nil, content_type: nil)
+ object_for(key).put(body: io, content_md5: checksum, content_type: content_type, **upload_options)
+ rescue Aws::S3::Errors::BadDigest
+ raise ActiveStorage::IntegrityError
+ end
+
+ def upload_with_multipart(key, io, content_type: nil)
+ part_size = [ io.size.fdiv(MAXIMUM_UPLOAD_PARTS_COUNT).ceil, MINIMUM_UPLOAD_PART_SIZE ].max
+
+ object_for(key).upload_stream(content_type: content_type, part_size: part_size, **upload_options) do |out|
+ IO.copy_stream(io, out)
+ end
+ end
+
def object_for(key)
bucket.object(key)
end
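
`upload_stream` comes from the aws-sdk-s3 gem: it yields a writable IO, buffers whatever is written to it into chunks of `part_size`, and drives the CreateMultipartUpload/UploadPart/CompleteMultipartUpload calls itself. A standalone sketch of the same technique outside Rails (the region, bucket, key, and file names are placeholders):

```ruby
require "aws-sdk-s3"

object = Aws::S3::Resource.new(region: "us-east-1")
                          .bucket("my-bucket")
                          .object("big-file")

# Same sizing rule as the service: spread the object over at most
# 10,000 parts, but never go below S3's 5 MB minimum part size.
part_size = [File.size("big-file.bin").fdiv(10_000).ceil, 5 * 1024**2].max

object.upload_stream(part_size: part_size) do |out|
  IO.copy_stream(File.open("big-file.bin", "rb"), out)
end
```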
diff --git a/activestorage/test/service/s3_service_test.rb b/activestorage/test/service/s3_service_test.rb
index 74c0aa0405..b9120770e6 100644
--- a/activestorage/test/service/s3_service_test.rb
+++ b/activestorage/test/service/s3_service_test.rb
@@ -46,8 +46,7 @@ if SERVICE_CONFIGURATIONS[:s3]
end
test "uploading with server-side encryption" do
- config = SERVICE_CONFIGURATIONS.deep_merge(s3: { upload: { server_side_encryption: "AES256" } })
- service = ActiveStorage::Service.configure(:s3, config)
+ service = build_service(upload: { server_side_encryption: "AES256" })
begin
key = SecureRandom.base58(24)
@@ -77,6 +76,25 @@ if SERVICE_CONFIGURATIONS[:s3]
ensure
@service.delete key
end
+
+ test "uploading a large object in multiple parts" do
+ service = build_service(upload: { multipart_threshold: 5.megabytes })
+
+ begin
+ key = SecureRandom.base58(24)
+ data = SecureRandom.bytes(8.megabytes)
+
+ service.upload key, StringIO.new(data), checksum: Digest::MD5.base64digest(data)
+ assert data == service.download(key)
+ ensure
+ service.delete key
+ end
+ end
+
+ private
+ def build_service(configuration)
+ ActiveStorage::Service.configure :s3, SERVICE_CONFIGURATIONS.deep_merge(s3: configuration)
+ end
end
else
puts "Skipping S3 Service tests because no S3 configuration was supplied"