From c2ba530c43244b5b60fd629f61cd8b44c43ecda9 Mon Sep 17 00:00:00 2001 From: George Claghorn Date: Mon, 15 Jan 2018 13:06:17 -0500 Subject: Extract content types from blob data --- Gemfile | 1 + Gemfile.lock | 13 ++++++++ activestorage/activestorage.gemspec | 4 +++ .../app/models/active_storage/attachment.rb | 6 +++- activestorage/app/models/active_storage/blob.rb | 14 +++++--- .../app/models/active_storage/blob/identifiable.rb | 11 +++++++ .../app/models/active_storage/identification.rb | 38 ++++++++++++++++++++++ activestorage/lib/active_storage.rb | 2 ++ .../lib/active_storage/service/disk_service.rb | 28 +++++++--------- activestorage/test/analyzer/video_analyzer_test.rb | 2 +- activestorage/test/dummy/config/storage.yml | 1 + activestorage/test/models/attachments_test.rb | 25 +++++++++++++- activestorage/test/models/blob_test.rb | 12 ++++++- activestorage/test/models/variant_test.rb | 2 +- activestorage/test/service/configurator_test.rb | 5 ++- activestorage/test/service/disk_service_test.rb | 2 +- activestorage/test/service/mirror_service_test.rb | 7 ++-- activestorage/test/test_helper.rb | 5 ++- guides/source/active_storage_overview.md | 3 ++ .../rails/app/templates/config/storage.yml.tt | 2 ++ 20 files changed, 151 insertions(+), 32 deletions(-) create mode 100644 activestorage/app/models/active_storage/blob/identifiable.rb create mode 100644 activestorage/app/models/active_storage/identification.rb diff --git a/Gemfile b/Gemfile index 674edaa773..1bc9863cc4 100644 --- a/Gemfile +++ b/Gemfile @@ -19,6 +19,7 @@ gem "rack-cache", "~> 1.2" gem "coffee-rails" gem "sass-rails" gem "turbolinks", "~> 5" +gem "webmock" # require: false so bcrypt is loaded only when has_secure_password is used. # This is to avoid Active Model (and by extension the entire framework) diff --git a/Gemfile.lock b/Gemfile.lock index 6e07fdc3fe..8c9b1c7388 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -69,6 +69,7 @@ PATH activestorage (5.2.0.beta2) actionpack (= 5.2.0.beta2) activerecord (= 5.2.0.beta2) + marcel (~> 0.3.1) activesupport (5.2.0.beta2) concurrent-ruby (~> 1.0, >= 1.0.2) i18n (~> 0.7) @@ -194,6 +195,8 @@ GEM concurrent-ruby (1.0.5-java) connection_pool (2.2.1) cookiejar (0.3.3) + crack (0.4.3) + safe_yaml (~> 1.0.0) crass (1.0.3) curses (1.0.2) daemons (1.2.4) @@ -266,6 +269,7 @@ GEM multi_json (~> 1.11) os (~> 0.9) signet (~> 0.7) + hashdiff (0.3.7) hiredis (0.6.1) hiredis (0.6.1-java) http_parser.rb (0.6.0) @@ -297,12 +301,15 @@ GEM nokogiri (>= 1.5.9) mail (2.7.0) mini_mime (>= 0.1.1) + marcel (0.3.1) + mimemagic (~> 0.3.2) memoist (0.16.0) metaclass (0.0.4) method_source (0.9.0) mime-types (3.1) mime-types-data (~> 3.2015) mime-types-data (3.2016.0521) + mimemagic (0.3.2) mini_magick (4.8.0) mini_mime (0.1.4) mini_portile2 (2.3.0) @@ -402,6 +409,7 @@ GEM rubyzip (1.2.1) rufus-scheduler (3.4.2) et-orbi (~> 1.0) + safe_yaml (1.0.4) sass (3.5.3) sass-listen (~> 4.0.0) sass-listen (4.0.0) @@ -481,6 +489,10 @@ GEM json (>= 1.8) nokogiri (~> 1.6) wdm (0.1.1) + webmock (3.2.1) + addressable (>= 2.3.6) + crack (>= 0.3.2) + hashdiff websocket (1.2.4) websocket-driver (0.6.5) websocket-extensions (>= 0.1.0) @@ -557,6 +569,7 @@ DEPENDENCIES uglifier (>= 1.3.0) w3c_validators wdm (>= 0.1.0) + webmock websocket-client-simple! BUNDLED WITH diff --git a/activestorage/activestorage.gemspec b/activestorage/activestorage.gemspec index 7f7f1a26ac..d135324700 100644 --- a/activestorage/activestorage.gemspec +++ b/activestorage/activestorage.gemspec @@ -27,4 +27,8 @@ Gem::Specification.new do |s| s.add_dependency "actionpack", version s.add_dependency "activerecord", version + + s.add_dependency "marcel", "~> 0.3.1" + + s.add_development_dependency "webmock", "~> 3.2.1" end diff --git a/activestorage/app/models/active_storage/attachment.rb b/activestorage/app/models/active_storage/attachment.rb index 9f61a5dbf3..19f48c57d6 100644 --- a/activestorage/app/models/active_storage/attachment.rb +++ b/activestorage/app/models/active_storage/attachment.rb @@ -14,7 +14,7 @@ class ActiveStorage::Attachment < ActiveRecord::Base delegate_missing_to :blob - after_create_commit :analyze_blob_later + after_create_commit :identify_blob, :analyze_blob_later # Synchronously purges the blob (deletes it from the configured service) and destroys the attachment. def purge @@ -29,6 +29,10 @@ class ActiveStorage::Attachment < ActiveRecord::Base end private + def identify_blob + blob.identify + end + def analyze_blob_later blob.analyze_later unless blob.analyzed? end diff --git a/activestorage/app/models/active_storage/blob.rb b/activestorage/app/models/active_storage/blob.rb index 2ac09d3062..7067c58259 100644 --- a/activestorage/app/models/active_storage/blob.rb +++ b/activestorage/app/models/active_storage/blob.rb @@ -14,12 +14,12 @@ # update a blob's metadata on a subsequent pass, but you should not update the key or change the uploaded file. # If you need to create a derivative or otherwise change the blob, simply create a new blob and purge the old one. class ActiveStorage::Blob < ActiveRecord::Base - include Analyzable, Representable + include Analyzable, Identifiable, Representable self.table_name = "active_storage_blobs" has_secure_token :key - store :metadata, accessors: [ :analyzed ], coder: JSON + store :metadata, accessors: [ :analyzed, :identified ], coder: JSON class_attribute :service @@ -136,8 +136,10 @@ class ActiveStorage::Blob < ActiveRecord::Base # Normally, you do not have to call this method directly at all. Use the factory class methods of +build_after_upload+ # and +create_after_upload!+. def upload(io) - self.checksum = compute_checksum_in_chunks(io) - self.byte_size = io.size + self.checksum = compute_checksum_in_chunks(io) + self.content_type = extract_content_type(io) + self.byte_size = io.size + self.identified = true service.upload(key, io, checksum: checksum) end @@ -182,6 +184,10 @@ class ActiveStorage::Blob < ActiveRecord::Base end.base64digest end + def extract_content_type(io) + Marcel::MimeType.for io, name: filename.to_s, declared_type: content_type + end + def forcibly_serve_as_binary? ActiveStorage.content_types_to_serve_as_binary.include?(content_type) end diff --git a/activestorage/app/models/active_storage/blob/identifiable.rb b/activestorage/app/models/active_storage/blob/identifiable.rb new file mode 100644 index 0000000000..40ca84ac70 --- /dev/null +++ b/activestorage/app/models/active_storage/blob/identifiable.rb @@ -0,0 +1,11 @@ +# frozen_string_literal: true + +module ActiveStorage::Blob::Identifiable + def identify + ActiveStorage::Identification.new(self).apply + end + + def identified? + identified + end +end diff --git a/activestorage/app/models/active_storage/identification.rb b/activestorage/app/models/active_storage/identification.rb new file mode 100644 index 0000000000..4f295257ae --- /dev/null +++ b/activestorage/app/models/active_storage/identification.rb @@ -0,0 +1,38 @@ +# frozen_string_literal: true + +class ActiveStorage::Identification + attr_reader :blob + + def initialize(blob) + @blob = blob + end + + def apply + blob.update!(content_type: content_type, identified: true) unless blob.identified? + end + + private + def content_type + Marcel::MimeType.for(identifiable_chunk, name: filename, declared_type: declared_content_type) + end + + + def identifiable_chunk + Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |client| + client.get(uri, "Range" => "0-4096").body + end + end + + def uri + @uri ||= URI.parse(blob.service_url) + end + + + def filename + blob.filename.to_s + end + + def declared_content_type + blob.content_type + end +end diff --git a/activestorage/lib/active_storage.rb b/activestorage/lib/active_storage.rb index 02442cfbe1..e1bd974853 100644 --- a/activestorage/lib/active_storage.rb +++ b/activestorage/lib/active_storage.rb @@ -30,6 +30,8 @@ require "active_support/rails" require "active_storage/version" require "active_storage/errors" +require "marcel" + module ActiveStorage extend ActiveSupport::Autoload diff --git a/activestorage/lib/active_storage/service/disk_service.rb b/activestorage/lib/active_storage/service/disk_service.rb index a8728c5bc3..27769db584 100644 --- a/activestorage/lib/active_storage/service/disk_service.rb +++ b/activestorage/lib/active_storage/service/disk_service.rb @@ -9,10 +9,10 @@ module ActiveStorage # Wraps a local disk path as an Active Storage service. See ActiveStorage::Service for the generic API # documentation that applies to all services. class Service::DiskService < Service - attr_reader :root + attr_reader :root, :host - def initialize(root:) - @root = root + def initialize(root:, host:) + @root, @host = root, host end def upload(key, io, checksum: nil) @@ -69,14 +69,13 @@ module ActiveStorage verified_key_with_expiration = ActiveStorage.verifier.generate(key, expires_in: expires_in, purpose: :blob_key) generated_url = - if defined?(Rails.application) - Rails.application.routes.url_helpers.rails_disk_service_path \ - verified_key_with_expiration, - filename: filename, disposition: content_disposition_with(type: disposition, filename: filename), content_type: content_type - else - "/rails/active_storage/disk/#{verified_key_with_expiration}/#{filename}?content_type=#{content_type}" \ - "&disposition=#{content_disposition_with(type: disposition, filename: filename)}" - end + Rails.application.routes.url_helpers.rails_disk_service_url( + verified_key_with_expiration, + filename: filename, + disposition: content_disposition_with(type: disposition, filename: filename), + content_type: content_type, + host: host + ) payload[:url] = generated_url @@ -97,12 +96,7 @@ module ActiveStorage purpose: :blob_token } ) - generated_url = - if defined?(Rails.application) - Rails.application.routes.url_helpers.update_rails_disk_service_path verified_token_with_expiration - else - "/rails/active_storage/disk/#{verified_token_with_expiration}" - end + generated_url = Rails.application.routes.url_helpers.update_rails_disk_service_url(verified_token_with_expiration, host: host) payload[:url] = generated_url diff --git a/activestorage/test/analyzer/video_analyzer_test.rb b/activestorage/test/analyzer/video_analyzer_test.rb index b3b9c97fe4..7fc1cfce06 100644 --- a/activestorage/test/analyzer/video_analyzer_test.rb +++ b/activestorage/test/analyzer/video_analyzer_test.rb @@ -30,6 +30,6 @@ class ActiveStorage::Analyzer::VideoAnalyzerTest < ActiveSupport::TestCase test "analyzing a video without a video stream" do blob = create_file_blob(filename: "video_without_video_stream.mp4", content_type: "video/mp4") - assert_equal({ "analyzed" => true }, blob.tap(&:analyze).metadata) + assert_equal({ "analyzed" => true, "identified" => true }, blob.tap(&:analyze).metadata) end end diff --git a/activestorage/test/dummy/config/storage.yml b/activestorage/test/dummy/config/storage.yml index 2c6762e0d6..2acc78dc4c 100644 --- a/activestorage/test/dummy/config/storage.yml +++ b/activestorage/test/dummy/config/storage.yml @@ -1,3 +1,4 @@ local: service: Disk root: <%= Rails.root.join("storage") %> + host: http://localhost:3000 diff --git a/activestorage/test/models/attachments_test.rb b/activestorage/test/models/attachments_test.rb index 20eec3c220..f0aa96b411 100644 --- a/activestorage/test/models/attachments_test.rb +++ b/activestorage/test/models/attachments_test.rb @@ -97,6 +97,29 @@ class ActiveStorage::AttachmentsTest < ActiveSupport::TestCase assert_equal "funky.jpg", @user.avatar_attachment.blob.filename.to_s end + test "identify newly-attached, directly-uploaded blob" do + # Simulate a direct upload. + blob = create_blob_before_direct_upload(filename: "racecar.jpg", content_type: "application/octet-stream", byte_size: 1124062, checksum: "7GjDDNEQb4mzMzsW+MS0JQ==") + ActiveStorage::Blob.service.upload(blob.key, file_fixture("racecar.jpg").open) + + stub_request(:get, %r{localhost:3000/rails/active_storage/disk/.*}).to_return(body: file_fixture("racecar.jpg")) + @user.avatar.attach(blob) + + assert_equal "image/jpeg", @user.avatar.reload.content_type + assert @user.avatar.identified? + end + + test "identify newly-attached blob only once" do + blob = create_file_blob + assert blob.identified? + + # The blob's backing file is a PNG image. Fudge its content type so we can tell if it's identified when we attach it. + blob.update! content_type: "application/octet-stream" + + @user.avatar.attach blob + assert_equal "application/octet-stream", blob.content_type + end + test "analyze newly-attached blob" do perform_enqueued_jobs do @user.avatar.attach create_file_blob @@ -115,7 +138,7 @@ class ActiveStorage::AttachmentsTest < ActiveSupport::TestCase assert blob.reload.analyzed? - @user.avatar.attachment.destroy + @user.avatar.detach assert_no_enqueued_jobs do @user.reload.avatar.attach blob diff --git a/activestorage/test/models/blob_test.rb b/activestorage/test/models/blob_test.rb index 2db516c088..b5daee2b57 100644 --- a/activestorage/test/models/blob_test.rb +++ b/activestorage/test/models/blob_test.rb @@ -13,6 +13,16 @@ class ActiveStorage::BlobTest < ActiveSupport::TestCase assert_equal Digest::MD5.base64digest(data), blob.checksum end + test "create after upload extracts content type from data" do + blob = create_file_blob content_type: "application/octet-stream" + assert_equal "image/jpeg", blob.content_type + end + + test "create after upload extracts content type from filename" do + blob = create_blob content_type: "application/octet-stream" + assert_equal "text/plain", blob.content_type + end + test "text?" do blob = create_blob data: "Hello world!" assert blob.text? @@ -79,6 +89,6 @@ class ActiveStorage::BlobTest < ActiveSupport::TestCase def expected_url_for(blob, disposition: :inline, filename: nil) filename ||= blob.filename query_string = { content_type: blob.content_type, disposition: "#{disposition}; #{filename.parameters}" }.to_param - "/rails/active_storage/disk/#{ActiveStorage.verifier.generate(blob.key, expires_in: 5.minutes, purpose: :blob_key)}/#{filename}?#{query_string}" + "http://localhost:3000/rails/active_storage/disk/#{ActiveStorage.verifier.generate(blob.key, expires_in: 5.minutes, purpose: :blob_key)}/#{filename}?#{query_string}" end end diff --git a/activestorage/test/models/variant_test.rb b/activestorage/test/models/variant_test.rb index 49660e880a..0cf8a583bd 100644 --- a/activestorage/test/models/variant_test.rb +++ b/activestorage/test/models/variant_test.rb @@ -67,6 +67,6 @@ class ActiveStorage::VariantTest < ActiveSupport::TestCase test "service_url doesn't grow in length despite long variant options" do blob = create_file_blob(filename: "racecar.jpg") variant = blob.variant(font: "a" * 10_000).processed - assert_operator variant.service_url.length, :<, 500 + assert_operator variant.service_url.length, :<, 525 end end diff --git a/activestorage/test/service/configurator_test.rb b/activestorage/test/service/configurator_test.rb index a2fd035e02..0a79b37aa4 100644 --- a/activestorage/test/service/configurator_test.rb +++ b/activestorage/test/service/configurator_test.rb @@ -4,8 +4,11 @@ require "service/shared_service_tests" class ActiveStorage::Service::ConfiguratorTest < ActiveSupport::TestCase test "builds correct service instance based on service name" do - service = ActiveStorage::Service::Configurator.build(:foo, foo: { service: "Disk", root: "path" }) + service = ActiveStorage::Service::Configurator.build(:foo, foo: { service: "Disk", root: "path", host: "http://localhost:3000" }) + assert_instance_of ActiveStorage::Service::DiskService, service + assert_equal "path", service.root + assert_equal "http://localhost:3000", service.host end test "raises error when passing non-existent service name" do diff --git a/activestorage/test/service/disk_service_test.rb b/activestorage/test/service/disk_service_test.rb index 4a6361b920..f0171f8999 100644 --- a/activestorage/test/service/disk_service_test.rb +++ b/activestorage/test/service/disk_service_test.rb @@ -3,7 +3,7 @@ require "service/shared_service_tests" class ActiveStorage::Service::DiskServiceTest < ActiveSupport::TestCase - SERVICE = ActiveStorage::Service::DiskService.new(root: File.join(Dir.tmpdir, "active_storage")) + SERVICE = ActiveStorage::Service::DiskService.new(root: File.join(Dir.tmpdir, "active_storage"), host: "http://localhost:3000") include ActiveStorage::Service::SharedServiceTests diff --git a/activestorage/test/service/mirror_service_test.rb b/activestorage/test/service/mirror_service_test.rb index 92101b1282..6b601f3b2b 100644 --- a/activestorage/test/service/mirror_service_test.rb +++ b/activestorage/test/service/mirror_service_test.rb @@ -6,12 +6,13 @@ class ActiveStorage::Service::MirrorServiceTest < ActiveSupport::TestCase mirror_config = (1..3).map do |i| [ "mirror_#{i}", service: "Disk", - root: Dir.mktmpdir("active_storage_tests_mirror_#{i}") ] + root: Dir.mktmpdir("active_storage_tests_mirror_#{i}"), + host: "http://localhost:3000" ] end.to_h config = mirror_config.merge \ - mirror: { service: "Mirror", primary: "primary", mirrors: mirror_config.keys }, - primary: { service: "Disk", root: Dir.mktmpdir("active_storage_tests_primary") } + mirror: { service: "Mirror", primary: "primary", mirrors: mirror_config.keys }, + primary: { service: "Disk", root: Dir.mktmpdir("active_storage_tests_primary"), host: "http://localhost:3000" } SERVICE = ActiveStorage::Service.configure :mirror, config diff --git a/activestorage/test/test_helper.rb b/activestorage/test/test_helper.rb index aaf1d452ea..51f8ebad18 100644 --- a/activestorage/test/test_helper.rb +++ b/activestorage/test/test_helper.rb @@ -7,6 +7,7 @@ require "bundler/setup" require "active_support" require "active_support/test_case" require "active_support/testing/autorun" +require "webmock/minitest" require "mini_magick" begin @@ -33,7 +34,7 @@ rescue Errno::ENOENT end require "tmpdir" -ActiveStorage::Blob.service = ActiveStorage::Service::DiskService.new(root: Dir.mktmpdir("active_storage_tests")) +ActiveStorage::Blob.service = ActiveStorage::Service::DiskService.new(root: Dir.mktmpdir("active_storage_tests"), host: "http://localhost:3000") ActiveStorage.logger = ActiveSupport::Logger.new(nil) ActiveStorage.verifier = ActiveSupport::MessageVerifier.new("Testing") @@ -41,6 +42,8 @@ ActiveStorage.verifier = ActiveSupport::MessageVerifier.new("Testing") class ActiveSupport::TestCase self.file_fixture_path = File.expand_path("fixtures/files", __dir__) + setup { WebMock.allow_net_connect! } + private def create_blob(data: "Hello world!", filename: "hello.txt", content_type: "text/plain") ActiveStorage::Blob.create_after_upload! io: StringIO.new(data), filename: filename, content_type: content_type diff --git a/guides/source/active_storage_overview.md b/guides/source/active_storage_overview.md index f45dbdee3e..c5bd09ead6 100644 --- a/guides/source/active_storage_overview.md +++ b/guides/source/active_storage_overview.md @@ -49,10 +49,12 @@ below declares three services named `local`, `test`, and `amazon`: local: service: Disk root: <%= Rails.root.join("storage") %> + host: http://localhost:3000 test: service: Disk root: <%= Rails.root.join("tmp/storage") %> + host: http://localhost:3000 amazon: service: S3 @@ -91,6 +93,7 @@ Declare a Disk service in `config/storage.yml`: local: service: Disk root: <%= Rails.root.join("storage") %> + host: http://localhost:3000 ``` ### Amazon S3 Service diff --git a/railties/lib/rails/generators/rails/app/templates/config/storage.yml.tt b/railties/lib/rails/generators/rails/app/templates/config/storage.yml.tt index 1c0cde0b09..fac165a24d 100644 --- a/railties/lib/rails/generators/rails/app/templates/config/storage.yml.tt +++ b/railties/lib/rails/generators/rails/app/templates/config/storage.yml.tt @@ -1,10 +1,12 @@ test: service: Disk root: <%%= Rails.root.join("tmp/storage") %> + host: http://localhost:3000 local: service: Disk root: <%%= Rails.root.join("storage") %> + host: http://localhost:3000 # Use rails credentials:edit to set the AWS secrets (as aws:access_key_id|secret_access_key) # amazon: -- cgit v1.2.3