From 04454839a1a07cacac58cdf756a6b8e3adde0ef5 Mon Sep 17 00:00:00 2001 From: schneems <richard.schneeman+foo@gmail.com> Date: Fri, 7 Sep 2018 21:41:55 -0500 Subject: Use raw time string from DB to generate ActiveRecord#cache_version MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, the `updated_at` field is used to generate a `cache_version`. Some database adapters return this timestamp value as a string that must then be converted to a Time value. This process requires a lot of memory and even more CPU time. In the case where this value is only being used for a cache version, we can skip the Time conversion by using the string value directly. - This PR preserves existing cache format by converting a UTC string from the database to `:usec` format. - Some databases return an already converted Time object, in those instances, we can directly use `created_at`. - The `updated_at_before_type_cast` can be a value that comes from either the database or the user. We only want to optimize the case where it is from the database. - If the format of the cache version has been changed, we cannot apply this optimization, and it is skipped. - If the format of the time in the database is not UTC, then we cannot use this optimization, and it is skipped. Some databases (notably PostgreSQL) returns a variable length nanosecond value in the time string. If the value ends in a zero, then it is truncated For instance instead of `2018-10-12 05:00:00.000000` the value `2018-10-12 05:00:00` is returned. We detect this case and pad the remaining zeros to ensure consistent cache version generation. Before: Total allocated: 743842 bytes (6626 objects) After: Total allocated: 702955 bytes (6063 objects) (743842 - 702955) / 743842.0 # => 5.4% ⚡️⚡️⚡️⚡️⚡️ Using the CodeTriage application and derailed benchmarks this PR shows between 9-11% (statistically significant) performance improvement versus the commit before it. Special thanks to @lsylvester for helping to figure out a way to preserve the usec format and for helping with many implementation details. --- activerecord/lib/active_record/integration.rb | 48 ++++++++++++++++- activerecord/test/cases/cache_key_test.rb | 74 ++++++++++++++++++++++++++- 2 files changed, 118 insertions(+), 4 deletions(-) (limited to 'activerecord') diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb index 456689ec6d..43f6afbb42 100644 --- a/activerecord/lib/active_record/integration.rb +++ b/activerecord/lib/active_record/integration.rb @@ -96,8 +96,14 @@ module ActiveRecord # Note, this method will return nil if ActiveRecord::Base.cache_versioning is set to # +false+ (which it is by default until Rails 6.0). def cache_version - if cache_versioning && timestamp = try(:updated_at) - timestamp.utc.to_s(:usec) + return unless cache_versioning + return unless has_attribute?("updated_at") + + timestamp = updated_at_before_type_cast + if can_use_fast_cache_version?(timestamp) + raw_timestamp_to_cache_version(timestamp) + elsif timestamp = updated_at + timestamp.utc.to_s(cache_timestamp_format) end end @@ -151,5 +157,43 @@ module ActiveRecord end end end + + private + # Detects if the value before type cast + # can be used to generate a cache_version. + # + # The fast cache version only works with a + # string value directly from the database. + # + # We also must check if the timestamp format has been changed + # or if the timezone is not set to UTC then + # we cannot apply our transformations correctly. + def can_use_fast_cache_version?(timestamp) + timestamp.is_a?(String) && + cache_timestamp_format == :usec && + default_timezone == :utc && + !updated_at_came_from_user? + end + + # Converts a raw database string to `:usec` + # format. + # + # Example: + # + # timestamp = "2018-10-15 20:02:15.266505" + # raw_timestamp_to_cache_version(timestamp) + # # => "20181015200215266505" + # + # Postgres truncates trailing zeros, https://bit.ly/2QUlXiZ + # to account for this we pad the output with zeros + def raw_timestamp_to_cache_version(timestamp) + key = timestamp.delete("- :.") + padding = 20 - key.length + if padding != 0 + key << "0" * padding + else + key + end + end end end diff --git a/activerecord/test/cases/cache_key_test.rb b/activerecord/test/cases/cache_key_test.rb index 3a569f226e..535511d1cb 100644 --- a/activerecord/test/cases/cache_key_test.rb +++ b/activerecord/test/cases/cache_key_test.rb @@ -44,10 +44,80 @@ module ActiveRecord test "cache_key_with_version always has both key and version" do r1 = CacheMeWithVersion.create - assert_equal "active_record/cache_key_test/cache_me_with_versions/#{r1.id}-#{r1.updated_at.to_s(:usec)}", r1.cache_key_with_version + assert_equal "active_record/cache_key_test/cache_me_with_versions/#{r1.id}-#{r1.updated_at.utc.to_s(:usec)}", r1.cache_key_with_version r2 = CacheMe.create - assert_equal "active_record/cache_key_test/cache_mes/#{r2.id}-#{r2.updated_at.to_s(:usec)}", r2.cache_key_with_version + assert_equal "active_record/cache_key_test/cache_mes/#{r2.id}-#{r2.updated_at.utc.to_s(:usec)}", r2.cache_key_with_version + end + + test "cache_version is the same when it comes from the DB or from the user" do + skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter) + + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_not_called(record_from_db, :updated_at) do + record_from_db.cache_version + end + + assert_equal record.cache_version, record_from_db.cache_version + end + + test "cache_version does not truncate zeros when timestamp ends in zeros" do + skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter) + + travel_to Time.now.beginning_of_day do + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_not_called(record_from_db, :updated_at) do + record_from_db.cache_version + end + + assert_equal record.cache_version, record_from_db.cache_version + end + end + + test "cache_version calls updated_at when the value is generated at create time" do + record = CacheMeWithVersion.create + assert_called(record, :updated_at) do + record.cache_version + end + end + + test "cache_version does NOT call updated_at when value is from the database" do + skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter) + + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_not_called(record_from_db, :updated_at) do + record_from_db.cache_version + end + end + + test "cache_version does call updated_at when it is assigned via a Time object" do + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_called(record_from_db, :updated_at) do + record_from_db.updated_at = Time.now + record_from_db.cache_version + end + end + + test "cache_version does call updated_at when it is assigned via a string" do + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_called(record_from_db, :updated_at) do + record_from_db.updated_at = Time.now.to_s + record_from_db.cache_version + end + end + + test "cache_version does call updated_at when it is assigned via a hash" do + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.find(record.id) + assert_called(record_from_db, :updated_at) do + record_from_db.updated_at = { 1 => 2016, 2 => 11, 3 => 12, 4 => 1, 5 => 2, 6 => 3, 7 => 22 } + record_from_db.cache_version + end end end end -- cgit v1.2.3 From 2f99da00c7b311af0bc5969985eee97937790e4f Mon Sep 17 00:00:00 2001 From: schneems <richard.schneeman+foo@gmail.com> Date: Mon, 15 Oct 2018 15:31:12 -0500 Subject: Do not silently fail to generate a cache_version When an `updated_at` column exists on the model, but is not available on the instance (likely due to a select), we should raise an error rather than silently not generating a cache_version. Without this behavior it's likely that cache entries will not be able to be invalidated and this will happen without notice. This behavior was reported and described by @lsylvester in https://github.com/rails/rails/pull/34197#issuecomment-429668759. --- activerecord/lib/active_record/integration.rb | 17 +++++++++++------ activerecord/test/cases/cache_key_test.rb | 8 ++++++++ 2 files changed, 19 insertions(+), 6 deletions(-) (limited to 'activerecord') diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb index 43f6afbb42..db7200bceb 100644 --- a/activerecord/lib/active_record/integration.rb +++ b/activerecord/lib/active_record/integration.rb @@ -97,13 +97,18 @@ module ActiveRecord # +false+ (which it is by default until Rails 6.0). def cache_version return unless cache_versioning - return unless has_attribute?("updated_at") - timestamp = updated_at_before_type_cast - if can_use_fast_cache_version?(timestamp) - raw_timestamp_to_cache_version(timestamp) - elsif timestamp = updated_at - timestamp.utc.to_s(cache_timestamp_format) + if has_attribute?("updated_at") + timestamp = updated_at_before_type_cast + if can_use_fast_cache_version?(timestamp) + raw_timestamp_to_cache_version(timestamp) + elsif timestamp = updated_at + timestamp.utc.to_s(cache_timestamp_format) + end + else + if self.class.has_attribute?("updated_at") + raise ActiveModel::MissingAttributeError, "missing attribute: updated_at" + end end end diff --git a/activerecord/test/cases/cache_key_test.rb b/activerecord/test/cases/cache_key_test.rb index 535511d1cb..3a06b1c795 100644 --- a/activerecord/test/cases/cache_key_test.rb +++ b/activerecord/test/cases/cache_key_test.rb @@ -119,5 +119,13 @@ module ActiveRecord record_from_db.cache_version end end + + test "updated_at on class but not on instance raises an error" do + record = CacheMeWithVersion.create + record_from_db = CacheMeWithVersion.where(id: record.id).select(:id).first + assert_raises(ActiveModel::MissingAttributeError) do + record_from_db.cache_version + end + end end end -- cgit v1.2.3 From 73f9cd2da452c97061a7b01d3d8e9f480950fe80 Mon Sep 17 00:00:00 2001 From: lsylvester <lachlan.sylvester@hypothetical.com.au> Date: Wed, 17 Oct 2018 10:48:11 -0500 Subject: Prefer String#ljust over String#<< for padding --- activerecord/lib/active_record/integration.rb | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'activerecord') diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb index db7200bceb..04d0d46d3e 100644 --- a/activerecord/lib/active_record/integration.rb +++ b/activerecord/lib/active_record/integration.rb @@ -193,9 +193,8 @@ module ActiveRecord # to account for this we pad the output with zeros def raw_timestamp_to_cache_version(timestamp) key = timestamp.delete("- :.") - padding = 20 - key.length - if padding != 0 - key << "0" * padding + if key.length < 20 + key.ljust(20, "0") else key end -- cgit v1.2.3