From 04454839a1a07cacac58cdf756a6b8e3adde0ef5 Mon Sep 17 00:00:00 2001
From: schneems <richard.schneeman+foo@gmail.com>
Date: Fri, 7 Sep 2018 21:41:55 -0500
Subject: Use raw time string from DB to generate ActiveRecord#cache_version
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Currently, the `updated_at` field is used to generate a `cache_version`. Some database adapters return this timestamp value as a string that must then be converted to a Time value. This process requires a lot of memory and even more CPU time. In the case where this value is only being used for a cache version, we can skip the Time conversion by using the string value directly.

- This PR preserves existing cache format by converting a UTC string from the database to `:usec` format.
- Some databases return an already converted Time object, in those instances, we can directly use `created_at`.
- The `updated_at_before_type_cast` can be a value that comes from either the database or the user. We only want to optimize the case where it is from the database.
- If the format of the cache version has been changed, we cannot apply this optimization, and it is skipped.
- If the format of the time in the database is not UTC, then we cannot use this optimization, and it is skipped.

Some databases (notably PostgreSQL) returns a variable length nanosecond value in the time string. If the value ends in a zero, then it is truncated For instance instead of `2018-10-12 05:00:00.000000` the value `2018-10-12 05:00:00` is returned. We detect this case and pad the remaining zeros to ensure consistent cache version generation.

Before: Total allocated: 743842 bytes (6626 objects)
After: Total allocated: 702955 bytes (6063 objects)

(743842 - 702955) / 743842.0 # => 5.4% ⚡️⚡️⚡️⚡️⚡️

Using the CodeTriage application and derailed benchmarks this PR shows between 9-11% (statistically significant) performance improvement versus the commit before it.

Special thanks to @lsylvester for helping to figure out a way to preserve the usec format and for helping with many implementation details.
---
 activerecord/lib/active_record/integration.rb | 48 ++++++++++++++++-
 activerecord/test/cases/cache_key_test.rb     | 74 ++++++++++++++++++++++++++-
 2 files changed, 118 insertions(+), 4 deletions(-)

(limited to 'activerecord')

diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb
index 456689ec6d..43f6afbb42 100644
--- a/activerecord/lib/active_record/integration.rb
+++ b/activerecord/lib/active_record/integration.rb
@@ -96,8 +96,14 @@ module ActiveRecord
     # Note, this method will return nil if ActiveRecord::Base.cache_versioning is set to
     # +false+ (which it is by default until Rails 6.0).
     def cache_version
-      if cache_versioning && timestamp = try(:updated_at)
-        timestamp.utc.to_s(:usec)
+      return unless cache_versioning
+      return unless has_attribute?("updated_at")
+
+      timestamp = updated_at_before_type_cast
+      if can_use_fast_cache_version?(timestamp)
+        raw_timestamp_to_cache_version(timestamp)
+      elsif timestamp = updated_at
+        timestamp.utc.to_s(cache_timestamp_format)
       end
     end
 
@@ -151,5 +157,43 @@ module ActiveRecord
         end
       end
     end
+
+    private
+      # Detects if the value before type cast
+      # can be used to generate a cache_version.
+      #
+      # The fast cache version only works with a
+      # string value directly from the database.
+      #
+      # We also must check if the timestamp format has been changed
+      # or if the timezone is not set to UTC then
+      # we cannot apply our transformations correctly.
+      def can_use_fast_cache_version?(timestamp)
+        timestamp.is_a?(String) &&
+          cache_timestamp_format == :usec &&
+          default_timezone == :utc &&
+          !updated_at_came_from_user?
+      end
+
+      # Converts a raw database string to `:usec`
+      # format.
+      #
+      # Example:
+      #
+      #   timestamp = "2018-10-15 20:02:15.266505"
+      #   raw_timestamp_to_cache_version(timestamp)
+      #   # => "20181015200215266505"
+      #
+      # Postgres truncates trailing zeros, https://bit.ly/2QUlXiZ
+      # to account for this we pad the output with zeros
+      def raw_timestamp_to_cache_version(timestamp)
+        key = timestamp.delete("- :.")
+        padding = 20 - key.length
+        if padding != 0
+          key << "0" * padding
+        else
+          key
+        end
+      end
   end
 end
diff --git a/activerecord/test/cases/cache_key_test.rb b/activerecord/test/cases/cache_key_test.rb
index 3a569f226e..535511d1cb 100644
--- a/activerecord/test/cases/cache_key_test.rb
+++ b/activerecord/test/cases/cache_key_test.rb
@@ -44,10 +44,80 @@ module ActiveRecord
 
     test "cache_key_with_version always has both key and version" do
       r1 = CacheMeWithVersion.create
-      assert_equal "active_record/cache_key_test/cache_me_with_versions/#{r1.id}-#{r1.updated_at.to_s(:usec)}", r1.cache_key_with_version
+      assert_equal "active_record/cache_key_test/cache_me_with_versions/#{r1.id}-#{r1.updated_at.utc.to_s(:usec)}", r1.cache_key_with_version
 
       r2 = CacheMe.create
-      assert_equal "active_record/cache_key_test/cache_mes/#{r2.id}-#{r2.updated_at.to_s(:usec)}", r2.cache_key_with_version
+      assert_equal "active_record/cache_key_test/cache_mes/#{r2.id}-#{r2.updated_at.utc.to_s(:usec)}", r2.cache_key_with_version
+    end
+
+    test "cache_version is the same when it comes from the DB or from the user" do
+      skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter)
+
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.find(record.id)
+      assert_not_called(record_from_db, :updated_at) do
+        record_from_db.cache_version
+      end
+
+      assert_equal record.cache_version, record_from_db.cache_version
+    end
+
+    test "cache_version does not truncate zeros when timestamp ends in zeros" do
+      skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter)
+
+      travel_to Time.now.beginning_of_day do
+        record = CacheMeWithVersion.create
+        record_from_db = CacheMeWithVersion.find(record.id)
+        assert_not_called(record_from_db, :updated_at) do
+          record_from_db.cache_version
+        end
+
+        assert_equal record.cache_version, record_from_db.cache_version
+      end
+    end
+
+    test "cache_version calls updated_at when the value is generated at create time" do
+      record = CacheMeWithVersion.create
+      assert_called(record, :updated_at) do
+        record.cache_version
+      end
+    end
+
+    test "cache_version does NOT call updated_at when value is from the database" do
+      skip("Mysql2 does not return a string value for updated_at") if current_adapter?(:Mysql2Adapter)
+
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.find(record.id)
+      assert_not_called(record_from_db, :updated_at) do
+        record_from_db.cache_version
+      end
+    end
+
+    test "cache_version does call updated_at when it is assigned via a Time object" do
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.find(record.id)
+      assert_called(record_from_db, :updated_at) do
+        record_from_db.updated_at = Time.now
+        record_from_db.cache_version
+      end
+    end
+
+    test "cache_version does call updated_at when it is assigned via a string" do
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.find(record.id)
+      assert_called(record_from_db, :updated_at) do
+        record_from_db.updated_at = Time.now.to_s
+        record_from_db.cache_version
+      end
+    end
+
+    test "cache_version does call updated_at when it is assigned via a hash" do
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.find(record.id)
+      assert_called(record_from_db, :updated_at) do
+        record_from_db.updated_at = { 1 => 2016, 2 => 11, 3 => 12, 4 => 1, 5 => 2, 6 => 3, 7 => 22 }
+        record_from_db.cache_version
+      end
     end
   end
 end
-- 
cgit v1.2.3


From 2f99da00c7b311af0bc5969985eee97937790e4f Mon Sep 17 00:00:00 2001
From: schneems <richard.schneeman+foo@gmail.com>
Date: Mon, 15 Oct 2018 15:31:12 -0500
Subject: Do not silently fail to generate a cache_version

When an `updated_at` column exists on the model, but is not available on the instance (likely due to a select), we should raise an error rather than silently not generating a cache_version. Without this behavior it's likely that cache entries will not be able to be invalidated and this will happen without notice.

This behavior was reported and described by @lsylvester in https://github.com/rails/rails/pull/34197#issuecomment-429668759.
---
 activerecord/lib/active_record/integration.rb | 17 +++++++++++------
 activerecord/test/cases/cache_key_test.rb     |  8 ++++++++
 2 files changed, 19 insertions(+), 6 deletions(-)

(limited to 'activerecord')

diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb
index 43f6afbb42..db7200bceb 100644
--- a/activerecord/lib/active_record/integration.rb
+++ b/activerecord/lib/active_record/integration.rb
@@ -97,13 +97,18 @@ module ActiveRecord
     # +false+ (which it is by default until Rails 6.0).
     def cache_version
       return unless cache_versioning
-      return unless has_attribute?("updated_at")
 
-      timestamp = updated_at_before_type_cast
-      if can_use_fast_cache_version?(timestamp)
-        raw_timestamp_to_cache_version(timestamp)
-      elsif timestamp = updated_at
-        timestamp.utc.to_s(cache_timestamp_format)
+      if has_attribute?("updated_at")
+        timestamp = updated_at_before_type_cast
+        if can_use_fast_cache_version?(timestamp)
+          raw_timestamp_to_cache_version(timestamp)
+        elsif timestamp = updated_at
+          timestamp.utc.to_s(cache_timestamp_format)
+        end
+      else
+        if self.class.has_attribute?("updated_at")
+          raise ActiveModel::MissingAttributeError, "missing attribute: updated_at"
+        end
       end
     end
 
diff --git a/activerecord/test/cases/cache_key_test.rb b/activerecord/test/cases/cache_key_test.rb
index 535511d1cb..3a06b1c795 100644
--- a/activerecord/test/cases/cache_key_test.rb
+++ b/activerecord/test/cases/cache_key_test.rb
@@ -119,5 +119,13 @@ module ActiveRecord
         record_from_db.cache_version
       end
     end
+
+    test "updated_at on class but not on instance raises an error" do
+      record = CacheMeWithVersion.create
+      record_from_db = CacheMeWithVersion.where(id: record.id).select(:id).first
+      assert_raises(ActiveModel::MissingAttributeError) do
+        record_from_db.cache_version
+      end
+    end
   end
 end
-- 
cgit v1.2.3


From 73f9cd2da452c97061a7b01d3d8e9f480950fe80 Mon Sep 17 00:00:00 2001
From: lsylvester <lachlan.sylvester@hypothetical.com.au>
Date: Wed, 17 Oct 2018 10:48:11 -0500
Subject: Prefer String#ljust over String#<< for padding

---
 activerecord/lib/active_record/integration.rb | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

(limited to 'activerecord')

diff --git a/activerecord/lib/active_record/integration.rb b/activerecord/lib/active_record/integration.rb
index db7200bceb..04d0d46d3e 100644
--- a/activerecord/lib/active_record/integration.rb
+++ b/activerecord/lib/active_record/integration.rb
@@ -193,9 +193,8 @@ module ActiveRecord
       # to account for this we pad the output with zeros
       def raw_timestamp_to_cache_version(timestamp)
         key = timestamp.delete("- :.")
-        padding = 20 - key.length
-        if padding != 0
-          key << "0" * padding
+        if key.length < 20
+          key.ljust(20, "0")
         else
           key
         end
-- 
cgit v1.2.3