aboutsummaryrefslogtreecommitdiffstats
path: root/activerecord/lib
diff options
context:
space:
mode:
authorBob Lail <bob.lailfamily@gmail.com>2019-03-05 13:16:44 -0600
committerDavid Heinemeier Hansson <david@loudthinking.com>2019-03-05 11:16:44 -0800
commit91ed21b304c468db8ce9fd830312c151432935d0 (patch)
tree354a329c8b7308a8762ef39a73e2ed1c30c51baa /activerecord/lib
parentdb94f492c099db89746f945a522aa7e59ede97cb (diff)
downloadrails-91ed21b304c468db8ce9fd830312c151432935d0.tar.gz
rails-91ed21b304c468db8ce9fd830312c151432935d0.tar.bz2
rails-91ed21b304c468db8ce9fd830312c151432935d0.zip
Add insert_all to ActiveRecord models (#35077)
Adds a method to ActiveRecord allowing records to be inserted in bulk without instantiating ActiveRecord models. This method supports options for handling uniqueness violations by skipping duplicate records or overwriting them in an UPSERT operation. ActiveRecord already supports bulk-update and bulk-destroy actions that execute SQL UPDATE and DELETE commands directly. It also supports bulk-read actions through `pluck`. It makes sense for it also to support bulk-creation.
Diffstat (limited to 'activerecord/lib')
-rw-r--r--activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb22
-rw-r--r--activerecord/lib/active_record/connection_adapters/abstract_adapter.rb29
-rw-r--r--activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb22
-rw-r--r--activerecord/lib/active_record/connection_adapters/postgresql_adapter.rb25
-rw-r--r--activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb20
-rw-r--r--activerecord/lib/active_record/insert_all.rb153
-rw-r--r--activerecord/lib/active_record/persistence.rb197
7 files changed, 457 insertions, 11 deletions
diff --git a/activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb b/activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb
index b5e6d03cf5..3c872d6c1b 100644
--- a/activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb
+++ b/activerecord/lib/active_record/connection_adapters/abstract/database_statements.rb
@@ -404,6 +404,17 @@ module ActiveRecord
end
end
+ # Fixture value is quoted by Arel, however scalar values
+ # are not quotable. In this case we want to convert
+ # the column value to YAML.
+ def with_yaml_fallback(value) # :nodoc:
+ if value.is_a?(Hash) || value.is_a?(Array)
+ YAML.dump(value)
+ else
+ value
+ end
+ end
+
private
def default_insert_value(column)
Arel.sql("DEFAULT")
@@ -473,17 +484,6 @@ module ActiveRecord
relation
end
end
-
- # Fixture value is quoted by Arel, however scalar values
- # are not quotable. In this case we want to convert
- # the column value to YAML.
- def with_yaml_fallback(value)
- if value.is_a?(Hash) || value.is_a?(Array)
- YAML.dump(value)
- else
- value
- end
- end
end
end
end
diff --git a/activerecord/lib/active_record/connection_adapters/abstract_adapter.rb b/activerecord/lib/active_record/connection_adapters/abstract_adapter.rb
index 823efee301..cf6a1217a0 100644
--- a/activerecord/lib/active_record/connection_adapters/abstract_adapter.rb
+++ b/activerecord/lib/active_record/connection_adapters/abstract_adapter.rb
@@ -387,6 +387,22 @@ module ActiveRecord
false
end
+ def supports_insert_returning?
+ false
+ end
+
+ def supports_insert_on_duplicate_skip?
+ false
+ end
+
+ def supports_insert_on_duplicate_update?
+ false
+ end
+
+ def supports_insert_conflict_target?
+ false
+ end
+
# This is meant to be implemented by the adapters that support extensions
def disable_extension(name)
end
@@ -542,6 +558,19 @@ module ActiveRecord
index.using.nil?
end
+ # Called by ActiveRecord::InsertAll,
+ # Passed an instance of ActiveRecord::InsertAll::Builder,
+ # This method implements standard bulk inserts for all databases, but
+ # should be overridden by adapters to implement common features with
+ # non-standard syntax like handling duplicates or returning values.
+ def build_insert_sql(insert) # :nodoc:
+ if insert.skip_duplicates? || insert.update_duplicates?
+ raise NotImplementedError, "#{self.class} should define `build_insert_sql` to implement adapter-specific logic for handling duplicates during INSERT"
+ end
+
+ "INSERT #{insert.into} #{insert.values_list}"
+ end
+
private
def check_version
end
diff --git a/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb b/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb
index 96f902792e..1aab36c865 100644
--- a/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb
+++ b/activerecord/lib/active_record/connection_adapters/abstract_mysql_adapter.rb
@@ -109,6 +109,14 @@ module ActiveRecord
true
end
+ def supports_insert_on_duplicate_skip?
+ true
+ end
+
+ def supports_insert_on_duplicate_update?
+ true
+ end
+
def get_advisory_lock(lock_name, timeout = 0) # :nodoc:
query_value("SELECT GET_LOCK(#{quote(lock_name.to_s)}, #{timeout})") == 1
end
@@ -511,6 +519,20 @@ module ActiveRecord
end
end
+ def build_insert_sql(insert) # :nodoc:
+ sql = +"INSERT #{insert.into} #{insert.values_list}"
+
+ if insert.skip_duplicates?
+ any_column = quote_column_name(insert.model.columns.first.name)
+ sql << " ON DUPLICATE KEY UPDATE #{any_column}=#{any_column}"
+ elsif insert.update_duplicates?
+ sql << " ON DUPLICATE KEY UPDATE "
+ sql << insert.updatable_columns.map { |column| "#{column}=VALUES(#{column})" }.join(",")
+ end
+
+ sql
+ end
+
private
def check_version
if version < "5.5.8"
diff --git a/activerecord/lib/active_record/connection_adapters/postgresql_adapter.rb b/activerecord/lib/active_record/connection_adapters/postgresql_adapter.rb
index 95beeb4cae..5919801519 100644
--- a/activerecord/lib/active_record/connection_adapters/postgresql_adapter.rb
+++ b/activerecord/lib/active_record/connection_adapters/postgresql_adapter.rb
@@ -196,6 +196,17 @@ module ActiveRecord
true
end
+ def supports_insert_returning?
+ true
+ end
+
+ def supports_insert_on_conflict?
+ postgresql_version >= 90500
+ end
+ alias supports_insert_on_duplicate_skip? supports_insert_on_conflict?
+ alias supports_insert_on_duplicate_update? supports_insert_on_conflict?
+ alias supports_insert_conflict_target? supports_insert_on_conflict?
+
def index_algorithms
{ concurrently: "CONCURRENTLY" }
end
@@ -425,6 +436,20 @@ module ActiveRecord
index.using == :btree || super
end
+ def build_insert_sql(insert) # :nodoc:
+ sql = +"INSERT #{insert.into} #{insert.values_list}"
+
+ if insert.skip_duplicates?
+ sql << " ON CONFLICT #{insert.conflict_target} DO NOTHING"
+ elsif insert.update_duplicates?
+ sql << " ON CONFLICT #{insert.conflict_target} DO UPDATE SET "
+ sql << insert.updatable_columns.map { |column| "#{column}=excluded.#{column}" }.join(",")
+ end
+
+ sql << " RETURNING #{insert.returning}" if insert.returning
+ sql
+ end
+
private
def check_version
if postgresql_version < 90300
diff --git a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
index 63e07932d9..0ed7f3988d 100644
--- a/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
+++ b/activerecord/lib/active_record/connection_adapters/sqlite3_adapter.rb
@@ -137,6 +137,13 @@ module ActiveRecord
true
end
+ def supports_insert_on_conflict?
+ sqlite_version >= "3.24.0"
+ end
+ alias supports_insert_on_duplicate_skip? supports_insert_on_conflict?
+ alias supports_insert_on_duplicate_update? supports_insert_on_conflict?
+ alias supports_insert_conflict_target? supports_insert_on_conflict?
+
def active?
@active
end
@@ -397,6 +404,19 @@ module ActiveRecord
end
end
+ def build_insert_sql(insert) # :nodoc:
+ sql = +"INSERT #{insert.into} #{insert.values_list}"
+
+ if insert.skip_duplicates?
+ sql << " ON CONFLICT #{insert.conflict_target} DO NOTHING"
+ elsif insert.update_duplicates?
+ sql << " ON CONFLICT #{insert.conflict_target} DO UPDATE SET "
+ sql << insert.updatable_columns.map { |column| "#{column}=excluded.#{column}" }.join(",")
+ end
+
+ sql
+ end
+
private
# See https://www.sqlite.org/limits.html,
# the default value is 999 when not configured.
diff --git a/activerecord/lib/active_record/insert_all.rb b/activerecord/lib/active_record/insert_all.rb
new file mode 100644
index 0000000000..ca5ce11d79
--- /dev/null
+++ b/activerecord/lib/active_record/insert_all.rb
@@ -0,0 +1,153 @@
+# frozen_string_literal: true
+
+module ActiveRecord
+ class InsertAll
+ attr_reader :model, :connection, :inserts, :on_duplicate, :returning, :unique_by
+
+ def initialize(model, inserts, on_duplicate:, returning: nil, unique_by: nil)
+ @model, @connection, @inserts, @on_duplicate, @returning, @unique_by = model, model.connection, inserts, on_duplicate, returning, unique_by
+ @returning = (connection.supports_insert_returning? ? primary_keys : false) if @returning.nil?
+ @returning = false if @returning == []
+ @on_duplicate = :skip if @on_duplicate == :update && updatable_columns.empty?
+
+ ensure_valid_options_for_connection!
+ end
+
+ def execute
+ if inserts.present?
+ connection.exec_query to_sql, "Bulk Insert"
+ else
+ ActiveRecord::Result.new([], [])
+ end
+ end
+
+ def keys
+ inserts.present? ? inserts.first.keys.map(&:to_s) : []
+ end
+
+ def updatable_columns
+ keys - readonly_columns - unique_by_columns
+ end
+
+ def skip_duplicates?
+ on_duplicate == :skip
+ end
+
+ def update_duplicates?
+ on_duplicate == :update
+ end
+
+ private
+ def ensure_valid_options_for_connection!
+ if returning && !connection.supports_insert_returning?
+ raise ArgumentError, "#{connection.class} does not support :returning"
+ end
+
+ unless %i{ raise skip update }.member?(on_duplicate)
+ raise NotImplementedError, "#{on_duplicate.inspect} is an unknown value for :on_duplicate. Valid values are :raise, :skip, and :update"
+ end
+
+ if on_duplicate == :skip && !connection.supports_insert_on_duplicate_skip?
+ raise ArgumentError, "#{connection.class} does not support skipping duplicates"
+ end
+
+ if on_duplicate == :update && !connection.supports_insert_on_duplicate_update?
+ raise ArgumentError, "#{connection.class} does not support upsert"
+ end
+
+ if unique_by && !connection.supports_insert_conflict_target?
+ raise ArgumentError, "#{connection.class} does not support :unique_by"
+ end
+ end
+
+ def to_sql
+ connection.build_insert_sql(ActiveRecord::InsertAll::Builder.new(self))
+ end
+
+ def readonly_columns
+ primary_keys + model.readonly_attributes.to_a
+ end
+
+ def unique_by_columns
+ unique_by ? unique_by.fetch(:columns).map(&:to_s) : []
+ end
+
+ def primary_keys
+ Array.wrap(model.primary_key)
+ end
+
+
+ class Builder
+ attr_reader :model
+
+ delegate :skip_duplicates?, :update_duplicates?, to: :insert_all
+
+ def initialize(insert_all)
+ @insert_all, @model, @connection = insert_all, insert_all.model, insert_all.connection
+ end
+
+ def into
+ "INTO #{model.quoted_table_name}(#{columns_list})"
+ end
+
+ def values_list
+ columns = connection.schema_cache.columns_hash(model.table_name)
+ keys = insert_all.keys.to_set
+ types = keys.map { |key| [ key, connection.lookup_cast_type_from_column(columns[key]) ] }.to_h
+
+ values_list = insert_all.inserts.map do |attributes|
+ attributes = attributes.stringify_keys
+
+ unless attributes.keys.to_set == keys
+ raise ArgumentError, "All objects being inserted must have the same keys"
+ end
+
+ keys.map do |key|
+ bind = Relation::QueryAttribute.new(key, attributes[key], types[key])
+ connection.with_yaml_fallback(bind.value_for_database)
+ end
+ end
+
+ Arel::InsertManager.new.create_values_list(values_list).to_sql
+ end
+
+ def returning
+ quote_columns(insert_all.returning).join(",") if insert_all.returning
+ end
+
+ def conflict_target
+ return unless conflict_columns
+ sql = +"(#{quote_columns(conflict_columns).join(',')})"
+ sql << " WHERE #{where}" if where
+ sql
+ end
+
+ def updatable_columns
+ quote_columns(insert_all.updatable_columns)
+ end
+
+ private
+ attr_reader :connection, :insert_all
+
+ def columns_list
+ quote_columns(insert_all.keys).join(",")
+ end
+
+ def quote_columns(columns)
+ columns.map(&connection.method(:quote_column_name))
+ end
+
+ def conflict_columns
+ @conflict_columns ||= begin
+ conflict_columns = insert_all.unique_by.fetch(:columns) if insert_all.unique_by
+ conflict_columns ||= Array.wrap(model.primary_key) if update_duplicates?
+ conflict_columns
+ end
+ end
+
+ def where
+ insert_all.unique_by && insert_all.unique_by[:where]
+ end
+ end
+ end
+end
diff --git a/activerecord/lib/active_record/persistence.rb b/activerecord/lib/active_record/persistence.rb
index 10148d0dca..069bd75a71 100644
--- a/activerecord/lib/active_record/persistence.rb
+++ b/activerecord/lib/active_record/persistence.rb
@@ -1,5 +1,7 @@
# frozen_string_literal: true
+require "active_record/insert_all"
+
module ActiveRecord
# = Active Record \Persistence
module Persistence
@@ -55,6 +57,201 @@ module ActiveRecord
end
end
+ # Inserts a single record into the databases. This method constructs a single SQL INSERT
+ # statement and sends it straight to the database. It does not instantiate the involved
+ # models and it does not trigger Active Record callbacks or validations. However, values
+ # passed to #insert will still go through Active Record's normal type casting and
+ # serialization.
+ #
+ # See <tt>ActiveRecord::Persistence#insert_all</tt> for documentation.
+ def insert(attributes, returning: nil, unique_by: nil)
+ insert_all([ attributes ], returning: returning, unique_by: unique_by)
+ end
+
+ # Inserts multiple records into the database. This method constructs a single SQL INSERT
+ # statement and sends it straight to the database. It does not instantiate the involved
+ # models and it does not trigger Active Record callbacks or validations. However, values
+ # passed to #insert_all will still go through Active Record's normal type casting and
+ # serialization.
+ #
+ # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
+ # attributes on the objects that are to be created. All of the Hashes must have
+ # same keys.
+ #
+ # Records that would violate a unique constraint on the table are skipped.
+ #
+ # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
+ # value of <tt>:returning</tt> (see below).
+ #
+ # ==== Options
+ #
+ # [:returning]
+ # (Postgres-only) An array of attributes that should be returned for all successfully
+ # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
+ # to returning the primary keys of the successfully inserted records. Pass
+ # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
+ # record or pass <tt>returning: false</tt> to omit the clause.
+ #
+ # [:unique_by]
+ # (Postgres and SQLite only) In a table with more than one unique constaint or index,
+ # new records may considered duplicates according to different criteria. By default,
+ # new rows will be skipped if they violate _any_ unique constraint/index. By defining
+ # <tt>:unique_by</tt>, you can skip rows that would create duplicates according to the given
+ # constraint but raise <tt>ActiveRecord::RecordNotUnique</tt> if rows violate other constraints.
+ #
+ # (For example, maybe you assume a client will try to import the same ISBNs more than
+ # once and want to silently ignore the duplicate records, but you don't except any of
+ # your code to attempt to create two rows with the same primary key and would appreciate
+ # an exception report in that scenario.)
+ #
+ # Indexes can be identified by an array of columns:
+ #
+ # unique_by: { columns: %w[ isbn ] }
+ #
+ # Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
+ #
+ # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+ #
+ # ==== Example
+ #
+ # # Insert multiple records and skip duplicates
+ # # ('Eloquent Ruby' will be skipped because its id is duplicate)
+ # Book.insert_all([
+ # { id: 1, title: 'Rework', author: 'David' },
+ # { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
+ # ])
+ #
+ def insert_all(attributes, returning: nil, unique_by: nil)
+ InsertAll.new(self, attributes, on_duplicate: :skip, returning: returning, unique_by: unique_by).execute
+ end
+
+ # Inserts a single record into the databases. This method constructs a single SQL INSERT
+ # statement and sends it straight to the database. It does not instantiate the involved
+ # models and it does not trigger Active Record callbacks or validations. However, values
+ # passed to #insert! will still go through Active Record's normal type casting and
+ # serialization.
+ #
+ # See <tt>ActiveRecord::Persistence#insert_all!</tt> for documentation.
+ def insert!(attributes, returning: nil)
+ insert_all!([ attributes ], returning: returning)
+ end
+
+ # Inserts multiple records into the database. This method constructs a single SQL INSERT
+ # statement and sends it straight to the database. It does not instantiate the involved
+ # models and it does not trigger Active Record callbacks or validations. However, values
+ # passed to #insert_all! will still go through Active Record's normal type casting and
+ # serialization.
+ #
+ # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
+ # attributes on the objects that are to be created. All of the Hashes must have
+ # same keys.
+ #
+ # #insert_all! will raise <tt>ActiveRecord::RecordNotUnique</tt> if any of the records being
+ # inserts would violate a unique constraint on the table. In that case, no records
+ # would be inserted.
+ #
+ # To skip duplicate records, see <tt>ActiveRecord::Persistence#insert_all</tt>.
+ # To replace them, see <tt>ActiveRecord::Persistence#upsert_all</tt>.
+ #
+ # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
+ # value of <tt>:returning</tt> (see below).
+ #
+ # ==== Options
+ #
+ # [:returning]
+ # (Postgres-only) An array of attributes that should be returned for all successfully
+ # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
+ # to returning the primary keys of the successfully inserted records. Pass
+ # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
+ # record or pass <tt>returning: false</tt> to omit the clause.
+ #
+ # ==== Examples
+ #
+ # # Insert multiple records
+ # Book.insert_all!([
+ # { title: 'Rework', author: 'David' },
+ # { title: 'Eloquent Ruby', author: 'Russ' }
+ # ])
+ #
+ # # raises ActiveRecord::RecordNotUnique beacuse 'Eloquent Ruby'
+ # # does not have a unique ID
+ # Book.insert_all!([
+ # { id: 1, title: 'Rework', author: 'David' },
+ # { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
+ # ])
+ #
+ def insert_all!(attributes, returning: nil)
+ InsertAll.new(self, attributes, on_duplicate: :raise, returning: returning).execute
+ end
+
+ # Upserts (inserts-or-creates) a single record into the databases. This method constructs
+ # a single SQL INSERT statement and sends it straight to the database. It does not
+ # instantiate the involved models and it does not trigger Active Record callbacks or
+ # validations. However, values passed to #upsert will still go through Active Record's
+ # normal type casting and serialization.
+ #
+ # See <tt>ActiveRecord::Persistence#upsert_all</tt> for documentation.
+ def upsert(attributes, returning: nil, unique_by: nil)
+ upsert_all([ attributes ], returning: returning, unique_by: unique_by)
+ end
+
+ # Upserts (creates-or-updates) multiple records into the database. This method constructs
+ # a single SQL INSERT statement and sends it straight to the database. It does not
+ # instantiate the involved models and it does not trigger Active Record callbacks or
+ # validations. However, values passed to #upsert_all will still go through Active Record's
+ # normal type casting and serialization.
+ #
+ # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
+ # attributes on the objects that are to be created. All of the Hashes must have
+ # same keys.
+ #
+ # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
+ # value of <tt>:returning</tt> (see below).
+ #
+ # ==== Options
+ #
+ # [:returning]
+ # (Postgres-only) An array of attributes that should be returned for all successfully
+ # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
+ # to returning the primary keys of the successfully inserted records. Pass
+ # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
+ # record or pass <tt>returning: false</tt> to omit the clause.
+ #
+ # [:unique_by]
+ # (Postgres and SQLite only) In a table with more than one unique constaint or index,
+ # new records may considered duplicates according to different criteria. For MySQL,
+ # an upsert will take place if a new record violates _any_ unique constraint. For
+ # Postgres and SQLite, new rows will replace existing rows when the new row has the
+ # same primary key as the existing row. By defining <tt>:unique_by</tt>, you can supply
+ # a different key for matching new records to existing ones than the primary key.
+ #
+ # (For example, if you have a unique index on the ISBN column and use that as
+ # the <tt>:unique_by</tt>, a new record with the same ISBN as an existing record
+ # will replace the existing record but a new record with the same primary key
+ # as an existing record will raise <tt>ActiveRecord::RecordNotUnique</tt>.)
+ #
+ # Indexes can be identified by an array of columns:
+ #
+ # unique_by: { columns: %w[ isbn ] }
+ #
+ # Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
+ #
+ # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+ #
+ # ==== Examples
+ #
+ # # Insert multiple records, performing an upsert when records have duplicate ISBNs
+ # # ('Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate)
+ # Book.upsert_all([
+ # { title: 'Rework', author: 'David', isbn: '1' },
+ # { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' }
+ # ],
+ # unique_by: { columns: %w[ isbn ] })
+ #
+ def upsert_all(attributes, returning: nil, unique_by: nil)
+ InsertAll.new(self, attributes, on_duplicate: :update, returning: returning, unique_by: unique_by).execute
+ end
+
# Given an attributes hash, +instantiate+ returns a new instance of
# the appropriate class. Accepts only keys as strings.
#