aboutsummaryrefslogtreecommitdiffstats
path: root/activerecord/lib/active_record
diff options
context:
space:
mode:
author: Kasper Timm Hansen <kaspth@gmail.com>, 2019-03-20 18:11:31 +0100
committer: GitHub <noreply@github.com>, 2019-03-20 18:11:31 +0100
commit: 6b5130720eee785b45dfc0fc5044224cdc2b6c98 (patch)
tree: 149970471bcb437ee8bf486c31ffd7dc7d90587f /activerecord/lib/active_record
parent: d36991147863a95bea93506dc5411d00043790f5 (diff)
parent: 60c29e1cc7416235c6fc45e0d936c36e58def21a (diff)
download: rails-6b5130720eee785b45dfc0fc5044224cdc2b6c98.tar.gz
rails-6b5130720eee785b45dfc0fc5044224cdc2b6c98.tar.bz2
rails-6b5130720eee785b45dfc0fc5044224cdc2b6c98.zip
Merge pull request #35546 from rails/bulk-inserts-with-index
Bulk Insert: Reuse indexes for unique_by
Diffstat (limited to 'activerecord/lib/active_record')
-rw-r--r-- activerecord/lib/active_record/insert_all.rb 51
-rw-r--r-- activerecord/lib/active_record/persistence.rb 217
2 files changed, 132 insertions, 136 deletions
diff --git a/activerecord/lib/active_record/insert_all.rb b/activerecord/lib/active_record/insert_all.rb
index 98c98d61cd..d30aee7c00 100644
--- a/activerecord/lib/active_record/insert_all.rb
+++ b/activerecord/lib/active_record/insert_all.rb
@@ -14,6 +14,7 @@ module ActiveRecord
@returning = (connection.supports_insert_returning? ? primary_keys : false) if @returning.nil?
@returning = false if @returning == []
+ @unique_by = find_unique_index_for(unique_by) if unique_by
@on_duplicate = :skip if @on_duplicate == :update && updatable_columns.empty?
ensure_valid_options_for_connection!
@@ -27,6 +28,11 @@ module ActiveRecord
keys - readonly_columns - unique_by_columns
end
+ def primary_keys
+ Array(model.primary_key)
+ end
+
+
def skip_duplicates?
on_duplicate == :skip
end
@@ -47,6 +53,21 @@ module ActiveRecord
end
private
+ def find_unique_index_for(unique_by)
+ match = Array(unique_by).map(&:to_s)
+
+ if index = unique_indexes.find { |i| match.include?(i.name) || i.columns == match }
+ index
+ else
+ raise ArgumentError, "No unique index found for #{unique_by}"
+ end
+ end
+
+ def unique_indexes
+ connection.schema_cache.indexes(model.table_name).select(&:unique)
+ end
+
+
def ensure_valid_options_for_connection!
if returning && !connection.supports_insert_returning?
raise ArgumentError, "#{connection.class} does not support :returning"
@@ -69,21 +90,20 @@ module ActiveRecord
end
end
+
def to_sql
connection.build_insert_sql(ActiveRecord::InsertAll::Builder.new(self))
end
+
def readonly_columns
primary_keys + model.readonly_attributes.to_a
end
def unique_by_columns
- unique_by ? unique_by.fetch(:columns).map(&:to_s) : []
+ Array(unique_by&.columns)
end
- def primary_keys
- Array.wrap(model.primary_key)
- end
def verify_attributes(attributes)
if keys != attributes.keys.to_set
@@ -121,10 +141,13 @@ module ActiveRecord
end
def conflict_target
- return unless conflict_columns
- sql = +"(#{quote_columns(conflict_columns).join(',')})"
- sql << " WHERE #{where}" if where
- sql
+ if index = insert_all.unique_by
+ sql = +"(#{quote_columns(index.columns).join(',')})"
+ sql << " WHERE #{index.where}" if index.where
+ sql
+ elsif update_duplicates?
+ "(#{quote_columns(insert_all.primary_keys).join(',')})"
+ end
end
def updatable_columns
@@ -150,18 +173,6 @@ module ActiveRecord
def quote_columns(columns)
columns.map(&connection.method(:quote_column_name))
end
-
- def conflict_columns
- @conflict_columns ||= begin
- conflict_columns = insert_all.unique_by.fetch(:columns) if insert_all.unique_by
- conflict_columns ||= Array.wrap(model.primary_key) if update_duplicates?
- conflict_columns
- end
- end
-
- def where
- insert_all.unique_by && insert_all.unique_by[:where]
- end
end
end
end
diff --git a/activerecord/lib/active_record/persistence.rb b/activerecord/lib/active_record/persistence.rb
index 0c31f0f57e..ba03a3773a 100644
--- a/activerecord/lib/active_record/persistence.rb
+++ b/activerecord/lib/active_record/persistence.rb
@@ -57,203 +57,188 @@ module ActiveRecord
end
end
- # Inserts a single record into the database. This method constructs a single SQL INSERT
- # statement and sends it straight to the database. It does not instantiate the involved
- # models and it does not trigger Active Record callbacks or validations. However, values
- # passed to #insert will still go through Active Record's normal type casting and
- # serialization.
+ # Inserts a single record into the database in a single SQL INSERT
+ # statement. It does not instantiate any models nor does it trigger
+ # Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
# See <tt>ActiveRecord::Persistence#insert_all</tt> for documentation.
def insert(attributes, returning: nil, unique_by: nil)
insert_all([ attributes ], returning: returning, unique_by: unique_by)
end
- # Inserts multiple records into the database. This method constructs a single SQL INSERT
- # statement and sends it straight to the database. It does not instantiate the involved
- # models and it does not trigger Active Record callbacks or validations. However, values
- # passed to #insert_all will still go through Active Record's normal type casting and
- # serialization.
+ # Inserts multiple records into the database in a single SQL INSERT
+ # statement. It does not instantiate any models nor does it trigger
+ # Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
- # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
- # attributes on the objects that are to be created. All of the Hashes must have
- # same keys.
+ # The +attributes+ parameter is an Array of Hashes. Every Hash determines
+ # the attributes for a single row and must have the same keys.
#
- # Records that would violate a unique constraint on the table are skipped.
+ # Rows are considered to be unique by every unique index on the table. Any
+ # duplicate rows are skipped.
+ # Override with <tt>:unique_by</tt> (see below).
#
- # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
- # value of <tt>:returning</tt> (see below).
+ # Returns an <tt>ActiveRecord::Result</tt> with its contents based on
+ # <tt>:returning</tt> (see below).
#
# ==== Options
#
# [:returning]
- # (Postgres-only) An array of attributes that should be returned for all successfully
- # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
- # to returning the primary keys of the successfully inserted records. Pass
- # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
- # record or pass <tt>returning: false</tt> to omit the clause.
+ # (Postgres-only) An array of attributes to return for all successfully
+ # inserted records, which by default is the primary key.
+ # Pass <tt>returning: %w[ id name ]</tt> for both id and name
+ # or <tt>returning: false</tt> to omit the underlying RETURNING SQL
+ # clause entirely.
#
# [:unique_by]
- # (Postgres and SQLite only) In a table with more than one unique constraint or index,
- # new records may be considered duplicates according to different criteria. By default,
- # new rows will be skipped if they violate _any_ unique constraint or index. By defining
- # <tt>:unique_by</tt>, you can skip rows that would create duplicates according to the given
- # constraint but raise <tt>ActiveRecord::RecordNotUnique</tt> if rows violate other constraints.
+ # (Postgres and SQLite only) By default rows are considered to be unique
+ # by every unique index on the table. Any duplicate rows are skipped.
#
- # (For example, maybe you assume a client will try to import the same ISBNs more than
- # once and want to silently ignore the duplicate records, but you don't except any of
- # your code to attempt to create two rows with the same primary key and would appreciate
- # an exception report in that scenario.)
+ # To skip rows according to just one unique index pass <tt>:unique_by</tt>.
#
- # Indexes can be identified by an array of columns:
+ # Consider a Book model where no duplicate ISBNs make sense, but if any
+ # row has an existing id, or is not unique by another unique index,
+ # <tt>ActiveRecord::RecordNotUnique</tt> is raised.
#
- # unique_by: { columns: %w[ isbn ] }
+ # Unique indexes can be identified by columns or name:
#
- # Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
+ # unique_by: :isbn
+ # unique_by: %i[ author_id name ]
+ # unique_by: :index_books_on_isbn
#
- # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+ # Because it relies on the index information from the database,
+ # <tt>:unique_by</tt> is best paired with Active Record's
+ # schema_cache.
#
# ==== Example
#
- # # Insert multiple records and skip duplicates
- # # ('Eloquent Ruby' will be skipped because its id is duplicate)
+ # # Insert records and skip inserting any duplicates.
+ # # Here "Eloquent Ruby" is skipped because its id is not unique.
+ #
# Book.insert_all([
- # { id: 1, title: 'Rework', author: 'David' },
- # { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
+ # { id: 1, title: "Rework", author: "David" },
+ # { id: 1, title: "Eloquent Ruby", author: "Russ" }
# ])
- #
def insert_all(attributes, returning: nil, unique_by: nil)
InsertAll.new(self, attributes, on_duplicate: :skip, returning: returning, unique_by: unique_by).execute
end
- # Inserts a single record into the database. This method constructs a single SQL INSERT
- # statement and sends it straight to the database. It does not instantiate the involved
- # models and it does not trigger Active Record callbacks or validations. However, values
- # passed to #insert! will still go through Active Record's normal type casting and
- # serialization.
+ # Inserts a single record into the database in a single SQL INSERT
+ # statement. It does not instantiate any models nor does it trigger
+ # Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
- # See <tt>ActiveRecord::Persistence#insert_all!</tt> for documentation.
+ # See <tt>ActiveRecord::Persistence#insert_all!</tt> for more.
def insert!(attributes, returning: nil)
insert_all!([ attributes ], returning: returning)
end
- # Inserts multiple records into the database. This method constructs a single SQL INSERT
- # statement and sends it straight to the database. It does not instantiate the involved
- # models and it does not trigger Active Record callbacks or validations. However, values
- # passed to #insert_all! will still go through Active Record's normal type casting and
- # serialization.
+ # Inserts multiple records into the database in a single SQL INSERT
+ # statement. It does not instantiate any models nor does it trigger
+ # Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
- # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
- # attributes on the objects that are to be created. All of the Hashes must have
- # same keys.
+ # The +attributes+ parameter is an Array of Hashes. Every Hash determines
+ # the attributes for a single row and must have the same keys.
#
- # #insert_all! will raise <tt>ActiveRecord::RecordNotUnique</tt> if any of the records being
- # inserts would violate a unique constraint on the table. In that case, no records
- # would be inserted.
+ # Raises <tt>ActiveRecord::RecordNotUnique</tt> if any rows violate a
+ # unique index on the table. In that case, no rows are inserted.
#
- # To skip duplicate records, see <tt>ActiveRecord::Persistence#insert_all</tt>.
+ # To skip duplicate rows, see <tt>ActiveRecord::Persistence#insert_all</tt>.
# To replace them, see <tt>ActiveRecord::Persistence#upsert_all</tt>.
#
- # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
- # value of <tt>:returning</tt> (see below).
+ # Returns an <tt>ActiveRecord::Result</tt> with its contents based on
+ # <tt>:returning</tt> (see below).
#
# ==== Options
#
# [:returning]
- # (Postgres-only) An array of attributes that should be returned for all successfully
- # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
- # to returning the primary keys of the successfully inserted records. Pass
- # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
- # record or pass <tt>returning: false</tt> to omit the clause.
+ # (Postgres-only) An array of attributes to return for all successfully
+ # inserted records, which by default is the primary key.
+ # Pass <tt>returning: %w[ id name ]</tt> for both id and name
+ # or <tt>returning: false</tt> to omit the underlying RETURNING SQL
+ # clause entirely.
#
# ==== Examples
#
# # Insert multiple records
# Book.insert_all!([
- # { title: 'Rework', author: 'David' },
- # { title: 'Eloquent Ruby', author: 'Russ' }
+ # { title: "Rework", author: "David" },
+ # { title: "Eloquent Ruby", author: "Russ" }
# ])
#
- # # Raises ActiveRecord::RecordNotUnique because 'Eloquent Ruby'
- # # does not have a unique ID
+ # # Raises ActiveRecord::RecordNotUnique because "Eloquent Ruby"
+ # # does not have a unique id.
# Book.insert_all!([
- # { id: 1, title: 'Rework', author: 'David' },
- # { id: 1, title: 'Eloquent Ruby', author: 'Russ' }
+ # { id: 1, title: "Rework", author: "David" },
+ # { id: 1, title: "Eloquent Ruby", author: "Russ" }
# ])
- #
def insert_all!(attributes, returning: nil)
InsertAll.new(self, attributes, on_duplicate: :raise, returning: returning).execute
end
- # Upserts (updates or inserts) a single record into the database. This method constructs
- # a single SQL INSERT statement and sends it straight to the database. It does not
- # instantiate the involved models and it does not trigger Active Record callbacks or
- # validations. However, values passed to #upsert will still go through Active Record's
- # normal type casting and serialization.
+ # Updates or inserts (upserts) a single record into the database in a
+ # single SQL INSERT statement. It does not instantiate any models nor does
+ # it trigger Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
# See <tt>ActiveRecord::Persistence#upsert_all</tt> for documentation.
def upsert(attributes, returning: nil, unique_by: nil)
upsert_all([ attributes ], returning: returning, unique_by: unique_by)
end
- # Upserts (updates or inserts) multiple records into the database. This method constructs
- # a single SQL INSERT statement and sends it straight to the database. It does not
- # instantiate the involved models and it does not trigger Active Record callbacks or
- # validations. However, values passed to #upsert_all will still go through Active Record's
- # normal type casting and serialization.
+ # Updates or inserts (upserts) multiple records into the database in a
+ # single SQL INSERT statement. It does not instantiate any models nor does
+ # it trigger Active Record callbacks or validations. Though passed values
+ # go through Active Record's type casting and serialization.
#
- # The +attributes+ parameter is an Array of Hashes. These Hashes describe the
- # attributes on the objects that are to be created. All of the Hashes must have
- # same keys.
+ # The +attributes+ parameter is an Array of Hashes. Every Hash determines
+ # the attributes for a single row and must have the same keys.
#
- # Returns an <tt>ActiveRecord::Result</tt>. The contents of the result depend on the
- # value of <tt>:returning</tt> (see below).
+ # Returns an <tt>ActiveRecord::Result</tt> with its contents based on
+ # <tt>:returning</tt> (see below).
#
# ==== Options
#
# [:returning]
- # (Postgres-only) An array of attributes that should be returned for all successfully
- # inserted records. For databases that support <tt>INSERT ... RETURNING</tt>, this will default
- # to returning the primary keys of the successfully inserted records. Pass
- # <tt>returning: %w[ id name ]</tt> to return the id and name of every successfully inserted
- # record or pass <tt>returning: false</tt> to omit the clause.
+ # (Postgres-only) An array of attributes to return for all successfully
+ # inserted records, which by default is the primary key.
+ # Pass <tt>returning: %w[ id name ]</tt> for both id and name
+ # or <tt>returning: false</tt> to omit the underlying RETURNING SQL
+ # clause entirely.
#
# [:unique_by]
- # (Postgres and SQLite only) In a table with more than one unique constraint or index,
- # new records may be considered duplicates according to different criteria. For MySQL,
- # an upsert will take place if a new record violates _any_ unique constraint. For
- # Postgres and SQLite, new rows will replace existing rows when the new row has the
- # same primary key as the existing row. In case of SQLite, an upsert operation causes
- # an insert to behave as an update or a no-op if the insert would violate
- # a uniqueness constraint. By defining <tt>:unique_by</tt>, you can supply
- # a different unique constraint for matching new records to existing ones than the
- # primary key.
+ # (Postgres and SQLite only) By default rows are matched against existing
+ # rows by the primary key. Any matching rows are updated, not skipped.
#
- # (For example, if you have a unique index on the ISBN column and use that as
- # the <tt>:unique_by</tt>, a new record with the same ISBN as an existing record
- # will replace the existing record but a new record with the same primary key
- # as an existing record will raise <tt>ActiveRecord::RecordNotUnique</tt>.)
+ # To match rows according to a different unique index pass <tt>:unique_by</tt>.
#
- # Indexes can be identified by an array of columns:
+ # Consider a Book model where no duplicate ISBNs make sense, but if any
+ # row has an existing id, or is not unique by another unique index,
+ # <tt>ActiveRecord::RecordNotUnique</tt> is raised.
#
- # unique_by: { columns: %w[ isbn ] }
+ # Unique indexes can be identified by columns or name:
#
- # Partial indexes can be identified by an array of columns and a <tt>:where</tt> condition:
+ # unique_by: :isbn
+ # unique_by: %i[ author_id name ]
+ # unique_by: :index_books_on_isbn
#
- # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" }
+ # Because it relies on the index information from the database,
+ # <tt>:unique_by</tt> is best paired with Active Record's
+ # schema_cache.
#
# ==== Examples
#
- # # Given a unique index on <tt>books.isbn</tt> and the following record:
- # Book.create!(title: 'Rework', author: 'David', isbn: '1')
+ # # Inserts multiple records, performing an upsert when records have duplicate ISBNs.
+ # # Here "Eloquent Ruby" overwrites "Rework" because its ISBN is duplicate.
#
- # # Insert multiple records, allowing new records with the same ISBN
- # # as an existing record to overwrite the existing record.
- # # ('Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate)
# Book.upsert_all([
- # { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' },
- # { title: 'Clean Code', author: 'Robert', isbn: '2' }
- # ], unique_by: { columns: %w[ isbn ] })
+ # { title: "Rework", author: "David", isbn: "1" },
+ # { title: "Eloquent Ruby", author: "Russ", isbn: "1" }
+ # ], unique_by: :isbn)
#
+ # Book.find_by(isbn: "1").title # => "Eloquent Ruby"
def upsert_all(attributes, returning: nil, unique_by: nil)
InsertAll.new(self, attributes, on_duplicate: :update, returning: returning, unique_by: unique_by).execute
end