From 2ea5c24085e39efb6c1bc6e7062bcc5f496d1fa6 Mon Sep 17 00:00:00 2001 From: Kasper Timm Hansen Date: Sat, 9 Mar 2019 01:15:17 +0100 Subject: Bulk Insert: Reuse indexes for unique_by I found `:unique_by` with `:columns` and `:where` inside it tough to grasp. The documentation only mentioned indexes and partial indexes. So why duplicate a model's indexes in an insert_all/upsert_all call when we can just look it up? This has the added benefit of raising if no index is found, such that people can't insert thousands of records without relying on an index of some form. --- activerecord/lib/active_record/insert_all.rb | 51 ++++++++++++--------- activerecord/lib/active_record/persistence.rb | 65 ++++++++++----------------- 2 files changed, 55 insertions(+), 61 deletions(-) (limited to 'activerecord/lib') diff --git a/activerecord/lib/active_record/insert_all.rb b/activerecord/lib/active_record/insert_all.rb index 98c98d61cd..d30aee7c00 100644 --- a/activerecord/lib/active_record/insert_all.rb +++ b/activerecord/lib/active_record/insert_all.rb @@ -14,6 +14,7 @@ module ActiveRecord @returning = (connection.supports_insert_returning? ? primary_keys : false) if @returning.nil? @returning = false if @returning == [] + @unique_by = find_unique_index_for(unique_by) if unique_by @on_duplicate = :skip if @on_duplicate == :update && updatable_columns.empty? ensure_valid_options_for_connection! @@ -27,6 +28,11 @@ module ActiveRecord keys - readonly_columns - unique_by_columns end + def primary_keys + Array(model.primary_key) + end + + def skip_duplicates? on_duplicate == :skip end @@ -47,6 +53,21 @@ module ActiveRecord end private + def find_unique_index_for(unique_by) + match = Array(unique_by).map(&:to_s) + + if index = unique_indexes.find { |i| match.include?(i.name) || i.columns == match } + index + else + raise ArgumentError, "No unique index found for #{unique_by}" + end + end + + def unique_indexes + connection.schema_cache.indexes(model.table_name).select(&:unique) + end + + def ensure_valid_options_for_connection! if returning && !connection.supports_insert_returning? raise ArgumentError, "#{connection.class} does not support :returning" @@ -69,21 +90,20 @@ module ActiveRecord end end + def to_sql connection.build_insert_sql(ActiveRecord::InsertAll::Builder.new(self)) end + def readonly_columns primary_keys + model.readonly_attributes.to_a end def unique_by_columns - unique_by ? unique_by.fetch(:columns).map(&:to_s) : [] + Array(unique_by&.columns) end - def primary_keys - Array.wrap(model.primary_key) - end def verify_attributes(attributes) if keys != attributes.keys.to_set @@ -121,10 +141,13 @@ module ActiveRecord end def conflict_target - return unless conflict_columns - sql = +"(#{quote_columns(conflict_columns).join(',')})" - sql << " WHERE #{where}" if where - sql + if index = insert_all.unique_by + sql = +"(#{quote_columns(index.columns).join(',')})" + sql << " WHERE #{index.where}" if index.where + sql + elsif update_duplicates? + "(#{quote_columns(insert_all.primary_keys).join(',')})" + end end def updatable_columns @@ -150,18 +173,6 @@ module ActiveRecord def quote_columns(columns) columns.map(&connection.method(:quote_column_name)) end - - def conflict_columns - @conflict_columns ||= begin - conflict_columns = insert_all.unique_by.fetch(:columns) if insert_all.unique_by - conflict_columns ||= Array.wrap(model.primary_key) if update_duplicates? - conflict_columns - end - end - - def where - insert_all.unique_by && insert_all.unique_by[:where] - end end end end diff --git a/activerecord/lib/active_record/persistence.rb b/activerecord/lib/active_record/persistence.rb index 0c31f0f57e..8b7c9f59fa 100644 --- a/activerecord/lib/active_record/persistence.rb +++ b/activerecord/lib/active_record/persistence.rb @@ -92,25 +92,21 @@ module ActiveRecord # returning: %w[ id name ] to return the id and name of every successfully inserted # record or pass returning: false to omit the clause. # - # [:unique_by] - # (Postgres and SQLite only) In a table with more than one unique constraint or index, + # [:unique_index] + # (Postgres and SQLite only) In a table with more than one unique constaint or index, # new records may be considered duplicates according to different criteria. By default, - # new rows will be skipped if they violate _any_ unique constraint or index. By defining - # :unique_by, you can skip rows that would create duplicates according to the given + # new rows will be skipped if they violate _any_ unique constraint/index. By passing + # :unique_index, you can skip rows that would create duplicates according to the given # constraint but raise ActiveRecord::RecordNotUnique if rows violate other constraints. # - # (For example, maybe you assume a client will try to import the same ISBNs more than + # For example, maybe you assume a client will try to import the same ISBNs more than # once and want to silently ignore the duplicate records, but you don't except any of # your code to attempt to create two rows with the same primary key and would appreciate - # an exception report in that scenario.) + # an exception instead. # - # Indexes can be identified by an array of columns: + # Indexes can be identified by name: # - # unique_by: { columns: %w[ isbn ] } - # - # Partial indexes can be identified by an array of columns and a :where condition: - # - # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" } + # unique_index: :isbn # # ==== Example # @@ -120,7 +116,6 @@ module ActiveRecord # { id: 1, title: 'Rework', author: 'David' }, # { id: 1, title: 'Eloquent Ruby', author: 'Russ' } # ]) - # def insert_all(attributes, returning: nil, unique_by: nil) InsertAll.new(self, attributes, on_duplicate: :skip, returning: returning, unique_by: unique_by).execute end @@ -174,12 +169,11 @@ module ActiveRecord # ]) # # # Raises ActiveRecord::RecordNotUnique because 'Eloquent Ruby' - # # does not have a unique ID + # # does not have a unique id. # Book.insert_all!([ # { id: 1, title: 'Rework', author: 'David' }, # { id: 1, title: 'Eloquent Ruby', author: 'Russ' } # ]) - # def insert_all!(attributes, returning: nil) InsertAll.new(self, attributes, on_duplicate: :raise, returning: returning).execute end @@ -217,43 +211,32 @@ module ActiveRecord # returning: %w[ id name ] to return the id and name of every successfully inserted # record or pass returning: false to omit the clause. # - # [:unique_by] - # (Postgres and SQLite only) In a table with more than one unique constraint or index, + # [:unique_index] + # (Postgres and SQLite only) In a table with more than one unique constaint or index, # new records may be considered duplicates according to different criteria. For MySQL, # an upsert will take place if a new record violates _any_ unique constraint. For # Postgres and SQLite, new rows will replace existing rows when the new row has the - # same primary key as the existing row. In case of SQLite, an upsert operation causes - # an insert to behave as an update or a no-op if the insert would violate - # a uniqueness constraint. By defining :unique_by, you can supply - # a different unique constraint for matching new records to existing ones than the - # primary key. - # - # (For example, if you have a unique index on the ISBN column and use that as - # the :unique_by, a new record with the same ISBN as an existing record - # will replace the existing record but a new record with the same primary key - # as an existing record will raise ActiveRecord::RecordNotUnique.) - # - # Indexes can be identified by an array of columns: + # same primary key as the existing row. By passing :unique_index, you can supply + # a different index than the primary key one to match new records to existing ones. # - # unique_by: { columns: %w[ isbn ] } + # For example, if you have a unique index on the ISBN column and pass that as + # :unique_index, a new record with the same ISBN as an existing record + # will replace the existing record but a new record with the same primary key + # as an existing record will raise ActiveRecord::RecordNotUnique. # - # Partial indexes can be identified by an array of columns and a :where condition: + # Indexes can be identified by name: # - # unique_by: { columns: %w[ isbn ], where: "published_on IS NOT NULL" } + # unique_index: :isbn # # ==== Examples # - # # Given a unique index on books.isbn and the following record: - # Book.create!(title: 'Rework', author: 'David', isbn: '1') + # # Insert multiple records, performing an upsert when records have duplicate ISBNs. + # # Here 'Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate. # - # # Insert multiple records, allowing new records with the same ISBN - # # as an existing record to overwrite the existing record. - # # ('Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate) # Book.upsert_all([ - # { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' }, - # { title: 'Clean Code', author: 'Robert', isbn: '2' } - # ], unique_by: { columns: %w[ isbn ] }) - # + # { title: 'Rework', author: 'David', isbn: '1' }, + # { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' } + # ], unique_index: :isbn) def upsert_all(attributes, returning: nil, unique_by: nil) InsertAll.new(self, attributes, on_duplicate: :update, returning: returning, unique_by: unique_by).execute end -- cgit v1.2.3 From 60c29e1cc7416235c6fc45e0d936c36e58def21a Mon Sep 17 00:00:00 2001 From: Kasper Timm Hansen Date: Wed, 20 Mar 2019 13:27:51 +0100 Subject: [ci skip] Documentation pass for insert_all etc. --- activerecord/lib/active_record/persistence.rb | 212 +++++++++++++------------- 1 file changed, 107 insertions(+), 105 deletions(-) (limited to 'activerecord/lib') diff --git a/activerecord/lib/active_record/persistence.rb b/activerecord/lib/active_record/persistence.rb index 8b7c9f59fa..ba03a3773a 100644 --- a/activerecord/lib/active_record/persistence.rb +++ b/activerecord/lib/active_record/persistence.rb @@ -57,186 +57,188 @@ module ActiveRecord end end - # Inserts a single record into the database. This method constructs a single SQL INSERT - # statement and sends it straight to the database. It does not instantiate the involved - # models and it does not trigger Active Record callbacks or validations. However, values - # passed to #insert will still go through Active Record's normal type casting and - # serialization. + # Inserts a single record into the database in a single SQL INSERT + # statement. It does not instantiate any models nor does it trigger + # Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # # See ActiveRecord::Persistence#insert_all for documentation. def insert(attributes, returning: nil, unique_by: nil) insert_all([ attributes ], returning: returning, unique_by: unique_by) end - # Inserts multiple records into the database. This method constructs a single SQL INSERT - # statement and sends it straight to the database. It does not instantiate the involved - # models and it does not trigger Active Record callbacks or validations. However, values - # passed to #insert_all will still go through Active Record's normal type casting and - # serialization. + # Inserts multiple records into the database in a single SQL INSERT + # statement. It does not instantiate any models nor does it trigger + # Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # - # The +attributes+ parameter is an Array of Hashes. These Hashes describe the - # attributes on the objects that are to be created. All of the Hashes must have - # same keys. + # The +attributes+ parameter is an Array of Hashes. Every Hash determines + # the attributes for a single row and must have the same keys. # - # Records that would violate a unique constraint on the table are skipped. + # Rows are considered to be unique by every unique index on the table. Any + # duplicate rows are skipped. + # Override with :unique_by (see below). # - # Returns an ActiveRecord::Result. The contents of the result depend on the - # value of :returning (see below). + # Returns an ActiveRecord::Result with its contents based on + # :returning (see below). # # ==== Options # # [:returning] - # (Postgres-only) An array of attributes that should be returned for all successfully - # inserted records. For databases that support INSERT ... RETURNING, this will default - # to returning the primary keys of the successfully inserted records. Pass - # returning: %w[ id name ] to return the id and name of every successfully inserted - # record or pass returning: false to omit the clause. + # (Postgres-only) An array of attributes to return for all successfully + # inserted records, which by default is the primary key. + # Pass returning: %w[ id name ] for both id and name + # or returning: false to omit the underlying RETURNING SQL + # clause entirely. # - # [:unique_index] - # (Postgres and SQLite only) In a table with more than one unique constaint or index, - # new records may be considered duplicates according to different criteria. By default, - # new rows will be skipped if they violate _any_ unique constraint/index. By passing - # :unique_index, you can skip rows that would create duplicates according to the given - # constraint but raise ActiveRecord::RecordNotUnique if rows violate other constraints. + # [:unique_by] + # (Postgres and SQLite only) By default rows are considered to be unique + # by every unique index on the table. Any duplicate rows are skipped. # - # For example, maybe you assume a client will try to import the same ISBNs more than - # once and want to silently ignore the duplicate records, but you don't except any of - # your code to attempt to create two rows with the same primary key and would appreciate - # an exception instead. + # To skip rows according to just one unique index pass :unique_by. # - # Indexes can be identified by name: + # Consider a Book model where no duplicate ISBNs make sense, but if any + # row has an existing id, or is not unique by another unique index, + # ActiveRecord::RecordNotUnique is raised. # - # unique_index: :isbn + # Unique indexes can be identified by columns or name: + # + # unique_by: :isbn + # unique_by: %i[ author_id name ] + # unique_by: :index_books_on_isbn + # + # Because it relies on the index information from the database + # :unique_by is recommended to be paired with + # Active Record's schema_cache. # # ==== Example # - # # Insert multiple records and skip duplicates - # # ('Eloquent Ruby' will be skipped because its id is duplicate) + # # Insert records and skip inserting any duplicates. + # # Here "Eloquent Ruby" is skipped because its id is not unique. + # # Book.insert_all([ - # { id: 1, title: 'Rework', author: 'David' }, - # { id: 1, title: 'Eloquent Ruby', author: 'Russ' } + # { id: 1, title: "Rework", author: "David" }, + # { id: 1, title: "Eloquent Ruby", author: "Russ" } # ]) def insert_all(attributes, returning: nil, unique_by: nil) InsertAll.new(self, attributes, on_duplicate: :skip, returning: returning, unique_by: unique_by).execute end - # Inserts a single record into the database. This method constructs a single SQL INSERT - # statement and sends it straight to the database. It does not instantiate the involved - # models and it does not trigger Active Record callbacks or validations. However, values - # passed to #insert! will still go through Active Record's normal type casting and - # serialization. + # Inserts a single record into the database in a single SQL INSERT + # statement. It does not instantiate any models nor does it trigger + # Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # - # See ActiveRecord::Persistence#insert_all! for documentation. + # See ActiveRecord::Persistence#insert_all! for more. def insert!(attributes, returning: nil) insert_all!([ attributes ], returning: returning) end - # Inserts multiple records into the database. This method constructs a single SQL INSERT - # statement and sends it straight to the database. It does not instantiate the involved - # models and it does not trigger Active Record callbacks or validations. However, values - # passed to #insert_all! will still go through Active Record's normal type casting and - # serialization. + # Inserts multiple records into the database in a single SQL INSERT + # statement. It does not instantiate any models nor does it trigger + # Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # - # The +attributes+ parameter is an Array of Hashes. These Hashes describe the - # attributes on the objects that are to be created. All of the Hashes must have - # same keys. + # The +attributes+ parameter is an Array of Hashes. Every Hash determines + # the attributes for a single row and must have the same keys. # - # #insert_all! will raise ActiveRecord::RecordNotUnique if any of the records being - # inserts would violate a unique constraint on the table. In that case, no records - # would be inserted. + # Raises ActiveRecord::RecordNotUnique if any rows violate a + # unique index on the table. In that case, no rows are inserted. # - # To skip duplicate records, see ActiveRecord::Persistence#insert_all. + # To skip duplicate rows, see ActiveRecord::Persistence#insert_all. # To replace them, see ActiveRecord::Persistence#upsert_all. # - # Returns an ActiveRecord::Result. The contents of the result depend on the - # value of :returning (see below). + # Returns an ActiveRecord::Result with its contents based on + # :returning (see below). # # ==== Options # # [:returning] - # (Postgres-only) An array of attributes that should be returned for all successfully - # inserted records. For databases that support INSERT ... RETURNING, this will default - # to returning the primary keys of the successfully inserted records. Pass - # returning: %w[ id name ] to return the id and name of every successfully inserted - # record or pass returning: false to omit the clause. + # (Postgres-only) An array of attributes to return for all successfully + # inserted records, which by default is the primary key. + # Pass returning: %w[ id name ] for both id and name + # or returning: false to omit the underlying RETURNING SQL + # clause entirely. # # ==== Examples # # # Insert multiple records # Book.insert_all!([ - # { title: 'Rework', author: 'David' }, - # { title: 'Eloquent Ruby', author: 'Russ' } + # { title: "Rework", author: "David" }, + # { title: "Eloquent Ruby", author: "Russ" } # ]) # - # # Raises ActiveRecord::RecordNotUnique because 'Eloquent Ruby' + # # Raises ActiveRecord::RecordNotUnique because "Eloquent Ruby" # # does not have a unique id. # Book.insert_all!([ - # { id: 1, title: 'Rework', author: 'David' }, - # { id: 1, title: 'Eloquent Ruby', author: 'Russ' } + # { id: 1, title: "Rework", author: "David" }, + # { id: 1, title: "Eloquent Ruby", author: "Russ" } # ]) def insert_all!(attributes, returning: nil) InsertAll.new(self, attributes, on_duplicate: :raise, returning: returning).execute end - # Upserts (updates or inserts) a single record into the database. This method constructs - # a single SQL INSERT statement and sends it straight to the database. It does not - # instantiate the involved models and it does not trigger Active Record callbacks or - # validations. However, values passed to #upsert will still go through Active Record's - # normal type casting and serialization. + # Updates or inserts (upserts) multiple records into the database in a + # single SQL INSERT statement. It does not instantiate any models nor does + # it trigger Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # # See ActiveRecord::Persistence#upsert_all for documentation. def upsert(attributes, returning: nil, unique_by: nil) upsert_all([ attributes ], returning: returning, unique_by: unique_by) end - # Upserts (updates or inserts) multiple records into the database. This method constructs - # a single SQL INSERT statement and sends it straight to the database. It does not - # instantiate the involved models and it does not trigger Active Record callbacks or - # validations. However, values passed to #upsert_all will still go through Active Record's - # normal type casting and serialization. + # Updates or inserts (upserts) multiple records into the database in a + # single SQL INSERT statement. It does not instantiate any models nor does + # it trigger Active Record callbacks or validations. Though passed values + # go through Active Record's type casting and serialization. # - # The +attributes+ parameter is an Array of Hashes. These Hashes describe the - # attributes on the objects that are to be created. All of the Hashes must have - # same keys. + # The +attributes+ parameter is an Array of Hashes. Every Hash determines + # the attributes for a single row and must have the same keys. # - # Returns an ActiveRecord::Result. The contents of the result depend on the - # value of :returning (see below). + # Returns an ActiveRecord::Result with its contents based on + # :returning (see below). # # ==== Options # # [:returning] - # (Postgres-only) An array of attributes that should be returned for all successfully - # inserted records. For databases that support INSERT ... RETURNING, this will default - # to returning the primary keys of the successfully inserted records. Pass - # returning: %w[ id name ] to return the id and name of every successfully inserted - # record or pass returning: false to omit the clause. + # (Postgres-only) An array of attributes to return for all successfully + # inserted records, which by default is the primary key. + # Pass returning: %w[ id name ] for both id and name + # or returning: false to omit the underlying RETURNING SQL + # clause entirely. # - # [:unique_index] - # (Postgres and SQLite only) In a table with more than one unique constaint or index, - # new records may be considered duplicates according to different criteria. For MySQL, - # an upsert will take place if a new record violates _any_ unique constraint. For - # Postgres and SQLite, new rows will replace existing rows when the new row has the - # same primary key as the existing row. By passing :unique_index, you can supply - # a different index than the primary key one to match new records to existing ones. + # [:unique_by] + # (Postgres and SQLite only) By default rows are considered to be unique + # by every unique index on the table. Any duplicate rows are skipped. # - # For example, if you have a unique index on the ISBN column and pass that as - # :unique_index, a new record with the same ISBN as an existing record - # will replace the existing record but a new record with the same primary key - # as an existing record will raise ActiveRecord::RecordNotUnique. + # To skip rows according to just one unique index pass :unique_by. # - # Indexes can be identified by name: + # Consider a Book model where no duplicate ISBNs make sense, but if any + # row has an existing id, or is not unique by another unique index, + # ActiveRecord::RecordNotUnique is raised. # - # unique_index: :isbn + # Unique indexes can be identified by columns or name: + # + # unique_by: :isbn + # unique_by: %i[ author_id name ] + # unique_by: :index_books_on_isbn + # + # Because it relies on the index information from the database + # :unique_by is recommended to be paired with + # Active Record's schema_cache. # # ==== Examples # - # # Insert multiple records, performing an upsert when records have duplicate ISBNs. - # # Here 'Eloquent Ruby' will overwrite 'Rework' because its ISBN is duplicate. + # # Inserts multiple records, performing an upsert when records have duplicate ISBNs. + # # Here "Eloquent Ruby" overwrites "Rework" because its ISBN is duplicate. # # Book.upsert_all([ - # { title: 'Rework', author: 'David', isbn: '1' }, - # { title: 'Eloquent Ruby', author: 'Russ', isbn: '1' } - # ], unique_index: :isbn) + # { title: "Rework", author: "David", isbn: "1" }, + # { title: "Eloquent Ruby", author: "Russ", isbn: "1" } + # ], unique_by: :isbn) + # + # Book.find_by(isbn: "1").title # => "Eloquent Ruby" def upsert_all(attributes, returning: nil, unique_by: nil) InsertAll.new(self, attributes, on_duplicate: :update, returning: returning, unique_by: unique_by).execute end -- cgit v1.2.3