diff options
Diffstat (limited to 'activerecord/lib/active_record/relation')
8 files changed, 184 insertions, 38 deletions
diff --git a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb index e07580a563..beb8fa511c 100644 --- a/activerecord/lib/active_record/relation/batches.rb +++ b/activerecord/lib/active_record/relation/batches.rb @@ -1,3 +1,5 @@ +require "active_record/relation/batches/batch_enumerator" + module ActiveRecord module Batches # Looping through a collection of records from the database @@ -122,24 +124,102 @@ module ActiveRecord end end + in_batches(of: batch_size, begin_at: begin_at, end_at: end_at, load: true) do |batch| + yield batch.to_a + end + end + + # Yields ActiveRecord::Relation objects to work with a batch of records. + # + # Person.where("age > 21").in_batches do |relation| + # relation.delete_all + # sleep(10) # Throttle the delete queries + # end + # + # If you do not provide a block to #in_batches, it will return a + # BatchEnumerator which is enumerable. + # + # Person.in_batches.with_index do |relation, batch_index| + # puts "Processing relation ##{batch_index}" + # relation.each { |relation| relation.delete_all } + # end + # + # Examples of calling methods on the returned BatchEnumerator object: + # + # Person.in_batches.delete_all + # Person.in_batches.update_all(awesome: true) + # Person.in_batches.each_record(&:party_all_night!) + # + # ==== Options + # * <tt>:of</tt> - Specifies the size of the batch. Default to 1000. + # * <tt>:load</tt> - Specifies if the relation should be loaded. Default to false. + # * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value. + # * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value. + # + # This is especially useful if you want to work with the + # ActiveRecord::Relation object instead of the array of records, or if + # you want multiple workers dealing with the same processing queue. You can + # make worker 1 handle all the records between id 0 and 10,000 and worker 2 + # handle from 10,000 and beyond (by setting the +:begin_at+ and +:end_at+ + # option on each worker). + # + # # Let's process the next 2000 records + # Person.in_batches(of: 2000, begin_at: 2000).update_all(awesome: true) + # + # An example of calling where query method on the relation: + # + # Person.in_batches.each do |relation| + # relation.update_all('age = age + 1') + # relation.where('age > 21').update_all(should_party: true) + # relation.where('age <= 21').delete_all + # end + # + # NOTE: If you are going to iterate through each record, you should call + # #each_record on the yielded BatchEnumerator: + # + # Person.in_batches.each_record(&:party_all_night!) + # + # NOTE: It's not possible to set the order. That is automatically set to + # ascending on the primary key ("id ASC") to make the batch ordering + # consistent. Therefore the primary key must be orderable, e.g an integer + # or a string. + # + # NOTE: You can't set the limit either, that's used to control the batch + # sizes. + def in_batches(of: 1000, begin_at: nil, end_at: nil, load: false) + relation = self + unless block_given? + return BatchEnumerator.new(of: of, begin_at: begin_at, end_at: end_at, relation: self) + end + if logger && (arel.orders.present? || arel.taken.present?) logger.warn("Scoped order and limit are ignored, it's forced to be batch order and batch size") end - relation = relation.reorder(batch_order).limit(batch_size) + relation = relation.reorder(batch_order).limit(of) relation = apply_limits(relation, begin_at, end_at) - records = relation.to_a + batch_relation = relation + + loop do + if load + records = batch_relation.to_a + ids = records.map(&:id) + yielded_relation = self.where(primary_key => ids) + yielded_relation.load_records(records) + else + ids = batch_relation.pluck(primary_key) + yielded_relation = self.where(primary_key => ids) + end - while records.any? - records_size = records.size - primary_key_offset = records.last.id - raise "Primary key not included in the custom select clause" unless primary_key_offset + break if ids.empty? - yield records + primary_key_offset = ids.last + raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset - break if records_size < batch_size + yield yielded_relation - records = relation.where(table[primary_key].gt(primary_key_offset)).to_a + break if ids.length < of + batch_relation = relation.where(table[primary_key].gt(primary_key_offset)) end end diff --git a/activerecord/lib/active_record/relation/batches/batch_enumerator.rb b/activerecord/lib/active_record/relation/batches/batch_enumerator.rb new file mode 100644 index 0000000000..153aae9584 --- /dev/null +++ b/activerecord/lib/active_record/relation/batches/batch_enumerator.rb @@ -0,0 +1,67 @@ +module ActiveRecord + module Batches + class BatchEnumerator + include Enumerable + + def initialize(of: 1000, begin_at: nil, end_at: nil, relation:) #:nodoc: + @of = of + @relation = relation + @begin_at = begin_at + @end_at = end_at + end + + # Looping through a collection of records from the database (using the + # +all+ method, for example) is very inefficient since it will try to + # instantiate all the objects at once. + # + # In that case, batch processing methods allow you to work with the + # records in batches, thereby greatly reducing memory consumption. + # + # Person.in_batches.each_record do |person| + # person.do_awesome_stuff + # end + # + # Person.where("age > 21").in_batches(of: 10).each_record do |person| + # person.party_all_night! + # end + # + # If you do not provide a block to #each_record, it will return an Enumerator + # for chaining with other methods: + # + # Person.in_batches.each_record.with_index do |person, index| + # person.award_trophy(index + 1) + # end + def each_record + return to_enum(:each_record) unless block_given? + + @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: true).each do |relation| + relation.to_a.each { |record| yield record } + end + end + + # Delegates #delete_all, #update_all, #destroy_all methods to each batch. + # + # People.in_batches.delete_all + # People.in_batches.destroy_all('age < 10') + # People.in_batches.update_all('age = age + 1') + [:delete_all, :update_all, :destroy_all].each do |method| + define_method(method) do |*args, &block| + @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false).each do |relation| + relation.send(method, *args, &block) + end + end + end + + # Yields an ActiveRecord::Relation object for each batch of records. + # + # Person.in_batches.each do |relation| + # relation.update_all(awesome: true) + # end + def each + enum = @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false) + return enum.each { |relation| yield relation } if block_given? + enum + end + end + end +end diff --git a/activerecord/lib/active_record/relation/calculations.rb b/activerecord/lib/active_record/relation/calculations.rb index 402b317d9c..0f6015fa93 100644 --- a/activerecord/lib/active_record/relation/calculations.rb +++ b/activerecord/lib/active_record/relation/calculations.rb @@ -71,6 +71,7 @@ module ActiveRecord # # Person.sum(:age) # => 4562 def sum(*args) + return super if block_given? calculate(:sum, *args) end @@ -138,7 +139,7 @@ module ActiveRecord # # SELECT people.id, people.name FROM people # # => [[1, 'David'], [2, 'Jeremy'], [3, 'Jose']] # - # Person.pluck('DISTINCT role') + # Person.distinct.pluck(:role) # # SELECT DISTINCT role FROM people # # => ['admin', 'member', 'guest'] # @@ -161,6 +162,10 @@ module ActiveRecord end end + if loaded? && (column_names - @klass.column_names).empty? + return @records.pluck(*column_names) + end + if has_include?(column_names.first) construct_relation_for_association_calculations.pluck(*column_names) else @@ -190,7 +195,8 @@ module ActiveRecord def perform_calculation(operation, column_name) operation = operation.to_s.downcase - # If #count is used with #distinct / #uniq it is considered distinct. (eg. relation.distinct.count) + # If #count is used with #distinct (i.e. `relation.distinct.count`) it is + # considered distinct. distinct = self.distinct_value if operation == "count" diff --git a/activerecord/lib/active_record/relation/delegation.rb b/activerecord/lib/active_record/relation/delegation.rb index 86f2c30168..d75ec72b1a 100644 --- a/activerecord/lib/active_record/relation/delegation.rb +++ b/activerecord/lib/active_record/relation/delegation.rb @@ -18,7 +18,7 @@ module ActiveRecord delegate = Class.new(klass) { include ClassSpecificRelation } - const_set klass.name.gsub('::', '_'), delegate + const_set klass.name.gsub('::'.freeze, '_'.freeze), delegate cache[klass] = delegate end end diff --git a/activerecord/lib/active_record/relation/finder_methods.rb b/activerecord/lib/active_record/relation/finder_methods.rb index 6020aa238f..009b2bad57 100644 --- a/activerecord/lib/active_record/relation/finder_methods.rb +++ b/activerecord/lib/active_record/relation/finder_methods.rb @@ -62,11 +62,8 @@ module ActiveRecord # Person.where(name: 'Spartacus', rating: 4).pluck(:field1, :field2) # # returns an Array of the required fields. def find(*args) - if block_given? - to_a.find(*args) { |*block_args| yield(*block_args) } - else - find_with_ids(*args) - end + return super if block_given? + find_with_ids(*args) end # Finds the first record matching the specified conditions. There @@ -88,7 +85,8 @@ module ActiveRecord def find_by!(arg, *args) where(arg, *args).take! rescue RangeError - raise RecordNotFound, "Couldn't find #{@klass.name} with an out of range value" + raise RecordNotFound.new("Couldn't find #{@klass.name} with an out of range value", + @klass.name) end # Gives a record (or N records if a parameter is supplied) without any implied diff --git a/activerecord/lib/active_record/relation/merger.rb b/activerecord/lib/active_record/relation/merger.rb index dd8f0aa298..0b38666ce9 100644 --- a/activerecord/lib/active_record/relation/merger.rb +++ b/activerecord/lib/active_record/relation/merger.rb @@ -87,8 +87,8 @@ module ActiveRecord return if other.preload_values.empty? && other.includes_values.empty? if other.klass == relation.klass - relation.preload! other.preload_values unless other.preload_values.empty? - relation.includes! other.includes_values unless other.includes_values.empty? + relation.preload!(*other.preload_values) unless other.preload_values.empty? + relation.includes!(other.includes_values) unless other.includes_values.empty? else reflection = relation.klass.reflect_on_all_associations.find do |r| r.class_name == other.klass.name diff --git a/activerecord/lib/active_record/relation/predicate_builder.rb b/activerecord/lib/active_record/relation/predicate_builder.rb index 43e9afe853..d26db7d4cf 100644 --- a/activerecord/lib/active_record/relation/predicate_builder.rb +++ b/activerecord/lib/active_record/relation/predicate_builder.rb @@ -52,7 +52,7 @@ module ActiveRecord key else key = key.to_s - key.split('.').first if key.include?('.') + key.split('.'.freeze).first if key.include?('.'.freeze) end end.compact end @@ -123,10 +123,10 @@ module ActiveRecord end def convert_dot_notation_to_hash(attributes) - dot_notation = attributes.keys.select { |s| s.include?(".") } + dot_notation = attributes.keys.select { |s| s.include?(".".freeze) } dot_notation.each do |key| - table_name, column_name = key.split(".") + table_name, column_name = key.split(".".freeze) value = attributes.delete(key) attributes[table_name] ||= {} diff --git a/activerecord/lib/active_record/relation/query_methods.rb b/activerecord/lib/active_record/relation/query_methods.rb index fd78db2e95..706c99c245 100644 --- a/activerecord/lib/active_record/relation/query_methods.rb +++ b/activerecord/lib/active_record/relation/query_methods.rb @@ -242,12 +242,9 @@ module ActiveRecord # Model.select(:field).first.other_field # # => ActiveModel::MissingAttributeError: missing attribute: other_field def select(*fields) - if block_given? - to_a.select { |*block_args| yield(*block_args) } - else - raise ArgumentError, 'Call this with at least one field' if fields.empty? - spawn._select!(*fields) - end + return super if block_given? + raise ArgumentError, 'Call this with at least one field' if fields.empty? + spawn._select!(*fields) end def _select!(*fields) # :nodoc: @@ -1001,15 +998,13 @@ module ActiveRecord end def arel_columns(columns) - if from_clause.value - columns - else - columns.map do |field| - if (Symbol === field || String === field) && columns_hash.key?(field.to_s) - arel_table[field] - else - field - end + columns.map do |field| + if (Symbol === field || String === field) && columns_hash.key?(field.to_s) && !from_clause.value + arel_table[field] + elsif Symbol === field + connection.quote_table_name(field.to_s) + else + field end end end |