aboutsummaryrefslogtreecommitdiffstats
path: root/activerecord/lib/active_record/relation
diff options
context:
space:
mode:
Diffstat (limited to 'activerecord/lib/active_record/relation')
-rw-r--r--activerecord/lib/active_record/relation/batches.rb98
-rw-r--r--activerecord/lib/active_record/relation/batches/batch_enumerator.rb67
2 files changed, 156 insertions, 9 deletions
diff --git a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb
index e07580a563..beb8fa511c 100644
--- a/activerecord/lib/active_record/relation/batches.rb
+++ b/activerecord/lib/active_record/relation/batches.rb
@@ -1,3 +1,5 @@
+require "active_record/relation/batches/batch_enumerator"
+
module ActiveRecord
module Batches
# Looping through a collection of records from the database
@@ -122,24 +124,102 @@ module ActiveRecord
end
end
+ in_batches(of: batch_size, begin_at: begin_at, end_at: end_at, load: true) do |batch|
+ yield batch.to_a
+ end
+ end
+
+ # Yields ActiveRecord::Relation objects to work with a batch of records.
+ #
+ # Person.where("age > 21").in_batches do |relation|
+ # relation.delete_all
+ # sleep(10) # Throttle the delete queries
+ # end
+ #
+ # If you do not provide a block to #in_batches, it will return a
+ # BatchEnumerator which is enumerable.
+ #
+ # Person.in_batches.with_index do |relation, batch_index|
+ # puts "Processing relation ##{batch_index}"
+ # relation.each { |relation| relation.delete_all }
+ # end
+ #
+ # Examples of calling methods on the returned BatchEnumerator object:
+ #
+ # Person.in_batches.delete_all
+ # Person.in_batches.update_all(awesome: true)
+ # Person.in_batches.each_record(&:party_all_night!)
+ #
+ # ==== Options
+ # * <tt>:of</tt> - Specifies the size of the batch. Default to 1000.
+ # * <tt>:load</tt> - Specifies if the relation should be loaded. Default to false.
+ # * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value.
+ # * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value.
+ #
+ # This is especially useful if you want to work with the
+ # ActiveRecord::Relation object instead of the array of records, or if
+ # you want multiple workers dealing with the same processing queue. You can
+ # make worker 1 handle all the records between id 0 and 10,000 and worker 2
+ # handle from 10,000 and beyond (by setting the +:begin_at+ and +:end_at+
+ # option on each worker).
+ #
+ # # Let's process the next 2000 records
+ # Person.in_batches(of: 2000, begin_at: 2000).update_all(awesome: true)
+ #
+ # An example of calling where query method on the relation:
+ #
+ # Person.in_batches.each do |relation|
+ # relation.update_all('age = age + 1')
+ # relation.where('age > 21').update_all(should_party: true)
+ # relation.where('age <= 21').delete_all
+ # end
+ #
+ # NOTE: If you are going to iterate through each record, you should call
+ # #each_record on the yielded BatchEnumerator:
+ #
+ # Person.in_batches.each_record(&:party_all_night!)
+ #
+ # NOTE: It's not possible to set the order. That is automatically set to
+ # ascending on the primary key ("id ASC") to make the batch ordering
+ # consistent. Therefore the primary key must be orderable, e.g an integer
+ # or a string.
+ #
+ # NOTE: You can't set the limit either, that's used to control the batch
+ # sizes.
+ def in_batches(of: 1000, begin_at: nil, end_at: nil, load: false)
+ relation = self
+ unless block_given?
+ return BatchEnumerator.new(of: of, begin_at: begin_at, end_at: end_at, relation: self)
+ end
+
if logger && (arel.orders.present? || arel.taken.present?)
logger.warn("Scoped order and limit are ignored, it's forced to be batch order and batch size")
end
- relation = relation.reorder(batch_order).limit(batch_size)
+ relation = relation.reorder(batch_order).limit(of)
relation = apply_limits(relation, begin_at, end_at)
- records = relation.to_a
+ batch_relation = relation
+
+ loop do
+ if load
+ records = batch_relation.to_a
+ ids = records.map(&:id)
+ yielded_relation = self.where(primary_key => ids)
+ yielded_relation.load_records(records)
+ else
+ ids = batch_relation.pluck(primary_key)
+ yielded_relation = self.where(primary_key => ids)
+ end
- while records.any?
- records_size = records.size
- primary_key_offset = records.last.id
- raise "Primary key not included in the custom select clause" unless primary_key_offset
+ break if ids.empty?
- yield records
+ primary_key_offset = ids.last
+ raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset
- break if records_size < batch_size
+ yield yielded_relation
- records = relation.where(table[primary_key].gt(primary_key_offset)).to_a
+ break if ids.length < of
+ batch_relation = relation.where(table[primary_key].gt(primary_key_offset))
end
end
diff --git a/activerecord/lib/active_record/relation/batches/batch_enumerator.rb b/activerecord/lib/active_record/relation/batches/batch_enumerator.rb
new file mode 100644
index 0000000000..153aae9584
--- /dev/null
+++ b/activerecord/lib/active_record/relation/batches/batch_enumerator.rb
@@ -0,0 +1,67 @@
+module ActiveRecord
+ module Batches
+ class BatchEnumerator
+ include Enumerable
+
+ def initialize(of: 1000, begin_at: nil, end_at: nil, relation:) #:nodoc:
+ @of = of
+ @relation = relation
+ @begin_at = begin_at
+ @end_at = end_at
+ end
+
+ # Looping through a collection of records from the database (using the
+ # +all+ method, for example) is very inefficient since it will try to
+ # instantiate all the objects at once.
+ #
+ # In that case, batch processing methods allow you to work with the
+ # records in batches, thereby greatly reducing memory consumption.
+ #
+ # Person.in_batches.each_record do |person|
+ # person.do_awesome_stuff
+ # end
+ #
+ # Person.where("age > 21").in_batches(of: 10).each_record do |person|
+ # person.party_all_night!
+ # end
+ #
+ # If you do not provide a block to #each_record, it will return an Enumerator
+ # for chaining with other methods:
+ #
+ # Person.in_batches.each_record.with_index do |person, index|
+ # person.award_trophy(index + 1)
+ # end
+ def each_record
+ return to_enum(:each_record) unless block_given?
+
+ @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: true).each do |relation|
+ relation.to_a.each { |record| yield record }
+ end
+ end
+
+ # Delegates #delete_all, #update_all, #destroy_all methods to each batch.
+ #
+ # People.in_batches.delete_all
+ # People.in_batches.destroy_all('age < 10')
+ # People.in_batches.update_all('age = age + 1')
+ [:delete_all, :update_all, :destroy_all].each do |method|
+ define_method(method) do |*args, &block|
+ @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false).each do |relation|
+ relation.send(method, *args, &block)
+ end
+ end
+ end
+
+ # Yields an ActiveRecord::Relation object for each batch of records.
+ #
+ # Person.in_batches.each do |relation|
+ # relation.update_all(awesome: true)
+ # end
+ def each
+ enum = @relation.to_enum(:in_batches, of: @of, begin_at: @begin_at, end_at: @end_at, load: false)
+ return enum.each { |relation| yield relation } if block_given?
+ enum
+ end
+ end
+ end
+end