path: root/activerecord/lib/active_record/relation/batches.rb
author     Sina Siadat <siadat@gmail.com>  2015-07-18 20:44:13 +0430
committer  Sina Siadat <siadat@gmail.com>  2015-08-07 10:26:38 +0430
commit     25cee1f0373aa3b1d893413a959375480e0ac684 (patch)
tree       afb44866a3b5f9411b02f6c91aa9c572e5beeafe /activerecord/lib/active_record/relation/batches.rb
parent     4b91db5b125dd7bd735e7f42eb8e2c14c0e6757e (diff)
Add ActiveRecord::Relation#in_batches
`in_batches` yields Relation objects if a block is given, otherwise it
returns an instance of `BatchEnumerator`.

The existing `find_each` and `find_in_batches` methods work with batches
of records. The new API allows working with relation batches as well.

Examples:

    Person.in_batches.each_record(&:party_all_night!)
    Person.in_batches.update_all(awesome: true)
    Person.in_batches.delete_all
    Person.in_batches.map do |relation|
      relation.delete_all
      sleep 10 # Throttles the delete queries
    end
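As a quick orientation (a sketch only, not part of the commit), the two calling
modes look like this, reusing the Person model, the `of:` option, and the
per-batch methods shown in the examples above:

    # With a block: each batch is yielded as an ActiveRecord::Relation,
    # so relation-level methods like update_all run one query per batch.
    Person.where("age > 21").in_batches(of: 500) do |relation|
      relation.update_all(awesome: true)
    end

    # Without a block: a BatchEnumerator is returned, so Enumerable
    # methods such as map can be combined with per-batch work.
    Person.in_batches(of: 500).map do |relation|
      relation.delete_all
      sleep 10 # throttle the delete queries
    end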
Diffstat (limited to 'activerecord/lib/active_record/relation/batches.rb')
-rw-r--r--  activerecord/lib/active_record/relation/batches.rb | 98
1 file changed, 89 insertions(+), 9 deletions(-)
diff --git a/activerecord/lib/active_record/relation/batches.rb b/activerecord/lib/active_record/relation/batches.rb
index e07580a563..beb8fa511c 100644
--- a/activerecord/lib/active_record/relation/batches.rb
+++ b/activerecord/lib/active_record/relation/batches.rb
@@ -1,3 +1,5 @@
+require "active_record/relation/batches/batch_enumerator"
+
module ActiveRecord
module Batches
# Looping through a collection of records from the database
@@ -122,24 +124,102 @@ module ActiveRecord
end
end
+ in_batches(of: batch_size, begin_at: begin_at, end_at: end_at, load: true) do |batch|
+ yield batch.to_a
+ end
+ end
+
+ # Yields ActiveRecord::Relation objects to work with a batch of records.
+ #
+ # Person.where("age > 21").in_batches do |relation|
+ # relation.delete_all
+ # sleep(10) # Throttle the delete queries
+ # end
+ #
+ # If you do not provide a block to #in_batches, it will return a
+ # BatchEnumerator which is enumerable.
+ #
+  #   Person.in_batches.each_with_index do |relation, batch_index|
+  #     puts "Processing relation ##{batch_index}"
+  #     relation.delete_all
+ # end
+ #
+ # Examples of calling methods on the returned BatchEnumerator object:
+ #
+ # Person.in_batches.delete_all
+ # Person.in_batches.update_all(awesome: true)
+ # Person.in_batches.each_record(&:party_all_night!)
+ #
+ # ==== Options
+  # * <tt>:of</tt> - Specifies the size of the batch. Defaults to 1000.
+  # * <tt>:load</tt> - Specifies if the relation should be loaded. Defaults to false.
+ # * <tt>:begin_at</tt> - Specifies the primary key value to start from, inclusive of the value.
+ # * <tt>:end_at</tt> - Specifies the primary key value to end at, inclusive of the value.
+ #
+ # This is especially useful if you want to work with the
+ # ActiveRecord::Relation object instead of the array of records, or if
+ # you want multiple workers dealing with the same processing queue. You can
+ # make worker 1 handle all the records between id 0 and 10,000 and worker 2
+ # handle from 10,000 and beyond (by setting the +:begin_at+ and +:end_at+
+ # option on each worker).
+ #
+ # # Let's process the next 2000 records
+ # Person.in_batches(of: 2000, begin_at: 2000).update_all(awesome: true)
+ #
+  # An example of calling the +where+ query method on each yielded relation:
+ #
+ # Person.in_batches.each do |relation|
+ # relation.update_all('age = age + 1')
+ # relation.where('age > 21').update_all(should_party: true)
+ # relation.where('age <= 21').delete_all
+ # end
+ #
+ # NOTE: If you are going to iterate through each record, you should call
+ # #each_record on the yielded BatchEnumerator:
+ #
+ # Person.in_batches.each_record(&:party_all_night!)
+ #
+ # NOTE: It's not possible to set the order. That is automatically set to
+ # ascending on the primary key ("id ASC") to make the batch ordering
+  # consistent. Therefore the primary key must be orderable, e.g. an integer
+ # or a string.
+ #
+  # NOTE: You can't set the limit either; that's used to control the
+  # batch sizes.
+ def in_batches(of: 1000, begin_at: nil, end_at: nil, load: false)
+ relation = self
+ unless block_given?
+ return BatchEnumerator.new(of: of, begin_at: begin_at, end_at: end_at, relation: self)
+ end
+
if logger && (arel.orders.present? || arel.taken.present?)
logger.warn("Scoped order and limit are ignored, it's forced to be batch order and batch size")
end
- relation = relation.reorder(batch_order).limit(batch_size)
+ relation = relation.reorder(batch_order).limit(of)
relation = apply_limits(relation, begin_at, end_at)
- records = relation.to_a
+ batch_relation = relation
+
+ loop do
+ if load
+ records = batch_relation.to_a
+ ids = records.map(&:id)
+ yielded_relation = self.where(primary_key => ids)
+ yielded_relation.load_records(records)
+ else
+ ids = batch_relation.pluck(primary_key)
+ yielded_relation = self.where(primary_key => ids)
+ end
- while records.any?
- records_size = records.size
- primary_key_offset = records.last.id
- raise "Primary key not included in the custom select clause" unless primary_key_offset
+ break if ids.empty?
- yield records
+ primary_key_offset = ids.last
+ raise ArgumentError.new("Primary key not included in the custom select clause") unless primary_key_offset
- break if records_size < batch_size
+ yield yielded_relation
- records = relation.where(table[primary_key].gt(primary_key_offset)).to_a
+ break if ids.length < of
+ batch_relation = relation.where(table[primary_key].gt(primary_key_offset))
end
end
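For readers skimming the patch, the loop added above is essentially keyset
pagination on the primary key: pluck the ids for one batch, build a relation
scoped to those ids, yield it, then continue from the last id seen. A rough
standalone sketch of the same idea (illustrative only; Person, the batch size
of 1000, and the update_all call are placeholders, not the committed code):

    last_id = nil
    loop do
      scope = Person.reorder(:id).limit(1000)
      scope = scope.where("id > ?", last_id) if last_id
      ids = scope.pluck(:id)                            # primary keys for this batch
      break if ids.empty?
      Person.where(id: ids).update_all(awesome: true)   # work on the batch as a relation
      break if ids.length < 1000                        # last, partial batch
      last_id = ids.last                                 # advance the keyset offset
    end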