Class: EachBatch::BatchEnumerator

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/each_batch/batch_enumerator.rb

Constant Summary collapse

DEFAULT_BATCH_SIZE =
1000

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(relation, of: DEFAULT_BATCH_SIZE, load: false, order: :asc, keys: nil) ⇒ BatchEnumerator

Returns a new instance of BatchEnumerator.

Raises:

  • (ArgumentError)


13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/each_batch/batch_enumerator.rb', line 13

# Builds an enumerator that walks +relation+ in batches ordered by +keys+.
#
# @param relation [Object] the relation to iterate; it must respond to
#   +primary_key+ and +select_values+ (presumably an ActiveRecord::Relation).
# @param of [Integer] maximum number of rows per batch; must be a positive whole number.
# @param load [Boolean] stored as-is; read by #each to decide whether each batch
#   is loaded before it is yielded.
# @param order [Symbol, String] +asc+ or +desc+, matched case-insensitively and
#   stored lower-cased.
# @param keys [Array<Symbol, String>, nil] columns to order by; defaults to the
#   primary key alone. The primary key must be the last entry.
# @raise [ArgumentError] if the batch size is not a positive whole number, the
#   order is not asc/desc, the primary key is not the last key, or a custom
#   select clause omits one of the keys.
def initialize(relation, of: DEFAULT_BATCH_SIZE, load: false, order: :asc, keys: nil)
  # respond_to? guard: non-numeric input (e.g. a Symbol) must surface as the
  # documented ArgumentError rather than a NoMethodError raised by #to_i.
  unless of.respond_to?(:to_i) && of == of.to_i && of > 0
    raise ArgumentError, 'Batch size must be a positive integer'
  end

  # Keep one canonical spelling so the code that later builds the ordering hash
  # and picks the comparison operator never sees "DESC" / "Desc" variants.
  order = order.to_s.downcase
  raise ArgumentError, 'Invalid order' unless %w[asc desc].include?(order)

  pk_name = relation.primary_key.to_s
  keys = keys&.map(&:to_s) || [pk_name]

  # TODO: This is for safety, since there is no easy way to determine whether the order
  # is deterministic or not. PK guarantees that.
  raise ArgumentError, 'Primary key must be the last key' if keys.last != pk_name

  # With a custom select clause every ordering key has to be selected too,
  # otherwise the key values of a batch's last row could not be read back.
  selected = relation.select_values
  if selected.present? && !(keys - selected.map(&:to_s)).empty?
    raise ArgumentError, 'Not all keys are included in the custom select clause'
  end

  @relation = relation
  @of = of.to_i # integral non-Integer input (e.g. 10.0) is stored as an Integer
  @load = load
  @order = order
  @keys = keys
end

Instance Attribute Details

#keys ⇒ Object (readonly)

Returns the value of attribute keys.



11
12
13
# File 'lib/each_batch/batch_enumerator.rb', line 11

# Column names used to order and page through the batches.
# The constructor stringifies them, falls back to the primary key alone when
# none are given, and requires the primary key to be the last entry.
#
# @return [Array<String>]
def keys
  @keys
end

#order ⇒ Object (readonly)

Returns the value of attribute order.



11
12
13
# File 'lib/each_batch/batch_enumerator.rb', line 11

# Sort direction applied to every key column.
# Stored as a String; the constructor only accepts 'asc' or 'desc',
# compared case-insensitively.
#
# @return [String]
def order
  @order
end

#relation ⇒ Object (readonly)

Returns the value of attribute relation.



11
12
13
# File 'lib/each_batch/batch_enumerator.rb', line 11

# The relation that was handed to the constructor, i.e. the full row set that
# is walked in batches.
#
# @return [Object] presumably an ActiveRecord::Relation — confirm against callers
def relation
  @relation
end

Instance Method Details

#batch_size ⇒ Object



37
38
39
# File 'lib/each_batch/batch_enumerator.rb', line 37

# Maximum number of rows per batch: the value of the constructor's +of:+
# option, which is validated there to be a positive whole number.
#
# @return [Integer]
def batch_size
  @of
end

#each ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/each_batch/batch_enumerator.rb', line 41

# Yields the relation one batch at a time, paging by the values of +keys+
# taken from the last row of the previous batch.
#
# @yieldparam batch [Object] a relation limited to at most +batch_size+ rows;
#   already loaded when the enumerator was built with +load: true+.
# @return [self] when no block is given
# @return [nil] once the loop has run out of rows
def each
  return self unless block_given?

  ordering = keys.map { |key| [key, order] }.to_h
  base_batch = relation.reorder(ordering).limit(batch_size)
  # Keeping every batch in the query cache would defeat the purpose of batching.
  base_batch.skip_query_cache!

  comparator = order.to_s.casecmp('desc').zero? ? :lt : :gt
  pk = relation.primary_key.to_sym
  current = base_batch

  loop do
    if @load
      # Same approach as the Rails load behaviour: fetch the rows once, then
      # hand out a relation scoped to exactly those primary keys with the
      # fetched records injected into it.
      fetched = current.records
      current = relation.where(pk => fetched.map(&pk))
      current.send(:load_records, fetched)
    end

    yield current

    if @load || current.loaded?
      # A short batch means the previous query already reached the end.
      break if current.length < batch_size

      cursor = current.last.attributes_before_type_cast&.values_at(*keys)
    else
      # The batch was never loaded, so one extra query is needed to read the
      # key values of its last row.
      boundary = current.offset(batch_size - 1).limit(1).pluck(*keys).first
      break if boundary.nil?

      cursor = Array.wrap(boundary)
    end

    current = base_batch.where_row(*keys).public_send(comparator, *cursor)
  end
end

#each_record(&block) ⇒ Object



79
80
81
82
83
# File 'lib/each_batch/batch_enumerator.rb', line 79

# Iterates over individual records instead of batches: every batch produced by
# #each is materialised with +to_a+ and its elements are yielded one by one.
#
# @yieldparam record [Object] a single element of the current batch
# @return [Enumerator] when no block is given
# @return [Object] otherwise, whatever #each returns
def each_record(&block)
  return enum_for(:each_record) unless block

  each do |batch|
    batch.to_a.each(&block)
  end
end

#pluck(*pluck_keys) {|x| ... } ⇒ EachBatch::PluckedBatchEnumerator

Pluck the selected columns in batches. Batching uses the same batch size, order and keys as this BatchEnumerator instance.

Parameters:

  • pluck_keys (Array<Symbol, String>)

    The keys of the columns to pluck.

Yield Parameters:

  • x (Array<Object>)

    The array of the plucked values

Returns:



94
95
96
97
98
99
100
101
102
103
104
105
106
# File 'lib/each_batch/batch_enumerator.rb', line 94

# Plucks the given columns batch by batch, reusing this enumerator's relation,
# batch size, order and keys.
#
# @param pluck_keys [Array<Symbol, String>] columns to pluck; stringified
#   before being passed on.
# @yield forwarded to the plucked enumerator's +each+
# @return [EachBatch::PluckedBatchEnumerator] when no block is given
# @return [Object] otherwise, the result of iterating the plucked enumerator
def pluck(*pluck_keys, &block)
  column_names = pluck_keys.map(&:to_s)
  enumerator = ::EachBatch::PluckedBatchEnumerator.new(
    relation, of: batch_size, order: order, keys: keys, pluck_keys: column_names
  )

  block ? enumerator.each(&block) : enumerator
end