Class: Gitlab::Database::BatchCounter

Inherits:
Object
  • Object
show all
Defined in:
lib/gitlab/database/batch_counter.rb

Constant Summary collapse

FALLBACK =
-1
MIN_REQUIRED_BATCH_SIZE =
1_250
DEFAULT_SUM_BATCH_SIZE =
1_000
MAX_ALLOWED_LOOPS =
10_000
SLEEP_TIME_IN_SECONDS =

Sleep for 10 milliseconds after each batch attempt.

0.01
ALLOWED_MODES =
[:itself, :distinct].freeze
FALLBACK_FINISH =
0
OFFSET_BY_ONE =
1
DEFAULT_DISTINCT_BATCH_SIZE =
10_000
DEFAULT_BATCH_SIZE =
100_000

Instance Method Summary collapse

Constructor Details

#initialize(relation, column: nil, operation: :count, operation_args: nil, max_allowed_loops: nil) ⇒ BatchCounter

Returns a new instance of BatchCounter.



19
20
21
22
23
24
25
# File 'lib/gitlab/database/batch_counter.rb', line 19

# Stores the relation and the counting configuration on the instance.
#
# @param relation [#primary_key] relation to operate on; also supplies the
#   default column through +primary_key+
# @param column [Object, nil] column to use; when not given, the relation's
#   primary key is used
# @param operation [Symbol] operation to perform later (defaults to +:count+)
# @param operation_args [Object, nil] extra arguments kept for the operation
# @param max_allowed_loops [Integer, nil] loop limit; when not given,
#   MAX_ALLOWED_LOOPS is used
def initialize(relation, column: nil, operation: :count, operation_args: nil, max_allowed_loops: nil)
  # Resolve the fallbacks first so the assignments below are plain copies.
  column ||= relation.primary_key
  max_allowed_loops ||= MAX_ALLOWED_LOOPS

  @relation = relation
  @column = column
  @operation = operation
  @operation_args = operation_args
  @max_allowed_loops = max_allowed_loops
end

Instance Method Details

#count(batch_size: nil, mode: :itself, start: nil, finish: nil) ⇒ Object



34
35
36
37
38
39
40
# File 'lib/gitlab/database/batch_counter.rb', line 34

# Runs a batch count without a timeout and reduces the outcome to one value.
#
# All keyword arguments are forwarded unchanged to #count_with_timeout,
# with +timeout: nil+.
#
# @return [Object] the +:count+ entry of the outcome when its +:status+ is
#   +:completed+, otherwise FALLBACK
def count(batch_size: nil, mode: :itself, start: nil, finish: nil)
  outcome = count_with_timeout(
    batch_size: batch_size,
    mode: mode,
    start: start,
    finish: finish,
    timeout: nil
  )

  outcome[:status] == :completed ? outcome[:count] : FALLBACK
end

#count_with_timeout(batch_size: nil, mode: :itself, start: nil, finish: nil, timeout: nil, partial_results: nil) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/gitlab/database/batch_counter.rb', line 42

# Runs @operation over the relation one batch at a time, accumulating the
# per-batch results, and reports how the run ended.
#
# @param batch_size [Integer, nil] width of each batch; when nil it is chosen
#   by batch_size_for_mode_and_operation for the given mode and @operation
# @param mode [Symbol] validated by check_mode! and passed to build_relation_batch
# @param start [Integer, nil] lower bound, resolved through actual_start
# @param finish [Integer, nil] upper bound, resolved through actual_finish
# @param timeout [Numeric, nil] budget compared with elapsed monotonic
#   seconds; nil disables the check
# @param partial_results [Object, nil] initial value of the accumulator,
#   e.g. the +:partial_results+ returned by an earlier timed-out run
# @return [Hash] one of:
#   - <tt>{ status: :completed, count: results }</tt>
#   - <tt>{ status: :timeout, partial_results: results, continue_from: batch_start }</tt>
#   - <tt>{ status: :cancelled }</tt> when a batch is cancelled and the batch
#     size cannot be halved any further
#   - <tt>{ status: :bad_config }</tt> when unwanted_configuration? is true
# @raise [RuntimeError] when a transaction is open, or when the resolved
#   start or finish is negative
def count_with_timeout(batch_size: nil, mode: :itself, start: nil, finish: nil, timeout: nil, partial_results: nil)
  # Refuse to run while the relation's connection has an open transaction.
  raise 'BatchCount can not be run inside a transaction' if transaction_open?

  check_mode!(mode)

  # non-distinct have better performance
  batch_size ||= batch_size_for_mode_and_operation(mode, @operation)

  # Resolve both bounds through the helpers; they receive nil when the
  # caller did not supply a value.
  start = actual_start(start)
  finish = actual_finish(finish)

  raise "Batch counting expects positive values only for #{@column}" if start < 0 || finish < 0
  return { status: :bad_config } if unwanted_configuration?(finish, batch_size, start)

  # The accumulator starts from the caller-supplied partial results
  # (nil for a fresh run).
  results = partial_results
  batch_start = start

  start_time = ::Gitlab::Metrics::System.monotonic_time.seconds

  while batch_start < finish

    # Timeout elapsed, return partial result so the caller can continue later.
    # The check runs only before a batch attempt, never in the middle of one.
    if timeout && ::Gitlab::Metrics::System.monotonic_time.seconds - start_time > timeout
      return { status: :timeout, partial_results: results, continue_from: batch_start }
    end

    begin
      # Clamp the final batch so it never extends past finish.
      batch_end = [batch_start + batch_size, finish].min
      batch_relation = build_relation_batch(batch_start, batch_end, mode)

      results = merge_results(results, batch_relation.send(@operation, *@operation_args)) # rubocop:disable GitlabSecurity/PublicSend
      # Advance only after the batch succeeded; a cancelled batch is retried
      # from the same batch_start.
      batch_start = batch_end
    rescue ActiveRecord::QueryCanceled => error
      # retry with a safe batch size & warmer cache
      # Halving is allowed only while the result stays at or above
      # MIN_REQUIRED_BATCH_SIZE.
      if batch_size >= 2 * MIN_REQUIRED_BATCH_SIZE
        batch_size /= 2
      else
        # The batch size cannot shrink further: log the failure and give up.
        log_canceled_batch_fetch(batch_start, mode, batch_relation.to_sql, error)
        return { status: :cancelled }
      end
    end

    # Pause after every attempt, including before a retry with a smaller batch.
    sleep(SLEEP_TIME_IN_SECONDS)
  end

  { status: :completed, count: results }
end

#merge_results(results, object) ⇒ Object



94
95
96
97
98
99
100
101
102
# File 'lib/gitlab/database/batch_counter.rb', line 94

# Combines the accumulated results with the result of one more batch.
#
# The accumulator is never modified in place: it may be an object owned by
# the caller (for example the +partial_results+ handed to
# #count_with_timeout) or a frozen hash.
#
# @param results [Hash, Numeric, nil] accumulator so far; nil when no batch
#   has been merged yet
# @param object [Hash, Numeric] result of the latest batch
# @return [Hash, Numeric] +object+ itself when +results+ is nil; a new Hash
#   with the values of shared keys added together when +object+ is a Hash;
#   otherwise <tt>results + object</tt>
def merge_results(results, object)
  return object unless results

  if object.is_a?(Hash)
    # Hash#merge (not merge!) leaves the receiver untouched.
    results.merge(object) { |_key, accumulated, batch| accumulated + batch }
  else
    results + object
  end
end

#transaction_open?Boolean

Returns:

  • (Boolean)


90
91
92
# File 'lib/gitlab/database/batch_counter.rb', line 90

# Asks the relation's connection whether a transaction is currently open.
#
# @return [Boolean] whatever the connection's +transaction_open?+ returns
def transaction_open?
  connection = @relation.connection
  connection.transaction_open?
end

#unwanted_configuration?(finish, batch_size, start) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
30
31
32
# File 'lib/gitlab/database/batch_counter.rb', line 27

# Decides whether the requested range and batch size should be rejected.
#
# The configuration is rejected when any of the following holds, checked in
# this order:
# - the operation is +:count+ and +batch_size+ is at or below MIN_REQUIRED_BATCH_SIZE
# - the operation is +:sum+ and +batch_size+ is below DEFAULT_SUM_BATCH_SIZE
# - the range divided by +batch_size+ reaches @max_allowed_loops
# - +start+ is not below +finish+
#
# @param finish [Integer] upper bound of the range
# @param batch_size [Integer] width of each batch
# @param start [Integer] lower bound of the range
# @return [Boolean] true when the configuration must not be run
def unwanted_configuration?(finish, batch_size, start)
  return true if @operation == :count && batch_size <= MIN_REQUIRED_BATCH_SIZE
  return true if @operation == :sum && batch_size < DEFAULT_SUM_BATCH_SIZE

  # Integer division, exactly as many whole batches as fit in the range.
  loops_needed = (finish - start) / batch_size
  return true if loops_needed >= @max_allowed_loops

  start >= finish
end