Class: Gitlab::Database::Migrations::TestBatchedBackgroundRunner

Inherits:
BaseBackgroundRunner show all
Includes:
DynamicModelHelpers
Defined in:
lib/gitlab/database/migrations/test_batched_background_runner.rb

Constant Summary collapse

MIGRATION_DETAILS_FILE_NAME =
'details.json'

Constants included from DynamicModelHelpers

DynamicModelHelpers::BATCH_SIZE

Instance Attribute Summary

Attributes inherited from BaseBackgroundRunner

#connection, #result_dir

Instance Method Summary collapse

Methods included from DynamicModelHelpers

#define_batchable_model, #each_batch, #each_batch_range

Methods inherited from BaseBackgroundRunner

#run_jobs

Constructor Details

#initialize(result_dir:, connection:, from_id:) ⇒ TestBatchedBackgroundRunner

Returns a new instance of TestBatchedBackgroundRunner.



11
12
13
14
15
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 11

# Sets up a runner for sampling batched background migrations.
#
# @param result_dir passed through unchanged to BaseBackgroundRunner#initialize
# @param connection passed through to BaseBackgroundRunner#initialize and also kept in @connection
# @param from_id kept in @from_id
#   NOTE(review): presumably exposed through a `from_id` reader that is not visible here — confirm
def initialize(result_dir:, connection:, from_id:)
  super(result_dir: result_dir, connection: connection)

  @from_id = from_id
  @connection = connection
end

Instance Method Details

#jobs_by_migration_name ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 17

# Builds, for every executable batched migration whose id is greater than from_id, a lazy list
# of sample jobs spread across the migrated table.
#
# Batch starting points come from uniform_fractions, scaled into the range between the migration's
# next_min_value and (table maximum - batch_size). Sampling for a migration stops as soon as a
# starting point falls inside a batch that was already sampled or attempted.
#
# Jobs are only created (via create_batched_job!) while the returned enumerables are consumed;
# export_migration_details is called immediately for each migration.
#
# @return [Hash] job class name => lazy enumerable of the jobs created for that migration
def jobs_by_migration_name
  set_shared_model_connection do
    Gitlab::Database::BackgroundMigration::BatchedMigration
      .executable
      .where('id > ?', from_id)
      .to_h do |migration|
      batching_strategy = migration.batch_class.new(connection: connection)

      smallest_batch_start = migration.next_min_value

      table_max_value = define_batchable_model(migration.table_name, connection: connection)
                          .maximum(migration.column_name)

      # A table without rows has no maximum (nil). Fall back to the smallest start so that exactly one
      # batch is attempted, finds nothing, and sampling ends with an empty job list instead of raising.
      largest_batch_start =
        if table_max_value.nil?
          smallest_batch_start
        else
          [table_max_value - migration.batch_size, smallest_batch_start].max
        end

      # variance is the width of the range of possible batch starts; each fraction between 0 and 1 is
      # scaled by it to pick the batches we actually sample.
      variance = largest_batch_start - smallest_batch_start

      batch_starts = uniform_fractions
                       .lazy # frac varies from 0 to 1, values in smallest_batch_start..largest_batch_start
                       .map { |frac| (variance * frac).to_i + smallest_batch_start }

      # Track previously run batches so that we stop sampling if a new batch would intersect an older one
      completed_batches = []

      jobs_to_sample = batch_starts
                         # Stop sampling if a batch would intersect a previous batch
                         .take_while { |start| completed_batches.none? { |batch| batch.cover?(start) } }
                         .map do |batch_start|
        # The current block is lazily evaluated as part of the jobs_to_sample enumerable,
        # so it executes after the enclosing set_shared_model_connection block has already returned.
        # Therefore we need to re-associate with the explicit connection again.
        Gitlab::Database::SharedModel.using_connection(connection) do
          next_bounds = batching_strategy.next_batch(
            migration.table_name,
            migration.column_name,
            batch_min_value: batch_start,
            batch_size: migration.batch_size,
            job_class: migration.job_class,
            job_arguments: migration.job_arguments
          )

          # If no rows match, the next_bounds are nil.
          # This will only happen if there are zero rows to match from the current sampling point to the end
          # of the table.
          # Simulate the approach in the actual background migration worker by not sampling a batch
          # from this range.
          # (The actual worker would finish the migration, but we may find batches that can be sampled elsewhere
          # in the table)
          if next_bounds.nil?
            # If the migration has no work to do across the entire table, sampling can get stuck
            # in a loop if we don't mark the attempted batches as completed
            completed_batches << (batch_start..(batch_start + migration.batch_size))
            next
          end

          batch_min, batch_max = next_bounds

          job = migration.create_batched_job!(batch_min, batch_max)

          completed_batches << (batch_min..batch_max)

          job
        end
      end.reject(&:nil?) # Remove skipped batches from the lazy list of batches to test

      job_class_name = migration.job_class_name

      export_migration_details(job_class_name, migration.slice(:interval, :total_tuple_count, :max_batch_size))

      [job_class_name, jobs_to_sample]
    end
  end
end

#run_job(job) ⇒ Object



93
94
95
96
97
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 93

# Performs a single batched job through BatchedMigrationWrapper, inside set_shared_model_connection.
#
# @param job the job handed to BatchedMigrationWrapper#perform
# @return whatever set_shared_model_connection returns for the block
def run_job(job)
  set_shared_model_connection do
    # Build the wrapper inside the block so construction also happens under the shared connection
    wrapper = Gitlab::Database::BackgroundMigration::BatchedMigrationWrapper.new(connection: connection)
    wrapper.perform(job)
  end
end

#uniform_fractions ⇒ Object



99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
# File 'lib/gitlab/database/migrations/test_batched_background_runner.rb', line 99

# Returns an endless enumerator of evenly spread fractions in the range 0..1, getting finer the
# more values are pulled from it:
#
#   0, 1                  (emitted first, as integers)
#   1/2
#   1/4, 3/4
#   1/8, 3/8, 5/8, 7/8
#   ...
#
# Each round doubles the denominator and emits every odd numerator below it, so no fraction is
# ever repeated.
#
# @return [Enumerator] infinite; consume it with take / first / lazy
def uniform_fractions
  Enumerator.new do |yielder|
    # The two end points are the special case that seeds the sequence
    yielder << 0
    yielder << 1

    denominator = 2

    loop do
      # Odd numerators only: even ones reduce to a fraction from an earlier round
      1.step(denominator - 1, 2) do |numerator|
        yielder << numerator.fdiv(denominator)
      end

      denominator *= 2
    end
  end
end