Module: Gitlab::Database::Migrations::BackgroundMigrationHelpers

Included in:
Gitlab::Database::MigrationHelpers, PartitioningMigrationHelpers::TableManagementHelpers
Defined in:
lib/gitlab/database/migrations/background_migration_helpers.rb

Constant Summary

BACKGROUND_MIGRATION_BATCH_SIZE =

Number of rows to process per job

1_000
BACKGROUND_MIGRATION_JOB_BUFFER_SIZE =

Number of jobs to bulk queue at a time

1_000

Instance Method Summary

Instance Method Details

#bulk_migrate_async(*args) ⇒ Object


# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 139

def bulk_migrate_async(*args)
  with_migration_context do
    BackgroundMigrationWorker.bulk_perform_async(*args)
  end
end
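
As an illustrative sketch (the job class name and ID ranges are hypothetical), the arguments are the same array of [job_class_name, job_arguments] pairs that `BackgroundMigrationWorker.bulk_perform_async` expects:

# Push several jobs to Sidekiq in one round trip. 'ProcessRoutes' and the
# ID ranges below are placeholders for this example.
bulk_migrate_async([
  ['ProcessRoutes', [1, 1_000]],
  ['ProcessRoutes', [1_001, 2_000]]
])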

#bulk_migrate_in(*args) ⇒ Object


# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 133

def bulk_migrate_in(*args)
  with_migration_context do
    BackgroundMigrationWorker.bulk_perform_in(*args)
  end
end
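
A similar hedged sketch with a delay; the 5-minute interval and job list are illustrative:

# Schedule the whole batch of jobs to run after a single shared delay.
bulk_migrate_in(5.minutes, [
  ['ProcessRoutes', [1, 1_000]],
  ['ProcessRoutes', [1_001, 2_000]]
])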

#bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE) ⇒ Object

Bulk queues background migration jobs for an entire table, batched by ID range. “Bulk” meaning many jobs will be pushed at a time for efficiency. If you need a delay interval per job, then use `queue_background_migration_jobs_by_range_at_intervals`.

model_class - The table being iterated over
job_class_name - The background migration job class as a string
batch_size - The maximum number of rows per job

Example:

class Route < ActiveRecord::Base
  include EachBatch
  self.table_name = 'routes'
end

bulk_queue_background_migration_jobs_by_range(Route, 'ProcessRoutes')

Where the model_class includes EachBatch, and the background migration exists:

class Gitlab::BackgroundMigration::ProcessRoutes
  def perform(start_id, end_id)
    # do something
  end
end

# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 34

def bulk_queue_background_migration_jobs_by_range(model_class, job_class_name, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE)
  raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')

  jobs = []
  table_name = model_class.quoted_table_name

  model_class.each_batch(of: batch_size) do |relation|
    start_id, end_id = relation.pluck("MIN(#{table_name}.id)", "MAX(#{table_name}.id)").first

    if jobs.length >= BACKGROUND_MIGRATION_JOB_BUFFER_SIZE
      # Note: This code path generally only helps with many millions of rows
      # We push multiple jobs at a time to reduce the time spent in
      # Sidekiq/Redis operations. We're using this buffer based approach so we
      # don't need to run additional queries for every range.
      bulk_migrate_async(jobs)
      jobs.clear
    end

    jobs << [job_class_name, [start_id, end_id]]
  end

  bulk_migrate_async(jobs) unless jobs.empty?
end

#migrate_async(*args) ⇒ Object


# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 121

def migrate_async(*args)
  with_migration_context do
    BackgroundMigrationWorker.perform_async(*args)
  end
end
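
For illustration (hypothetical job name and arguments), this enqueues a single job immediately:

# The first argument is the background migration class name, the second is
# the argument list handed to its #perform method.
migrate_async('ProcessRoutes', [1, 1_000])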

#migrate_in(*args) ⇒ Object


# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 127

def migrate_in(*args)
  with_migration_context do
    BackgroundMigrationWorker.perform_in(*args)
  end
end
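
And with a delay, again using illustrative values:

# Schedule one job to run roughly ten minutes from now.
migrate_in(10.minutes, 'ProcessRoutes', [1, 1_000])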

#perform_background_migration_inline? ⇒ Boolean

Returns:

  • (Boolean)

# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 117

def perform_background_migration_inline?
  Rails.env.test? || Rails.env.development?
end
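
Callers can use this predicate to run a job synchronously in test and development instead of going through Sidekiq. A hedged sketch, reusing the hypothetical ProcessRoutes job from the examples above:

# Run the job inline in test/development, otherwise schedule it via Sidekiq.
if perform_background_migration_inline?
  Gitlab::BackgroundMigration::ProcessRoutes.new.perform(1, 1_000)
else
  migrate_in(2.minutes, 'ProcessRoutes', [1, 1_000])
end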

#queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false) ⇒ Object

Queues background migration jobs for an entire table, batched by ID range. Each job is scheduled with a `delay_interval` in between. If you use a small interval, then some jobs may run at the same time.

model_class - The table or relation being iterated over
job_class_name - The background migration job class as a string
delay_interval - The duration between each job's scheduled time (must respond to `to_f`)
batch_size - The maximum number of rows per job
other_job_arguments - Other arguments to send to the job
initial_delay - An additional delay added to every job's scheduled time
track_jobs - When this flag is set, a record is created in the background_migration_jobs table for each job that is scheduled to be run. These records can be used to trace execution of the background jobs, but there is no built-in support to manage them automatically at this time. Only set this flag if you understand how it works and intend to clean up the database records in your background job.

Returns the final migration delay.

Example:

class Route < ActiveRecord::Base
  include EachBatch
  self.table_name = 'routes'
end

queue_background_migration_jobs_by_range_at_intervals(Route, 'ProcessRoutes', 1.minute)

Where the model_class includes EachBatch, and the background migration exists:

class Gitlab::BackgroundMigration::ProcessRoutes
  def perform(start_id, end_id)
    # do something
  end
end

# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 90

def queue_background_migration_jobs_by_range_at_intervals(model_class, job_class_name, delay_interval, batch_size: BACKGROUND_MIGRATION_BATCH_SIZE, other_job_arguments: [], initial_delay: 0, track_jobs: false)
  raise "#{model_class} does not have an ID to use for batch ranges" unless model_class.column_names.include?('id')

  # To not overload the worker too much we enforce a minimum interval both
  # when scheduling and performing jobs.
  if delay_interval < BackgroundMigrationWorker.minimum_interval
    delay_interval = BackgroundMigrationWorker.minimum_interval
  end

  final_delay = 0

  model_class.each_batch(of: batch_size) do |relation, index|
    start_id, end_id = relation.pluck(Arel.sql('MIN(id), MAX(id)')).first

    # `BackgroundMigrationWorker.bulk_perform_in` schedules all jobs for
    # the same time, which is not helpful in most cases where we wish to
    # spread the work over time.
    final_delay = initial_delay + delay_interval * index
    full_job_arguments = [start_id, end_id] + other_job_arguments

    track_in_database(job_class_name, full_job_arguments) if track_jobs
    migrate_in(final_delay, job_class_name, full_job_arguments)
  end

  final_delay
end
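
A hedged sketch of how this might be called from a post-deployment migration; the migration class, model, job name, Rails version, and batching values are all illustrative assumptions:

# Queue one job per 10_000-row range, two minutes apart, and record each
# scheduled job in background_migration_jobs for later tracing.
class ScheduleProcessRoutes < ActiveRecord::Migration[6.0]
  include Gitlab::Database::MigrationHelpers

  disable_ddl_transaction!

  class Route < ActiveRecord::Base
    include EachBatch
    self.table_name = 'routes'
  end

  def up
    queue_background_migration_jobs_by_range_at_intervals(
      Route,
      'ProcessRoutes',
      2.minutes,
      batch_size: 10_000,
      track_jobs: true
    )
  end

  def down
    # no-op
  end
end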

#with_migration_context(&block) ⇒ Object


# File 'lib/gitlab/database/migrations/background_migration_helpers.rb', line 145

def with_migration_context(&block)
  Gitlab::ApplicationContext.with_context(caller_id: self.class.to_s, &block)
end
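
For illustration, wrapping a custom enqueue in this helper attributes the work to the migration class via Gitlab::ApplicationContext (the job and arguments are hypothetical):

# Anything enqueued inside the block carries this class as its caller_id.
with_migration_context do
  BackgroundMigrationWorker.perform_async('ProcessRoutes', [1, 1_000])
end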