Class: Gitlab::Database::BackgroundMigration::BatchedMigration

Inherits:
SharedModel
  • Object
show all
Includes:
Utils::StrongMemoize
Defined in:
lib/gitlab/database/background_migration/batched_migration.rb

Constant Summary collapse

# Namespace prepended to `job_class_name` when the job class is resolved (see #job_class).
JOB_CLASS_MODULE =
'Gitlab::BackgroundMigration'
# Namespace prepended to `batch_class_name` when the batching strategy is resolved (see #batch_class).
BATCH_CLASS_MODULE =
"#{JOB_CLASS_MODULE}::BatchingStrategies"
# #should_stop? returns true once the share of failed jobs exceeds this ratio.
MAXIMUM_FAILED_RATIO =
0.5
# #should_stop? does not evaluate the failure ratio until at least this many jobs exist.
MINIMUM_JOBS =
50
# Value returned by #progress for finished or finalized migrations.
FINISHED_PROGRESS_VALUE =
100
# NOTE(review): not referenced by the methods shown here; presumably a lower bound
# for `pause_ms` - confirm against the model validations.
MINIMUM_PAUSE_MS =
100
# NOTE(review): not referenced by the methods shown here. #smoothed_time_efficiency
# declares its own defaults (10 jobs), so confirm where this value is consumed.
DEFAULT_NUMBER_OF_JOBS =
20
# NOTE(review): not referenced by the methods shown here. #smoothed_time_efficiency
# defaults `alpha` to 0.2, so confirm where this value is consumed.
DEFAULT_EMA_ALPHA =
0.4

Constants inherited from SharedModel

SharedModel::SHARED_SCHEMAS

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from SharedModel

connection, #connection_db_config, connection_pool, ensure_connection_set!, using_connection

Class Method Details

.active_migrations_distinct_on_table(connection:, limit:) ⇒ Object



147
148
149
150
151
152
153
154
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 147

# Returns executable migrations for the given connection, keeping one migration per table.
#
# The inner query selects a single id per `table_name` (`DISTINCT ON`, ordered by
# table name and then id). The outer query re-selects those rows, applies
# `queue_order` and caps the result at `limit`.
#
# @param connection connection whose gitlab schemas scope the lookup
# @param limit maximum number of migrations returned
def self.active_migrations_distinct_on_table(connection:, limit:)
  schemas = Gitlab::Database.gitlab_schemas_for_connection(connection)

  one_per_table = select('DISTINCT ON (table_name) id')
    .for_gitlab_schema(schemas)
    .executable
    .order(table_name: :asc, id: :asc)

  where(id: one_per_table).queue_order.limit(limit)
end

.find_executable(id, connection:) ⇒ Object



142
143
144
145
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 142

# Looks up the migration with the given id among the executable migrations
# that belong to the gitlab schemas of `connection`.
#
# @param id id passed to `find_by_id`
# @param connection connection whose gitlab schemas scope the lookup
def self.find_executable(id, connection:)
  schemas = Gitlab::Database.gitlab_schemas_for_connection(connection)

  for_gitlab_schema(schemas).executable.find_by_id(id)
end

.find_for_configuration(gitlab_schema, job_class_name, table_name, column_name, job_arguments, include_compatible: false) ⇒ Object



133
134
135
136
137
138
139
140
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 133

# Returns the first record of `for_configuration` for the given arguments.
#
# All arguments, including `include_compatible`, are forwarded unchanged to
# `for_configuration`.
def self.find_for_configuration(
  gitlab_schema, job_class_name, table_name, column_name, job_arguments, include_compatible: false
)
  matching_migrations = for_configuration(
    gitlab_schema, job_class_name, table_name, column_name, job_arguments,
    include_compatible: include_compatible
  )

  matching_migrations.first
end

.gitlab_schema_column_exists? ⇒ Boolean

Returns:

  • (Boolean)


66
67
68
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 66

# Whether the model's `column_names` include the `gitlab_schema` column.
#
# @return [Boolean]
def self.gitlab_schema_column_exists?
  schema_column = 'gitlab_schema'

  column_names.include?(schema_column)
end

.successful_rows_counts(migrations) ⇒ Object



156
157
158
159
160
161
162
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 156

# Sums `batch_size` of succeeded jobs, grouped by `batched_background_migration_id`,
# for the given migrations.
#
# @param migrations value used to filter `batched_background_migration_id`
def self.successful_rows_counts(migrations)
  succeeded_jobs = BatchedJob
    .with_status(:succeeded)
    .where(batched_background_migration_id: migrations)

  succeeded_jobs.group(:batched_background_migration_id).sum(:batch_size)
end

.valid_status ⇒ Object



129
130
131
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 129

# Lists the names of all states declared on the model's state machine.
def self.valid_status
  state_machine.states.map { |state| state.name }
end

Instance Method Details

#batch_class ⇒ Object



238
239
240
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 238

# Resolves the batching strategy class: `batch_class_name` looked up
# inside the BATCH_CLASS_MODULE namespace.
def batch_class
  qualified_name = format('%s::%s', BATCH_CLASS_MODULE, batch_class_name)

  qualified_name.constantize
end

#batch_class_name=(class_name) ⇒ Object



246
247
248
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 246

# Stores the batching strategy class name without a leading "::".
#
# @param class_name [String] class name, optionally prefixed with "::"
def batch_class_name=(class_name)
  relative_name = class_name.delete_prefix("::")

  write_attribute(:batch_class_name, relative_name)
end

#create_batched_job!(min, max) ⇒ Object



179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 179

# Creates a batched job for the range between `min` and `max`, copying the
# migration's current `batch_size`, `sub_batch_size` and `pause_ms`.
#
# For cursor-based migrations (`cursor?`) the bounds are stored as
# `min_cursor`/`max_cursor`, otherwise as `min_value`/`max_value`.
#
# @param min lower bound of the batch
# @param max upper bound of the batch
def create_batched_job!(min, max)
  attributes = { batch_size: batch_size, sub_batch_size: sub_batch_size, pause_ms: pause_ms }

  lower_key, upper_key = cursor? ? %i[min_cursor max_cursor] : %i[min_value max_value]
  attributes[lower_key] = min
  attributes[upper_key] = max

  batched_jobs.create!(attributes)
end

#finalize_command ⇒ Object



340
341
342
343
344
345
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 340

# Builds the shell command that finalizes this migration through the
# `gitlab:background_migrations:finalize` rake task.
#
# The job arguments are JSON-encoded and every comma in them is escaped with a
# backslash, because rake splits task arguments on commas.
#
# The command is assembled directly instead of post-processing a heredoc with
# `squeeze(' ')`: squeezing the whole string also collapsed repeated spaces
# inside the JSON-encoded job arguments, so the printed command could describe
# arguments different from the ones stored on the migration.
#
# @return [String] single-line command
def finalize_command
  escaped_arguments = job_arguments.to_json.gsub(',', '\,')

  'sudo gitlab-rake gitlab:background_migrations:finalize' \
    "[#{job_class_name},#{table_name},#{column_name},'#{escaped_arguments}']"
end

#health_context ⇒ Object



290
291
292
293
294
295
296
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 290

# Memoized `Gitlab::Database::HealthStatus::Context` built from this migration,
# its connection and the tables returned by #health_context_tables.
def health_context
  return @health_context if @health_context

  @health_context = Gitlab::Database::HealthStatus::Context.new(self, connection, health_context_tables)
end

#health_context_tables ⇒ Object



298
299
300
301
302
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 298

# Tables to consider for health checks.
#
# Uses the tables declared by the job class when it responds to
# `health_context_tables` and returns a non-blank value; otherwise falls back
# to this migration's own `table_name`.
#
# @return [Array]
def health_context_tables
  declared_tables = job_class.health_context_tables if job_class.respond_to?(:health_context_tables)

  declared_tables.presence || [table_name]
end

#hold!(until_time: 10.minutes.from_now) ⇒ Object



304
305
306
307
308
309
310
311
312
313
314
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 304

# Puts the migration on hold until `until_time`.
#
# Logs the decision, including the hold duration in whole seconds measured
# from `Time.current`, and then persists `on_hold_until`.
#
# @param until_time time at which the hold expires (defaults to 10 minutes from now)
def hold!(until_time: 10.minutes.from_now)
  seconds_on_hold = (until_time - Time.current).round
  description = "#{self} put on hold until #{until_time}"

  Gitlab::AppLogger.info(
    message: description, migration_id: id, job_class_name: job_class_name, duration_s: seconds_on_hold
  )

  update!(on_hold_until: until_time)
end

#interval_elapsed?(variance: 0) ⇒ Boolean

Returns:

  • (Boolean)


172
173
174
175
176
177
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 172

# Whether at least `interval - variance` has passed since the last job was created.
# Always true when there is no last job.
#
# @param variance amount subtracted from `interval` before comparing
# @return [Boolean]
def interval_elapsed?(variance: 0)
  return true unless last_job

  required_gap = interval - variance
  cutoff = Time.current - required_gap

  last_job.created_at <= cutoff
end

#job_class ⇒ Object



234
235
236
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 234

# Resolves the job class: `job_class_name` looked up inside the
# JOB_CLASS_MODULE namespace.
def job_class
  qualified_name = format('%s::%s', JOB_CLASS_MODULE, job_class_name)

  qualified_name.constantize
end

#job_class_name=(class_name) ⇒ Object



242
243
244
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 242

# Stores the job class name without a leading "::".
#
# @param class_name [String] class name, optionally prefixed with "::"
def job_class_name=(class_name)
  relative_name = class_name.delete_prefix("::")

  write_attribute(:job_class_name, relative_name)
end

#migrated_tuple_count ⇒ Object



250
251
252
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 250

# Sum of `batch_size` over this migration's succeeded jobs.
def migrated_tuple_count
  succeeded_jobs = batched_jobs.with_status(:succeeded)

  succeeded_jobs.sum(:batch_size)
end

#next_min_value ⇒ Object



220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 220

# Lower bound to use for the next batch.
#
# Falls back to the migration's own `min_cursor` / `min_value` when there is no
# last job, or when the last job has no upper bound recorded.
def next_min_value
  unless cursor?
    following_value = last_job&.max_value&.next
    return following_value || min_value
  end

  # Cursor batching needs a deliberate off-by-one difference: the end of the previous batch is
  # returned as-is rather than advanced with `.next`, because this class has no knowledge of what
  # the cursor contains. Consequently the returned cursor must sit logically *before* the first row
  # of the next batch (a cursor equal to the first row would make batching skip that row); the
  # KeysetIterator used for cursor batching expects its starting cursor to precede the iteration range.
  previous_end = last_job&.max_cursor
  previous_end || min_cursor
end

#on_hold? ⇒ Boolean

Returns:

  • (Boolean)


316
317
318
319
320
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 316

# Whether the migration is currently on hold, i.e. `on_hold_until` is set and
# still lies in the future.
#
# Compares against `Time.current`, the same clock #hold! and #interval_elapsed?
# use, instead of `Time.zone.now`, which raises when no time zone is configured.
#
# @return [Boolean]
def on_hold?
  return false unless on_hold_until

  on_hold_until > Time.current
end

#optimize! ⇒ Object



281
282
283
284
285
286
287
288
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 281

# Applies the batch size proposed by the batch optimizer.
#
# @return false when the optimizer declines to optimize or proposes the current
#   `batch_size`; otherwise the result of `update!`
def optimize!
  if batch_optimizer.should_optimize?
    proposed_size = batch_optimizer.optimized_batch_size

    return update!(batch_size: proposed_size) unless proposed_size == batch_size
  end

  false
end

#progress ⇒ Object

Computes an estimate of the migration's progress, as a percentage.

Because total_tuple_count is only an estimate of the number of tuples, based on DB statistics, there may actually be more or fewer tuples than initially estimated by the time the migration completes, so the computed progress may not be exactly 100%. For that reason, when we know the migration completed successfully, we simply return 100.



332
333
334
335
336
337
338
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 332

# Estimated progress of the migration, in percent.
#
# @return FINISHED_PROGRESS_VALUE when the migration is finished or finalized;
#   nil when `total_tuple_count` is missing or not positive; otherwise
#   `100 * migrated_tuple_count / total_tuple_count`
def progress
  if finished? || finalized?
    FINISHED_PROGRESS_VALUE
  elsif total_tuple_count.to_i.positive?
    100 * migrated_tuple_count / total_tuple_count
  end
end

#prometheus_labels ⇒ Object



254
255
256
257
258
259
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 254

# Memoized label hash identifying this migration: its id plus an identifier
# of the form "<job_class_name>/<table_name>.<column_name>".
def prometheus_labels
  return @prometheus_labels if @prometheus_labels

  @prometheus_labels = {
    migration_id: id,
    migration_identifier: format("%s/%s.%s", job_class_name, table_name, column_name)
  }
end

#reset_attempts_of_blocked_jobs! ⇒ Object



166
167
168
169
170
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 166

# Sets `attempts` back to 0 on every job in the `blocked_by_max_attempts` scope,
# processing them in batches of 100.
def reset_attempts_of_blocked_jobs!
  blocked_jobs = batched_jobs.blocked_by_max_attempts

  blocked_jobs.each_batch(of: 100) { |jobs| jobs.update_all(attempts: 0) }
end

#retry_failed_jobs! ⇒ Object



197
198
199
200
201
202
203
204
205
206
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 197

# Retries every failed job of this migration.
#
# Failed jobs are processed in batches of 100. Each batch runs in its own
# transaction: the batch is locked, `split_and_retry!` is called on every job,
# and the migration is moved with `execute!`. `execute!` is called once more
# after all batches have been processed.
def retry_failed_jobs!
  failed_jobs = batched_jobs.with_status(:failed)

  failed_jobs.each_batch(of: 100) do |failed_batch|
    self.class.transaction do
      failed_batch.lock.each { |job| job.split_and_retry! }
      execute!
    end
  end

  execute!
end

#should_stop? ⇒ Boolean

Returns:

  • (Boolean)


208
209
210
211
212
213
214
215
216
217
218
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 208

# Whether too many jobs have failed since the migration was started.
#
# Only jobs created since `started_at` are counted.
#
# @return nil when the migration has no `started_at` or fewer than MINIMUM_JOBS
#   jobs exist; otherwise whether the failed share exceeds MAXIMUM_FAILED_RATIO
def should_stop?
  return unless started_at

  jobs_since_start = batched_jobs.created_since(started_at).count
  return if jobs_since_start < MINIMUM_JOBS

  failures_since_start = batched_jobs.with_status(:failed).created_since(started_at).count
  failure_ratio = failures_since_start.fdiv(jobs_since_start)

  failure_ratio > MAXIMUM_FAILED_RATIO
end

#smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2) ⇒ Object



261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 261

# Exponentially weighted average of the `time_efficiency` of the most recent
# successful jobs, rounded to two decimals.
#
# Jobs are read in reverse execution order. Jobs without a `time_efficiency`
# are dropped, and the remaining job at position `i` (0 being the first one
# read) is weighted with `(1 - alpha)**i`.
#
# @param number_of_jobs number of jobs to load; nil is returned when fewer exist
# @param alpha smoothing factor used to derive the per-position weight
# @return the weighted average, or nil when there are too few jobs or the
#   weights sum to zero (which includes the case where no job has an efficiency)
def smoothed_time_efficiency(number_of_jobs: 10, alpha: 0.2)
  jobs = batched_jobs.successful_in_execution_order.reverse_order.limit(number_of_jobs).with_preloads

  return if jobs.size < number_of_jobs

  decay = 1 - alpha
  dividend = 0
  divisor = 0

  # Single pass: numerator and denominator share the same per-position weight.
  jobs.map(&:time_efficiency).compact.each_with_index do |efficiency, position|
    weight = decay**position
    dividend += efficiency * weight
    divisor += weight
  end

  return if divisor.zero?

  (dividend / divisor).round(2)
end

#to_s ⇒ Object



322
323
324
# File 'lib/gitlab/database/background_migration/batched_migration.rb', line 322

# Short human-readable identifier containing the migration's id.
def to_s
  format('BatchedMigration[id: %s]', id)
end