Module: Gitlab::Database::PartitioningMigrationHelpers::TableManagementHelpers

Includes:
DynamicModelHelpers, MigrationHelpers, Migrations::BackgroundMigrationHelpers, SchemaHelpers
Included in:
Gitlab::Database::PartitioningMigrationHelpers
Defined in:
lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb

Defined Under Namespace

Classes: JobArguments

Constant Summary collapse

ALLOWED_TABLES =
%w[audit_events].freeze
ERROR_SCOPE =
'table partitioning'
MIGRATION_CLASS_NAME =
"::#{module_parent_name}::BackfillPartitionedTable"
BATCH_INTERVAL =
2.minutes.freeze
BATCH_SIZE =
50_000

Constants included from Migrations::BackgroundMigrationHelpers

Migrations::BackgroundMigrationHelpers::BACKGROUND_MIGRATION_BATCH_SIZE, Migrations::BackgroundMigrationHelpers::BACKGROUND_MIGRATION_JOB_BUFFER_SIZE

Constants included from MigrationHelpers

MigrationHelpers::DEFAULT_TIMESTAMP_COLUMNS, MigrationHelpers::MAX_IDENTIFIER_NAME_LENGTH, MigrationHelpers::PERMITTED_TIMESTAMP_COLUMNS

Instance Method Summary collapse

Methods included from Migrations::BackgroundMigrationHelpers

#bulk_migrate_async, #bulk_migrate_in, #bulk_queue_background_migration_jobs_by_range, #migrate_async, #migrate_in, #perform_background_migration_inline?, #queue_background_migration_jobs_by_range_at_intervals, #with_migration_context

Methods included from MigrationHelpers

#add_check_constraint, #add_column_with_default, #add_concurrent_foreign_key, #add_concurrent_index, #add_not_null_constraint, #add_text_limit, #add_timestamps_with_timezone, #backfill_iids, #change_column_type_concurrently, #change_column_type_using_background_migration, #check_constraint_exists?, #check_constraint_name, #check_not_null_constraint_exists?, #check_text_limit_exists?, #check_trigger_permissions!, #cleanup_concurrent_column_rename, #cleanup_concurrent_column_type_change, #column_for, #concurrent_foreign_key_name, #copy_foreign_keys, #copy_indexes, #create_extension, #create_or_update_plan_limit, #disable_statement_timeout, #drop_extension, #false_value, #foreign_key_exists?, #foreign_keys_for, #index_exists_by_name?, #indexes_for, #install_rename_triggers, #install_rename_triggers_for_postgresql, #postgres_exists_by_name?, #remove_check_constraint, #remove_concurrent_index, #remove_concurrent_index_by_name, #remove_foreign_key_if_exists, #remove_foreign_key_without_error, #remove_not_null_constraint, #remove_rename_triggers_for_postgresql, #remove_text_limit, #remove_timestamps, #rename_column_concurrently, #rename_column_using_background_migration, #rename_trigger_name, #replace_sql, #sidekiq_queue_length, #sidekiq_queue_migrate, #true_value, #undo_cleanup_concurrent_column_rename, #undo_rename_column_concurrently, #update_column_in_batches, #validate_check_constraint, #validate_foreign_key, #validate_not_null_constraint, #validate_text_limit, #with_lock_retries

Methods included from DynamicModelHelpers

#define_batchable_model

Methods included from SchemaHelpers

#assert_not_in_transaction_block, #create_comment, #create_trigger, #create_trigger_function, #drop_function, #drop_trigger, #function_exists?, #object_name, #tmp_table_name, #trigger_exists?, #with_lock_retries

Instance Method Details

#cleanup_partitioning_data_migration(table_name) ⇒ Object

Cleanup a previously enqueued background migration to copy data into a partitioned table. This will not prevent the enqueued jobs from executing, but instead cleans up information in the database used to track the state of the background migration. It should be safe to also remove the partitioned table even if the background jobs are still in-progress, as the absence of the table will cause them to safely exit.

Example:

cleanup_partitioning_data_migration :audit_events

121
122
123
124
125
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 121

# Removes tracking rows for a previously enqueued backfill migration.
# Does NOT cancel already-enqueued background jobs; it only clears the
# bookkeeping used to track their state. Per the surrounding docs, the
# jobs exit safely if the partitioned table has been dropped.
def cleanup_partitioning_data_migration(table_name)
  # Guard: only tables whitelisted in ALLOWED_TABLES may be partitioned.
  assert_table_is_allowed(table_name)

  cleanup_migration_jobs(table_name)
end

#create_hash_partitions(table_name, number_of_partitions) ⇒ Object


127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 127

# Creates +number_of_partitions+ hash partitions for the already-partitioned
# +table_name+, named "#{table_name}_NN" where NN is the remainder,
# zero-padded so partition names sort lexicographically (e.g. _00 .. _31).
#
# All partitions are created in a single transaction so the set is
# created atomically.
def create_hash_partitions(table_name, number_of_partitions)
  transaction do
    # Digits needed for the widest suffix — loop-invariant, so compute once.
    decimals = Math.log10(number_of_partitions).ceil
    schema = Gitlab::Database::STATIC_PARTITIONS_SCHEMA

    (0..number_of_partitions - 1).each do |partition|
      suffix = "%0#{decimals}d" % partition
      partition_name = "#{table_name}_#{suffix}"

      execute(<<~SQL)
        CREATE TABLE #{schema}.#{partition_name}
        PARTITION OF #{table_name}
        FOR VALUES WITH (MODULUS #{number_of_partitions}, REMAINDER #{partition});
      SQL
    end
  end
end

#drop_partitioned_table_for(table_name) ⇒ Object

Clean up a partitioned copy of an existing table. First, deletes the database function and trigger that were used to copy writes to the partitioned table, then removes the partitioned table (also removing partitions).

Example:

drop_partitioned_table_for :audit_events

79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 79

def drop_partitioned_table_for(table_name)
  assert_table_is_allowed(table_name)
  # Must run outside a transaction: with_lock_retries manages its own
  # transactions internally.
  assert_not_in_transaction_block(scope: ERROR_SCOPE)

  # Step 1: drop the sync trigger first, so writes to the original table
  # stop being copied before the rest of the machinery is removed.
  with_lock_retries do
    trigger_name = make_sync_trigger_name(table_name)
    drop_trigger(table_name, trigger_name)
  end

  # Step 2: the trigger function can only be dropped once no trigger
  # references it.
  function_name = make_sync_function_name(table_name)
  drop_function(function_name)

  # Step 3: drop the partitioned copy (this also removes its partitions).
  partitioned_table_name = make_partitioned_table_name(table_name)
  drop_table(partitioned_table_name)
end

#enqueue_partitioning_data_migration(table_name) ⇒ Object

Enqueue the background jobs that will backfill data in the partitioned table, by batch-copying records from original table. This helper should be called from a post-deploy migration.

Example:

enqueue_partitioning_data_migration :audit_events

102
103
104
105
106
107
108
109
110
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 102

# Schedules the background jobs that batch-copy existing rows from
# +table_name+ into its partitioned copy. Intended to be called from a
# post-deploy migration (per the surrounding docs).
def enqueue_partitioning_data_migration(table_name)
  # Guard: only tables whitelisted in ALLOWED_TABLES may be partitioned.
  assert_table_is_allowed(table_name)

  assert_not_in_transaction_block(scope: ERROR_SCOPE)

  partitioned_table_name = make_partitioned_table_name(table_name)
  # The primary key is used to slice the source table into batches.
  primary_key = connection.primary_key(table_name)
  enqueue_background_migration(table_name, partitioned_table_name, primary_key)
end

#finalize_backfilling_partitioned_table(table_name) ⇒ Object

Executes cleanup tasks from a previous BackgroundMigration to backfill a partitioned table by finishing pending jobs and performing a final data synchronization. This performs two steps:

1. Wait to finish any pending BackgroundMigration jobs that have not succeeded
2. Inline copy any missed rows from the original table to the partitioned table

*NOTE* Migrations using this method cannot be scheduled in the same release as the migration that schedules the background migration using the `enqueue_background_migration` helper, or else the background migration jobs will be force-executed.

Example:

finalize_backfilling_partitioned_table :audit_events

158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 158

def finalize_backfilling_partitioned_table(table_name)
  assert_table_is_allowed(table_name)
  assert_not_in_transaction_block(scope: ERROR_SCOPE)

  # The partitioned table must still exist; its absence indicates the
  # earlier partitioning migration was rolled back, so finalizing would
  # be meaningless.
  partitioned_table_name = make_partitioned_table_name(table_name)
  unless table_exists?(partitioned_table_name)
    raise "could not find partitioned table for #{table_name}, " \
      "this could indicate the previous partitioning migration has been rolled back."
  end

  # Step 1: force-execute any pending backfill jobs, but only those whose
  # serialized arguments target this source table (the filter block).
  Gitlab::BackgroundMigration.steal(MIGRATION_CLASS_NAME) do |raw_arguments|
    JobArguments.from_array(raw_arguments).source_table_name == table_name.to_s
  end

  # Step 2: inline-copy any rows the background jobs missed.
  primary_key = connection.primary_key(table_name)
  copy_missed_records(table_name, partitioned_table_name, primary_key)

  # VACUUM cannot honor a statement timeout on a large table, so lift it
  # for the final freeze/analyze pass.
  disable_statement_timeout do
    execute("VACUUM FREEZE ANALYZE #{partitioned_table_name}")
  end
end

#partition_table_by_date(table_name, column_name, min_date: nil, max_date: nil) ⇒ Object

Creates a partitioned copy of an existing table, using a RANGE partitioning strategy on a timestamp column. One partition is created per month between the given `min_date` and `max_date`. Also installs a trigger on the original table to copy writes into the partitioned table. To copy over historic data from before creation of the partitioned table, use the `enqueue_partitioning_data_migration` helper in a post-deploy migration.

A copy of the original table is required as PG currently does not support partitioning existing tables.

Example:

partition_table_by_date :audit_events, :created_at, min_date: Date.new(2020, 1), max_date: Date.new(2020, 6)

Options are:

:min_date - a date specifying the lower bound of the partition range
:max_date - a date specifying the upper bound of the partition range, defaults to today + 1 month

Unless min_date is specified explicitly, we default to

  1. The minimum value for the partitioning column in the table

  2. If no data is present yet, the current month


43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'lib/gitlab/database/partitioning_migration_helpers/table_management_helpers.rb', line 43

def partition_table_by_date(table_name, column_name, min_date: nil, max_date: nil)
  assert_table_is_allowed(table_name)

  assert_not_in_transaction_block(scope: ERROR_SCOPE)

  # Default upper bound: one month past today.
  max_date ||= Date.today + 1.month

  # Default lower bound: the month of the earliest value in the
  # partitioning column, or (when the table is empty and the query
  # returns NULL) one month before max_date. NOTE: the heredoc body
  # below is the argument to select_one on this line.
  min_date ||= connection.select_one(<<~SQL)['minimum'] || max_date - 1.month
    SELECT date_trunc('MONTH', MIN(#{column_name})) AS minimum
    FROM #{table_name}
  SQL

  raise "max_date #{max_date} must be greater than min_date #{min_date}" if min_date >= max_date

  # A primary key is required both for the partitioned copy and for
  # batching during the later data backfill.
  primary_key = connection.primary_key(table_name)
  raise "primary key not defined for #{table_name}" if primary_key.nil?

  partition_column = find_column_definition(table_name, column_name)
  raise "partition column #{column_name} does not exist on #{table_name}" if partition_column.nil?

  partitioned_table_name = make_partitioned_table_name(table_name)

  # Create the copy and its monthly partitions atomically; the sync
  # trigger is installed afterwards, outside the transaction.
  transaction do
    create_range_partitioned_copy(table_name, partitioned_table_name, partition_column, primary_key)
    create_daterange_partitions(partitioned_table_name, partition_column.name, min_date, max_date)
  end
  create_trigger_to_sync_tables(table_name, partitioned_table_name, primary_key)
end