Class: Gitlab::BackgroundMigration::MarkDuplicateNpmPackagesForDestruction

Inherits:
BatchedMigrationJob
  • Object
show all
Defined in:
lib/gitlab/background_migration/mark_duplicate_npm_packages_for_destruction.rb

Overview

Finds duplicate npm packages and marks them for destruction.

Defined Under Namespace

Classes: Package

Constant Summary collapse

NPM_PACKAGE_TYPE =
2
PENDING_DESTRUCTION_STATUS =
4

Constants inherited from BatchedMigrationJob

BatchedMigrationJob::DEFAULT_FEATURE_CATEGORY

Constants included from Database::DynamicModelHelpers

Database::DynamicModelHelpers::BATCH_SIZE

Instance Method Summary collapse

Methods inherited from BatchedMigrationJob

#batch_metrics, feature_category, #filter_batch, generic_instance, #initialize, job_arguments, job_arguments_count, operation_name, scope_to

Methods included from Database::DynamicModelHelpers

#define_batchable_model, #each_batch, #each_batch_range

Constructor Details

This class inherits a constructor from Gitlab::BackgroundMigration::BatchedMigrationJob

Instance Method Details

#perform ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/gitlab/background_migration/mark_duplicate_npm_packages_for_destruction.rb', line 20

# Marks duplicate npm packages as pending destruction.
#
# For the project ids of each batch, npm packages that are not already pending
# destruction are grouped by (project_id, name, version). In every group with
# more than one row, the package with the highest id is kept and every other
# npm package of that group gets PENDING_DESTRUCTION_STATUS.
#
# NOTE(review): `distinct_each_batch` is defined outside this block; it is
# presumably the batched-migration helper that yields relations of distinct
# batching-column values — confirm against BatchedMigrationJob.
def perform
  distinct_each_batch do |batch|
    project_ids = batch.pluck(:project_id)

    # One row per duplicated (project_id, name, version); max_id is the
    # package that survives.
    duplicates = Package
      .where(project_id: project_ids, package_type: NPM_PACKAGE_TYPE)
      .where.not(status: PENDING_DESTRUCTION_STATUS)
      .select('project_id, name, version, MAX(id) AS max_id')
      .group(:project_id, :name, :version)
      .having('COUNT(*) > 1')

    join_query = <<~SQL.squish
      INNER JOIN (#{duplicates.to_sql}) AS duplicates
      ON packages_packages.project_id = duplicates.project_id
      AND packages_packages.name = duplicates.name
      AND packages_packages.version = duplicates.version
    SQL

    # The join matches on project/name/version only, so the outer relation
    # must also be limited to npm packages. Otherwise a package of another
    # type that happens to share those three values with a duplicated npm
    # package would be marked for destruction too.
    packages_to_mark = Package
      .where(package_type: NPM_PACKAGE_TYPE)
      .joins(join_query)
      .where.not('packages_packages.id = duplicates.max_id')

    packages_to_mark.each_batch do |batch_to_update|
      batch_to_update.update_all(status: PENDING_DESTRUCTION_STATUS)
    end
  end
end