Class: BulkSubmission

Inherits:
Object
  • Object
show all
Extended by:
ActiveModel::Naming
Includes:
ActiveModel::AttributeMethods, ActiveModel::Conversion, ActiveModel::Validations, ManifestUtil, Submission::AssetSubmissionFinder
Defined in:
app/models/bulk_submission.rb

Overview

A bulk submission is created through the upload of a spreadsheet (CSV). It contains the information for setting up one or more submissions, allowing for the quick request of multiple pieces of work simultaneously. Bulk submissions are not currently persisted.

Constant Summary collapse

DEFAULT_ENCODING =

This is the default encoding of output from Excel.

'Windows-1252'.freeze
# Column headers that shared_options! reduces to a single value per group of rows;
# an error is recorded when the rows of one group disagree on any of them.
COMMON_FIELDS =
[
  # Needed to construct the submission ...
  'template name',
  'study id', 'study name',
  'project id', 'project name', 'submission name',
  'user login',

  # Needed to identify the assets and what happens to them ...
  'asset group id', 'asset group name',
  'fragment size from', 'fragment size to',
  'pcr cycles',
  'primer panel',
  'read length',
  'library type',
  'bait library', 'bait library name',
  'comments',
  'number of lanes',
  'pre-capture plex level',
  'pre-capture group',
  'gigabases expected',
  'priority'
].freeze
# Alternative column headers mapped to the header that #translate returns for them.
ALIAS_FIELDS =
{
  'plate barcode' => 'barcode',
  'tube barcode' => 'barcode'
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from ManifestUtil

#filter_end_of_header, #is_end_of_header?

Methods included from Submission::AssetSubmissionFinder

#find_all_assets_by_name_including_samples!, #find_tubes_including_samples_for!, #find_wells_including_samples_for!, #is_plate?, #is_tube?

Constructor Details

#initialize(attrs = {}) ⇒ BulkSubmission

Returns a new instance of BulkSubmission.


45
46
47
48
# File 'app/models/bulk_submission.rb', line 45

# Sets up the bulk submission from the supplied attributes.
#
# @param attrs [Hash] may contain :spreadsheet and :encoding; when :encoding is
#   not given, DEFAULT_ENCODING is used
def initialize(attrs = {})
  self.spreadsheet = attrs[:spreadsheet]
  self.encoding = attrs.fetch(:encoding) { DEFAULT_ENCODING }
end

Instance Attribute Details

#encodingObject

Returns the value of attribute encoding


34
35
36
# File 'app/models/bulk_submission.rb', line 34

# Reader for the encoding attribute.
#
# @return [Object] the current value of @encoding
def encoding; @encoding; end

#spreadsheetObject

Returns the value of attribute spreadsheet


34
35
36
# File 'app/models/bulk_submission.rb', line 34

# Reader for the spreadsheet attribute.
#
# @return [Object] the current value of @spreadsheet
def spreadsheet; @spreadsheet; end

Instance Method Details

#add_study_to_assets(assets, study) ⇒ Object


255
256
257
258
259
# File 'app/models/bulk_submission.rb', line 255

# Associates the study with every distinct sample held by the given assets.
#
# Samples whose studies already include the study are left untouched.
#
# @param assets [Enumerable] objects responding to #samples
# @param study [Object] the study appended to each sample's studies
# @return [Array] the distinct samples that were visited
def add_study_to_assets(assets, study)
  # flat_map collects the samples in one pass instead of map followed by flatten
  assets.flat_map(&:samples).uniq.each do |sample|
    sample.studies << study unless sample.studies.include?(study)
  end
end

#completed_submissionsObject

This is used to present a list of successes


392
393
394
# File 'app/models/bulk_submission.rb', line 392

# Exposes what was recorded for the successful submissions so they can be listed.
#
# @return [Array] a two-element array: @submission_ids followed by @completed_submissions
def completed_submissions
  ids = @submission_ids
  summaries = @completed_submissions
  [ids, summaries]
end

#extract_request_options(details) ⇒ Object


261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'app/models/bulk_submission.rb', line 261

# Builds the request options hash for one order from its row details.
#
# :read_length and an empty :multiplier hash are always included. Every other
# option is only copied across when the corresponding column holds a present value.
#
# @param details [Hash] values keyed by column header
# @return [Hash] request options; note that keys are a mix of Symbols and Strings
def extract_request_options(details)
  request_options = { read_length: details['read length'], multiplier: {} }

  # Option key paired with the column(s) that may supply it, in priority order.
  # The key types are deliberately left exactly as they are (String vs Symbol).
  option_sources = [
    ['library_type',                ['library type']],
    ['fragment_size_required_from', ['fragment size from']],
    ['fragment_size_required_to',   ['fragment size to']],
    ['pcr_cycles',                  ['pcr cycles']],
    [:bait_library_name,            ['bait library name', 'bait library']],
    ['pre_capture_plex_level',      ['pre-capture plex level']],
    ['gigabases_expected',          ['gigabases expected']],
    ['primer_panel_name',           ['primer panel']]
  ]

  option_sources.each do |key, columns|
    source = columns.find { |column| details[column].present? }
    request_options[key] = details[source] if source
  end

  request_options
end

#find_template(template_name) ⇒ Object

Returns the SubmissionTemplate and checks that it is valid

Raises:

  • (StandardError)

384
385
386
387
388
389
# File 'app/models/bulk_submission.rb', line 384

# Looks up a SubmissionTemplate by name and checks that it may still be used.
#
# @param template_name [String] the name of the template to look up
# @return [SubmissionTemplate] the template with that name
# @raise [StandardError] if no template has that name, or the template is not visible
def find_template(template_name)
  template = SubmissionTemplate.find_by(name: template_name)
  raise StandardError, "Cannot find template #{template_name}" unless template

  unless template.visible
    raise StandardError, "Template: '#{template_name}' is deprecated and no longer in use."
  end

  template
end

#idObject


43
# File 'app/models/bulk_submission.rb', line 43

# Always nil: there is no identifier to report.
#
# @return [nil]
def id
  nil
end

#persisted?Boolean

Returns:

  • (Boolean)

41
# File 'app/models/bulk_submission.rb', line 41

# Always false.
#
# @return [Boolean] false
def persisted?
  false
end

#prepare_order(details) ⇒ Object

Returns an order for the given details


289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
# File 'app/models/bulk_submission.rb', line 289

# Builds an order from the details of one group of spreadsheet rows, using the
# submission template named in those details.
#
# Looks up the study, project and user, derives the request options, resolves the
# asset group and/or the assets, adds the study to the samples of the assets found,
# and finally asks the template for a new order.
#
# @param details [Hash] header => value details for one order (see #submission_structure)
# @return [Object, nil] the order returned by the template's new_order, or nil when a
#   StandardError was raised along the way; in that case its message is added to
#   errors on :spreadsheet together with the affected row(s)
def prepare_order(details) # rubocop:todo Metrics/CyclomaticComplexity
  # Retrieve common attributes
  study   = Study.find_by_id_or_name!(details['study id'], details['study name'])
  project = Project.find_by_id_or_name!(details['project id'], details['project name'])
  user    = User.find_by(login: details['user login']) or raise StandardError, "Cannot find user #{details['user login'].inspect}"

  # Extract the request options from the row details
  request_options = extract_request_options(details)

  # Check the library type matches a value from the table
  if request_options['library_type'].present?
    # find is case insensitive but we want the correct case sensitive name for requests or we get issues downstream in NPG
    lt = LibraryType.find_by(name: request_options['library_type'])&.name or
      raise StandardError, "Cannot find library type #{request_options['library_type'].inspect}"
    request_options['library_type'] = lt
  end

  # Set up the order attributes
  attributes = {
    study: study,
    project: project,
    user: user,
    comments: details['comments'],
    request_options: request_options,
    pre_cap_group: details['pre-capture group']
  }

  # Deal with the asset group: either it's one we should be loading, or one we should be creating.
  # When no existing group is found, only its name is passed on in the attributes.

  attributes[:asset_group] = study.asset_groups.find_by_id_or_name(details['asset group id'],
                                                                   details['asset group name'])
  attributes[:asset_group_name] = details['asset group name'] if attributes[:asset_group].nil?

  ##
  # We go ahead and find our assets regardless of whether we have an asset group.
  # While this takes longer, it helps to detect cases where an asset group name has been
  # reused. This is a common cause of submission problems.

  # Locate either the assets by name, or find the plate and its wells
  if is_plate?(details)

    found_assets = find_wells_including_samples_for!(details)
  # We've probably got a tube
  elsif is_tube?(details)

    found_assets = find_tubes_including_samples_for!(details)

  else

    # Neither a plate nor a tube: fall back to asset names. Asset ids are rejected outright.
    asset_ids, asset_names = details.fetch('asset ids', ''), details.fetch('asset names', '')
    found_assets = if attributes[:asset_group] && asset_ids.blank? && asset_names.blank?
                     []
                   elsif asset_names.present?
                     Array(find_all_assets_by_name_including_samples!(asset_names)).uniq
                   elsif asset_ids.present?
                     raise StandardError, 'Specifying assets by id is no longer possible. Please provide a name or barcode.'
                   else
                     raise StandardError, 'Please specify a barcode or name for each asset.'
                   end

    # Compare the number of distinct assets found with the number of ids and names supplied
    assets_found, expecting = found_assets.map do |asset|
                                "#{asset.name}(#{asset.id})"
                              end, asset_ids.size + asset_names.size
    if assets_found.size < expecting
      raise StandardError, "Too few assets found for #{details['rows']}: #{assets_found.inspect}"
    end
    if assets_found.size > expecting
      raise StandardError, "Too many assets found for #{details['rows']}: #{assets_found.inspect}"
    end

  end

  # Without an existing asset group the found assets go straight onto the order;
  # with one, any assets that were found must equal the group's assets.
  if attributes[:asset_group].nil?
    attributes[:assets] = found_assets
  elsif found_assets.present? && found_assets != attributes[:asset_group].assets
    raise StandardError, "Asset Group '#{attributes[:asset_group].name}' contains different assets to those you specified. You may be reusing an asset group name"
  end

  add_study_to_assets(found_assets, study)

  # Create the order.  Ensure that the number of lanes is correctly set.
  sub_template      = find_template(details['template name'])
  number_of_lanes   = details.fetch('number of lanes', 1).to_i

  sub_template.new_order(attributes).tap do |new_order|
    # Record the number of lanes as the multiplier for each request type id yielded
    new_order.request_type_multiplier do |multiplexed_request_type_id|
      new_order.request_options[:multiplier][multiplexed_request_type_id] = number_of_lanes
    end
  end
rescue => e
  # Any failure is reported against the spreadsheet rather than propagated; the caller gets nil
  errors.add :spreadsheet, "There was a problem on row(s) #{details['rows']}: #{e.message}"
  nil
end

#processObject


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'app/models/bulk_submission.rb', line 126

# Parses the uploaded spreadsheet and builds one submission per submission name.
#
# The file content is transcoded from the configured encoding to UTF-8 and parsed
# as CSV. If the spreadsheet is valid, every submission is created inside a single
# database transaction, which is rolled back if any error has been recorded.
#
# @raise [ActiveRecord::RecordInvalid] if errors were recorded while structuring the rows
def process
  # Details of the successful submissions, kept so the user can be shown a summary
  @submission_ids = []
  @completed_submissions = {}

  @csv_rows = CSV.parse(spreadsheet.read.encode!('utf-8', encoding))
  return unless spreadsheet_valid?

  structure = submission_structure
  raise ActiveRecord::RecordInvalid, self if errors.count > 0

  # Everything happens in one transaction so that a failure anywhere undoes all of it.
  ActiveRecord::Base.transaction do
    structure.each do |named_orders|
      named_orders.each do |submission_name, orders|
        login = orders.first['user login']
        user = User.find_by(login: login)

        if user.nil?
          message = if login.nil?
                      "No user specified for #{submission_name}"
                    else
                      "Cannot find user #{login.inspect}"
                    end
          errors.add :spreadsheet, message
          next
        end

        begin
          # prepare_order yields nil for orders it could not build; those are dropped here
          prepared = orders.map { |order_details| prepare_order(order_details) }.compact

          submission = Submission.create!(
            name: submission_name,
            user: user,
            orders: prepared,
            priority: max_priority(orders)
          )
          submission.built!

          # Remember the success for the summary
          @submission_ids << submission.id
          @completed_submissions[submission.id] = "Submission #{submission.id} built (#{submission.orders.count} orders)"
        rescue Submission::ProjectValidation::Error => e
          errors.add :spreadsheet, "There was an issue with a project: #{e.message}"
        end
      end
    end

    # Any recorded error means nothing should be kept
    raise ActiveRecord::Rollback if errors.present?
  end
end

#process_fileObject


52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# File 'app/models/bulk_submission.rb', line 52

# Checks the uploaded file and, if it looks like a CSV file, hands it to #process.
#
# Nothing happens when no spreadsheet is present. An empty file, a file name that
# does not end in '.csv', malformed CSV content and an encoding mismatch are each
# reported through errors rather than raised.
def process_file
  # Slightly inelegant file-type checking
  # TODO (jr) Find a better way of verifying the CSV file?
  return unless spreadsheet.present?

  if spreadsheet.size == 0
    errors.add(:spreadsheet, 'The supplied file was empty')
  elsif spreadsheet.original_filename.end_with?('.csv')
    process
  else
    errors.add(:spreadsheet, 'The supplied file was not a CSV file')
  end
rescue CSV::MalformedCSVError
  errors.add(:spreadsheet, 'The supplied file was not a valid CSV file (try opening it with MS Excel)')
rescue Encoding::InvalidByteSequenceError
  errors.add(:encoding, "didn't match for the provided file.")
end

#shared_options!(rows) ⇒ Object


242
243
244
245
246
247
248
249
250
251
252
253
# File 'app/models/bulk_submission.rb', line 242

# Collapses the COMMON_FIELDS of a group of rows into one value per field.
#
# When the rows disagree on a field, an error listing the differing values is added
# to errors on :spreadsheet (nothing is raised) and the first value is still used.
#
# @param rows [Array<Hash>] row details keyed by column header
# @return [Array<Array>] [field, value] pairs, one for each entry in COMMON_FIELDS
def shared_options!(rows)
  COMMON_FIELDS.map do |field|
    distinct_values = rows.map { |row| row[field] }.uniq

    if distinct_values.count > 1
      quoted_values = distinct_values.map { |value| "'#{value}'" }.to_sentence
      message = "#{field} should be identical for all requests in asset group '#{rows.first['asset group name']}'. Given values were: #{quoted_values}."
      errors.add(:spreadsheet, message)
    end

    [field, distinct_values.first]
  end
end

#submission_structureObject

Process CSV into a structure

this creates an array containing a hash for each distinct "submission name"
  "submission name" => array of orders
  where each order is a hash of headers to values (grouped by "asset group name")

216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
# File 'app/models/bulk_submission.rb', line 216

# Turns the CSV data rows into the nested structure consumed by #process.
#
# Rows are first collected per "submission name". Within each submission they are
# then grouped by "asset group name", and each group becomes one order hash: the
# shared common fields plus the per-row lists (rows, asset ids, asset names,
# plate well, barcode), with blank entries removed.
#
# @return [Array<Hash>] one { submission name => array of order hashes } per submission
def submission_structure
  rows_by_submission = Hash.new { |hash, name| hash[name] = [] }

  csv_data_rows.each_with_index do |row, index|
    # Completely empty rows are ignored
    next if row.all?(&:nil?)

    entries = headers.each_with_index.filter_map do |header, position|
      validate_entry(header, position, row, index + start_row)
    end
    details = entries.to_h.merge('row' => index + start_row)
    rows_by_submission[details['submission name']] << details
  end

  rows_by_submission.map do |submission_name, submission_rows|
    grouped = submission_rows.group_by { |details| details['asset group name'] }

    orders = grouped.map do |_group_name, group_rows|
      order = shared_options!(group_rows).to_h
      order['rows']          = group_rows.comma_separate_field_list_for_display('row')
      order['asset ids']     = group_rows.field_list('asset id', 'asset ids')
      order['asset names']   = group_rows.field_list('asset name', 'asset names')
      order['plate well']    = group_rows.field_list('plate well')
      order['barcode']       = group_rows.field_list('barcode')
      order.delete_if { |_key, value| value.blank? }
    end

    { submission_name => orders }
  end
end

#translate(header) ⇒ Object


200
201
202
# File 'app/models/bulk_submission.rb', line 200

# Maps an aliased column header to its replacement from ALIAS_FIELDS.
#
# @param header [String] a column header
# @return [String] the ALIAS_FIELDS entry for the header, or the header itself if it has none
def translate(header)
  ALIAS_FIELDS.fetch(header, header)
end

#valid_header?Boolean

Returns:

  • (Boolean)

104
105
106
107
108
109
110
# File 'app/models/bulk_submission.rb', line 104

# Checks that the spreadsheet headers include the 'submission name' column.
#
# An error is added to errors on :spreadsheet when headers exist but lack that
# column; nothing is added when there are no headers at all.
#
# @return [Boolean] true only when headers include 'submission name'
def valid_header?
  if headers.nil?
    false
  elsif headers.include?('submission name')
    true
  else
    errors.add :spreadsheet, "You submitted an incompatible spreadsheet. Please ensure your spreadsheet contains the 'submission name' column"
    false
  end
end