Class: BulkSubmission

Inherits:
Object
  • Object
show all
Extended by:
ActiveModel::Naming
Includes:
ActiveModel::AttributeMethods, ActiveModel::Conversion, ActiveModel::Validations, ManifestUtil, Submission::AssetSubmissionFinder
Defined in:
app/models/bulk_submission.rb

Overview

A bulk submission is created through the upload of a spreadsheet (CSV). It contains the information for setting up one or more submissions, allowing for the quick request of multiple pieces of work simultaneously. Bulk submissions are not currently persisted.

Constant Summary collapse

DEFAULT_ENCODING =

This is the default output from Excel

'Windows-1252'
COMMON_FIELDS =

process

[
  # Needed to construct the submission ...
  'template name',
  'study id', 'study name',
  'project id', 'project name', 'submission name',
  'user login',

  # Needed to identify the assets and what happens to them ...
  'asset group id', 'asset group name',
  'fragment size from', 'fragment size to',
  'pcr cycles',
  'primer panel',
  'read length',
  'library type',
  'bait library', 'bait library name',
  'comments',
  'number of lanes',
  'pre-capture plex level',
  'pre-capture group',
  'gigabases expected',
  'priority'
]
ALIAS_FIELDS =
{
  'plate barcode' => 'barcode',
  'tube barcode' => 'barcode'
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from ManifestUtil

#filter_end_of_header, #is_end_of_header?

Methods included from Submission::AssetSubmissionFinder

#find_all_assets_by_name_including_samples!, #find_tubes_including_samples_for!, #find_wells_including_samples_for!, #is_plate?, #is_tube?

Constructor Details

#initialize(attrs = {}) ⇒ BulkSubmission


44
45
46
47
# File 'app/models/bulk_submission.rb', line 44

# Builds an in-memory bulk submission from the supplied attributes.
#
# @param attrs [Hash] may contain :spreadsheet (the uploaded file) and
#   :encoding (the character encoding of that file); DEFAULT_ENCODING is
#   used only when the :encoding key is absent
def initialize(attrs = {})
  self.encoding = attrs.fetch(:encoding) { DEFAULT_ENCODING }
  self.spreadsheet = attrs[:spreadsheet]
end

Instance Attribute Details

#encodingObject

Returns the value of attribute encoding


34
35
36
# File 'app/models/bulk_submission.rb', line 34

# Character encoding the uploaded spreadsheet is expected to be in; it is the
# source encoding used when the file content is converted to UTF-8.
#
# @return [Object] the value currently held in @encoding
def encoding; @encoding; end

#spreadsheetObject

Returns the value of attribute spreadsheet


34
35
36
# File 'app/models/bulk_submission.rb', line 34

# The uploaded file that this bulk submission is built from.
#
# @return [Object] the value currently held in @spreadsheet
def spreadsheet; @spreadsheet; end

Instance Method Details

#add_study_to_assets(assets, study) ⇒ Object


247
248
249
250
251
# File 'app/models/bulk_submission.rb', line 247

# Associates the study with every distinct sample found in the given assets.
# Samples already linked to the study are left untouched.
#
# @param assets [Enumerable] objects responding to #samples
# @param study [Object] the study to append to each sample's #studies
# @return [Array] the distinct samples that were visited
def add_study_to_assets(assets, study)
  distinct_samples = assets.map(&:samples).flatten.uniq
  distinct_samples.each do |sample|
    next if sample.studies.include?(study)

    sample.studies << study
  end
end

#completed_submissionsObject

This is used to present a list of successes


367
368
369
# File 'app/models/bulk_submission.rb', line 367

# Used to present a list of successes.
#
# @return [Array] a two-element array: the collected submission ids followed
#   by the hash of per-submission summary messages (both as populated by #process)
def completed_submissions
  ids = @submission_ids
  summaries = @completed_submissions
  [ids, summaries]
end

#extract_request_options(details) ⇒ Object


253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
# File 'app/models/bulk_submission.rb', line 253

# Builds the request options for an order from one set of spreadsheet details.
#
# :read_length is always copied (even when nil) and :multiplier always starts
# as an empty hash. Every other option is included only when its column holds
# a non-blank value. 'bait library name' takes precedence over 'bait library'.
# The result deliberately keeps the original mixture of symbol and string keys.
#
# @param details [Hash] column name => value
# @return [Hash] the request options
def extract_request_options(details)
  options = { read_length: details['read length'], multiplier: {} }

  # Copies a column into the options only when the spreadsheet supplied a value.
  copy_if_given = lambda do |option_key, column|
    value = details[column]
    options[option_key] = value unless value.blank?
  end

  copy_if_given.call('library_type', 'library type')
  copy_if_given.call('fragment_size_required_from', 'fragment size from')
  copy_if_given.call('fragment_size_required_to', 'fragment size to')
  copy_if_given.call('pcr_cycles', 'pcr cycles')

  # Prefer the explicit name column; fall back to the shorter column heading.
  bait_library = [details['bait library name'], details['bait library']].reject(&:blank?).first
  options[:bait_library_name] = bait_library unless bait_library.nil?

  copy_if_given.call('pre_capture_plex_level', 'pre-capture plex level')
  copy_if_given.call('gigabases_expected', 'gigabases expected')
  copy_if_given.call('primer_panel_name', 'primer panel')

  options
end

#find_template(template_name) ⇒ Object

Returns the SubmissionTemplate and checks that it is valid

Raises:

  • (StandardError)

359
360
361
362
363
364
# File 'app/models/bulk_submission.rb', line 359

# Looks up a SubmissionTemplate by name and checks that it may still be used.
#
# @param template_name [String] name of the template to find
# @return [SubmissionTemplate] the visible template with that name
# @raise [StandardError] when no template has that name, or when the template
#   is not visible (i.e. it has been deprecated)
def find_template(template_name)
  template = SubmissionTemplate.find_by(name: template_name)
  raise StandardError, "Cannot find template #{template_name}" unless template
  return template if template.visible

  raise(StandardError, "Template: '#{template_name}' is deprecated and no longer in use.")
end

#idObject


42
# File 'app/models/bulk_submission.rb', line 42

# Bulk submissions are not persisted, so they never have an identifier.
#
# @return [nil]
def id
  nil
end

#persisted?Boolean


40
# File 'app/models/bulk_submission.rb', line 40

# Bulk submissions are not stored, so this is always false.
#
# @return [Boolean] always false
def persisted?
  false
end

#prepare_order(details) ⇒ Object

Returns an order for the given details


271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
# File 'app/models/bulk_submission.rb', line 271

# Builds (but does not save) an order from one grouped set of spreadsheet details.
#
# Looks up the study, project and user, resolves the library type, finds or
# names the asset group, locates the assets and links the study to their
# samples, then asks the submission template for a new order.
#
# Any StandardError raised along the way is caught: its message is recorded
# against :spreadsheet (prefixed with the affected row numbers) and nil is
# returned instead of an order.
#
# @param details [Hash] column name => value for one order, plus the 'rows',
#   'asset ids', 'asset names', 'plate well' and 'barcode' entries where present
# @return [Object, nil] the order returned by the template's #new_order, or nil on failure
def prepare_order(details)
  # Retrieve common attributes
  study   = Study.find_by_id_or_name!(details['study id'], details['study name'])
  project = Project.find_by_id_or_name!(details['project id'], details['project name'])
  user    = User.find_by(login: details['user login']) or raise StandardError, "Cannot find user #{details['user login'].inspect}"

  # Extract the request options from the row details
  request_options = extract_request_options(details)

  # Check the library type matches a value from the table
  if request_options['library_type'].present?
    # find is case insensitive but we want the correct case sensitive name for requests or we get issues downstream in NPG
    lt = LibraryType.find_by(name: request_options['library_type'])&.name or
      raise StandardError, "Cannot find library type #{request_options['library_type'].inspect}"
    request_options['library_type'] = lt
  end

  # Set up the order attributes
  attributes = {
    study: study,
    project: project,
    user: user,
    comments: details['comments'],
    request_options: request_options,
    pre_cap_group: details['pre-capture group']
  }

  # Deal with the asset group: either it's one we should be loading, or one we should be creating.
  # When no existing group is found, only its name is passed on in the attributes.

  attributes[:asset_group] = study.asset_groups.find_by_id_or_name(details['asset group id'], details['asset group name'])
  attributes[:asset_group_name] = details['asset group name'] if attributes[:asset_group].nil?

  ##
  # We go ahead and find our assets regardless of whether we have an asset group.
  # While this takes longer, it helps to detect cases where an asset group name has been
  # reused. This is a common cause of submission problems.

  # Locate either the assets by name or ID, or find the plate and its wells
  if is_plate?(details)

    found_assets = find_wells_including_samples_for!(details)
  # We've probably got a tube
  elsif is_tube?(details)

    found_assets = find_tubes_including_samples_for!(details)

  else

    # Neither a plate nor a tube: fall back to asset names. An existing asset
    # group with no names or ids given is accepted as-is (no assets to look up).
    asset_ids, asset_names = details.fetch('asset ids', ''), details.fetch('asset names', '')
    found_assets = if attributes[:asset_group] && asset_ids.blank? && asset_names.blank?
                     []
                   elsif asset_names.present?
                     Array(find_all_assets_by_name_including_samples!(asset_names)).uniq
                   elsif asset_ids.present?
                     raise StandardError, 'Specifying assets by id is no longer possible. Please provide a name or barcode.'
                   else
                     raise StandardError, 'Please specify a barcode or name for each asset.'
                   end

    # The number of assets found must match the number of ids and names supplied.
    assets_found, expecting = found_assets.map { |asset| "#{asset.name}(#{asset.id})" }, asset_ids.size + asset_names.size
    raise StandardError, "Too few assets found for #{details['rows']}: #{assets_found.inspect}"  if assets_found.size < expecting
    raise StandardError, "Too many assets found for #{details['rows']}: #{assets_found.inspect}" if assets_found.size > expecting

  end

  # Without an existing group the found assets go straight onto the order; with
  # one, any assets that were specified must equal the group's own assets.
  if attributes[:asset_group].nil?
    attributes[:assets] = found_assets
  elsif found_assets.present? && found_assets != attributes[:asset_group].assets
    raise StandardError, "Asset Group '#{attributes[:asset_group].name}' contains different assets to those you specified. You may be reusing an asset group name"
  end

  add_study_to_assets(found_assets, study)

  # Create the order.  Ensure that the number of lanes is correctly set.
  # 'number of lanes' defaults to 1 when the column is absent.
  sub_template      = find_template(details['template name'])
  number_of_lanes   = details.fetch('number of lanes', 1).to_i

  sub_template.new_order(attributes).tap do |new_order|
    # NOTE(review): request_type_multiplier presumably yields the id of the
    # request type whose multiplier should be set - confirm against Order.
    new_order.request_type_multiplier do |multiplexed_request_type_id|
      new_order.request_options[:multiplier][multiplexed_request_type_id] = number_of_lanes
    end
  end
rescue => e
  # Report the failure against the spreadsheet rather than aborting; the caller
  # receives nil in place of the order.
  errors.add :spreadsheet, "There was a problem on row(s) #{details['rows']}: #{e.message}"
  nil
end

#processObject


125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'app/models/bulk_submission.rb', line 125

# Parses the uploaded spreadsheet and builds one submission per distinct
# submission name.
#
# The file content is converted to UTF-8 from +encoding+ and parsed as CSV.
# If the spreadsheet is valid its rows are grouped via #submission_structure
# and every submission is created inside a single database transaction. Any
# error recorded while building causes the whole transaction to roll back.
#
# @raise [ActiveRecord::RecordInvalid] when errors were recorded while
#   structuring the spreadsheet, before any submission is created
def process
  # Reset the record of successful submissions so the user can be presented with a summary
  @submission_ids = []
  @completed_submissions = {}

  raw_csv = spreadsheet.read
  @csv_rows = CSV.parse(raw_csv.encode!('utf-8', encoding))

  return unless spreadsheet_valid?

  submission_details = submission_structure
  raise ActiveRecord::RecordInvalid, self if errors.count > 0

  # Within a single transaction process each of the rows of the CSV file as a separate submission.  Any name
  # fields need to be mapped to IDs, and the 'assets' field needs to be split up and processed if present.
  ActiveRecord::Base.transaction do
    submission_details.each do |submissions|
      submissions.each do |submission_name, orders|
        login = orders.first['user login']
        user = User.find_by(login: login)

        if user.nil?
          problem = login.nil? ? "No user specified for #{submission_name}" : "Cannot find user #{login.inspect}"
          errors.add :spreadsheet, problem
          next
        end

        begin
          # Orders that failed to prepare come back as nil and are dropped here;
          # their errors have already been recorded.
          prepared_orders = orders.map { |order_details| prepare_order(order_details) }.compact
          submission = Submission.create!(
            name: submission_name,
            user: user,
            orders: prepared_orders,
            priority: max_priority(orders)
          )
          submission.built!

          # Collect successful submissions
          @submission_ids << submission.id
          @completed_submissions[submission.id] = "Submission #{submission.id} built (#{submission.orders.count} orders)"
        rescue Submission::ProjectValidation::Error => e
          errors.add :spreadsheet, "There was an issue with a project: #{e.message}"
        end
      end
    end

    # If there are any errors then the transaction needs to be rolled back.
    raise ActiveRecord::Rollback if errors.count > 0
  end
end

#process_fileObject


51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'app/models/bulk_submission.rb', line 51

# Checks the uploaded file and, when it looks like a CSV, processes it.
#
# Nothing happens when no spreadsheet was supplied. Problems are reported
# through +errors+ rather than raised:
# * an empty file, or a filename not ending in '.csv', adds a :spreadsheet error
# * content that cannot be parsed as CSV adds a :spreadsheet error
# * content that cannot be converted from the selected encoding adds an
#   :encoding error
#
# String#encode signals a mismatched encoding in two ways: with
# Encoding::InvalidByteSequenceError for byte sequences that are malformed in
# the source encoding, and with Encoding::UndefinedConversionError for bytes
# that have no mapping to the destination encoding. Both are reported the same
# way, so an upload saved in a different encoding never escapes as an exception.
def process_file
  # Slightly inelegant file-type checking
  # TODO (jr) Find a better way of verifying the CSV file?
  return if spreadsheet.blank?

  if spreadsheet.size == 0
    errors.add(:spreadsheet, 'The supplied file was empty')
  elsif spreadsheet.original_filename.end_with?('.csv')
    process
  else
    errors.add(:spreadsheet, 'The supplied file was not a CSV file')
  end
rescue CSV::MalformedCSVError
  errors.add(:spreadsheet, 'The supplied file was not a valid CSV file (try opening it with MS Excel)')
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
  errors.add(:encoding, "didn't match for the provided file.")
end

#shared_options!(rows) ⇒ Object


235
236
237
238
239
240
241
242
243
244
245
# File 'app/models/bulk_submission.rb', line 235

# Collapses the COMMON_FIELDS of a group of rows into one value per field.
#
# For each common field the distinct values across the rows are collected. If
# there is more than one, an error naming the field, the asset group and the
# conflicting values is added to :spreadsheet (nothing is raised). The first
# value seen is used either way.
#
# @param rows [Array<Hash>] the row details belonging to one asset group
# @return [Array<Array>] [field, value] pairs, one per common field
def shared_options!(rows)
  COMMON_FIELDS.map do |field|
    distinct_values = rows.map { |row| row[field] }.uniq

    if distinct_values.count > 1
      quoted_values = distinct_values.map { |value| "'#{value}'" }.to_sentence
      errors.add(
        :spreadsheet,
        "#{field} should be identical for all requests in asset group '#{rows.first['asset group name']}'. Given values were: #{quoted_values}."
      )
    end

    [field, distinct_values.first]
  end
end

#submission_structureObject

Process CSV into a structure

this creates an array containing a hash for each distinct "submission name"
  "submission name" => array of orders
  where each order is a hash of headers to values (grouped by "asset group name")

211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'app/models/bulk_submission.rb', line 211

# Turns the CSV data rows into the nested structure consumed by #process.
#
# The result is an array with one single-entry hash per distinct
# "submission name": "submission name" => array of orders, where each order is
# a hash of headers to values built from the rows sharing an
# "asset group name". Blank values are removed from each order.
#
# @return [Array<Hash>]
def submission_structure
  rows_by_submission = Hash.new { |hash, name| hash[name] = [] }

  csv_data_rows.each_with_index do |row, index|
    # Completely empty spreadsheet rows are ignored.
    next if row.all?(&:nil?)

    entries = headers.each_with_index.map { |header, pos| validate_entry(header, pos, row, index + start_row) }
    details = Hash[entries].merge('row' => index + start_row)
    rows_by_submission[details['submission name']] << details
  end

  rows_by_submission.map do |submission_name, submission_rows|
    grouped_rows = submission_rows.group_by { |details| details['asset group name'] }

    orders = grouped_rows.map do |_group_name, group_rows|
      order = Hash[shared_options!(group_rows)]
      order['rows']          = group_rows.comma_separate_field_list_for_display('row')
      order['asset ids']     = group_rows.field_list('asset id', 'asset ids')
      order['asset names']   = group_rows.field_list('asset name', 'asset names')
      order['plate well']    = group_rows.field_list('plate well')
      order['barcode']       = group_rows.field_list('barcode')
      order.delete_if { |_, value| value.blank? }
    end

    { submission_name => orders }
  end
end

#translate(header) ⇒ Object


196
197
198
# File 'app/models/bulk_submission.rb', line 196

# Maps a spreadsheet header onto its canonical name.
#
# @param header [String] a column header
# @return [String] the ALIAS_FIELDS entry for the header, or the header itself
#   when it has no alias
def translate(header)
  ALIAS_FIELDS.fetch(header, header)
end

#valid_header?Boolean


103
104
105
106
107
108
109
# File 'app/models/bulk_submission.rb', line 103

# Checks that the spreadsheet has a header row containing 'submission name'.
#
# When headers exist but lack that column an error is added to :spreadsheet.
# Missing headers return false without adding an error here.
#
# @return [Boolean]
def valid_header?
  if headers.nil?
    false
  elsif headers.include?('submission name')
    true
  else
    errors.add :spreadsheet, "You submitted an incompatible spreadsheet. Please ensure your spreadsheet contains the 'submission name' column"
    false
  end
end