Class: GeneValidator::Validate

Inherits:

Object

Object
GeneValidator::Validate

show all

Extended by:: Forwardable

Defined in:: lib/genevalidator/validation.rb

Overview

Class that runs the validations (instantiated for each query).

Instance Method Summary collapse

#check_validations(vals) ⇒ Object
#check_validations_output(vals) ⇒ Object
#compute_scores ⇒ Object
#create_validation_tests(prediction, hits) ⇒ Object
#generate_run_output ⇒ Object
#generate_run_overview ⇒ Object
#init_nucleotide_only_validations(val, prediction, hits) ⇒ Object
#initialize ⇒ Validate constructor

Initializes the object. Params: opt: a hash with the following keys: {validations:, blast_tabular_file:, blast_tabular_options:, blast_xml_file:, db:, raw_sequences:, num_threads:, fast:}; start_idx: number of the sequence from the file to start with; overall_evaluation: boolean variable for printing overall evaluation.
#length_validation_scores(validations, scores) ⇒ Object

Since there are two length validations, it is necessary to adjust the scores accordingly.
#remove_identical_hits(prediction, hits) ⇒ Object

Removes identical hits (100% coverage and >99% identity). Params: prediction: Sequence object; hits: Array of Sequence objects. Output: the array of hit Sequence objects with the identical hits removed (the array passed in is modified in place and returned).
#validate(prediction, hits, current_idx) ⇒ Object

Validates one query and creates the validation report. Params: prediction: Sequence object; hits: Array of Sequence objects; current_idx: the index number of the query.

Constructor Details

#initialize ⇒ `Validate`

Initializes the object. Params: opt: a hash with the following keys: {validations:, blast_tabular_file:, blast_tabular_options:, blast_xml_file:, db:, raw_sequences:, num_threads:, fast:}; start_idx: number of the sequence from the file to start with; overall_evaluation: boolean variable for printing overall evaluation.

# File 'lib/genevalidator/validation.rb', line 121

# Captures the state this validator works with in instance variables.
#
# Takes no arguments: +opt+, +config+, +mutex_array+, +overview+ and
# +query_idx+ are receiverless method calls, not parameters or locals.
# NOTE(review): the class extends Forwardable, so these are presumably
# delegated accessors declared outside this method - confirm.
def initialize
  @opt         = opt
  @config      = config
  @mutex_array = mutex_array
  @run_output  = nil # assigned an Output instance by #validate
  @overview    = overview
  @query_idx   = query_idx
end

Instance Method Details

#check_validations(vals) ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 198

# Sanity-checks the list of validation tests before they are run.
#
# Every element must be a ValidationTest, and no two elements may share a
# +cli_name+. If either rule is broken the error is written to standard
# error and the process exits with status 1.
#
# @param vals [Array] validation test objects to check
# @return [nil] when every check passes
def check_validations(vals)
  # Each entry has to be a validation test.
  vals.each do |test|
    raise ValidationClassError unless test.is_a?(ValidationTest)
  end
  # Command-line aliases have to be unique across the list.
  names = vals.map { |test| test.cli_name }
  raise AliasDuplicationError if names.uniq.length != names.length
rescue ValidationClassError, AliasDuplicationError => err
  $stderr.puts err
  exit 1
end

#check_validations_output(vals) ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 212

# Sanity-checks the results once the validation tests have been run.
#
# The current run output must hold at least one validation, and every
# test's +validation_report+ must be a ValidationReport. If either rule is
# broken the error is written to standard error and the process exits
# with status 1.
#
# @param vals [Array] validation test objects that have been run
# @return [Array] +vals+ itself when every check passes
def check_validations_output(vals)
  raise NoValidationError if @run_output.validations.length.zero?
  vals.each do |test|
    report = test.validation_report
    raise ReportClassError unless report.is_a?(ValidationReport)
  end
rescue NoValidationError, ReportClassError => err
  $stderr.puts err
  exit 1
end

#compute_scores ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 225

# Counts successful and failed validations for the current run output and
# stores the counts plus an overall score on it.
#
# A validation succeeds when its +result+ equals its +expected+ value. It
# fails when they differ and the validation is neither :unapplicable nor
# :error. The counts are then adjusted by #length_validation_scores, which
# may leave them as half values (e.g. 3.5).
#
# The overall score is successes * 90 / (successes + fails), rounded to the
# nearest integer, or 0 when nothing was counted.
#
# @return [Integer] the overall score that was stored
def compute_scores
  validations        = @run_output.validations
  scores             = {}
  scores[:successes] = validations.count { |v| v.result == v.expected }
  scores[:fails]     = validations.count do |v|
    v.validation != :unapplicable && v.validation != :error &&
      v.result != v.expected
  end
  scores = length_validation_scores(validations, scores)

  @run_output.successes = scores[:successes]
  @run_output.fails     = scores[:fails]

  # Use the untruncated counts: truncating the successes (e.g. 3.5 -> 3)
  # shrinks the denominator and inflates the score.
  total = scores[:successes] + scores[:fails]
  @run_output.overall_score =
    if total.zero?
      0
    else
      # Float arithmetic so that #round rounds to nearest instead of
      # integer division flooring the value first.
      (scores[:successes] * 90.0 / total).round
    end
end

#create_validation_tests(prediction, hits) ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 180

# Builds the validation test objects for one query and keeps only those
# requested in the options.
#
# The tests are created in a fixed order; #init_nucleotide_only_validations
# is given the list just before the alignment validation is appended. A test
# is kept when its downcased +cli_name+ is included in @opt[:validations].
#
# @param prediction the query passed to every validation constructor
# @param hits the hits passed to every validation constructor
# @return [Array] the selected validation test objects
def create_validation_tests(prediction, hits)
  tests = [
    MakerQIValidation,
    LengthClusterValidation,
    LengthRankValidation,
    GeneMergeValidation,
    DuplicationValidation
  ].map { |klass| klass.new(prediction, hits) }

  init_nucleotide_only_validations(tests, prediction, hits)
  tests << AlignmentValidation.new(prediction, hits)

  requested = @opt[:validations]
  tests.select { |test| requested.include?(test.cli_name.downcase) }
end

#generate_run_output ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 262

# Emits the report for the current run output in each format (HTML, JSON,
# console), in that order, and then updates the run overview.
#
# @return whatever #generate_run_overview returns
def generate_run_output
  report = @run_output
  report.generate_html
  report.generate_json
  report.print_output_console
  generate_run_overview
end

#generate_run_overview ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 269

# Folds the current run output into the shared @overview statistics.
#
# Collected per query: how many validation errors equal
# NoMafftInstallationError / NoInternetError, the short headers of
# validations that ended in :error, whether every result was :unapplicable
# or :warning ("no evidence"), and whether the overall score reached 75.
# All writes to @overview happen inside @mutex_array.synchronize.
#
# Run times are accumulated per short header as a Pair1 of (summed run
# time, number of runs); validations with a nil/zero run time or an
# :unapplicable / :error outcome are left out.
def generate_run_overview
  reports        = @run_output.validations
  mafft_missing  = 0
  offline        = 0
  failed_headers = []

  reports.each do |report|
    if !report.errors.nil?
      mafft_missing += report.errors.count { |err| err == NoMafftInstallationError }
      offline       += report.errors.count { |err| err == NoInternetError }
    end
    failed_headers << report.short_header if report.validation == :error
  end

  # True only when every validation lacked evidence (also true for none).
  lacks_evidence = reports.all? do |report|
    report.result == :unapplicable || report.result == :warning
  end
  nee_increment = lacks_evidence ? 1 : 0

  if @run_output.overall_score >= 75
    good_increment = 1
    bad_increment  = 0
  else
    good_increment = 0
    bad_increment  = 1
  end

  @mutex_array.synchronize do
    @overview[:no_queries] += 1
    @overview[:scores] << @run_output.overall_score
    @overview[:good_scores] += good_increment
    @overview[:bad_scores] += bad_increment
    @overview[:nee] += nee_increment
    @overview[:no_mafft] += mafft_missing
    @overview[:no_internet] += offline
    failed_headers.each { |header| @overview[:map_errors][header] += 1 }

    reports.each do |report|
      elapsed = report.run_time
      next if elapsed.nil? || elapsed == 0
      next if report.validation == :unapplicable || report.validation == :error
      header  = report.short_header
      running = @overview[:run_time][header]
      @overview[:run_time][header] = Pair1.new(running.x + elapsed,
                                               running.y + 1)
    end
  end
end

#init_nucleotide_only_validations(val, prediction, hits) ⇒ `Object`

# File 'lib/genevalidator/validation.rb', line 192

# Appends the reading-frame validations to +val+, but only when
# @config[:type] is :nucleotide; otherwise +val+ is left untouched.
#
# @param val [Array] list of validation tests, extended in place
# @param prediction the query passed to the validation constructors
# @param hits the hits passed to the validation constructors
# @return [Array, nil] +val+ when tests were appended, nil otherwise
def init_nucleotide_only_validations(val, prediction, hits)
  if @config[:type] == :nucleotide
    val << BlastReadingFrameValidation.new(prediction, hits)
    val << OpenReadingFrameValidation.new(prediction, hits)
  end
end

#length_validation_scores(validations, scores) ⇒ `Object`

Since there are two length validations, it is necessary to adjust the scores accordingly.

# File 'lib/genevalidator/validation.rb', line 244

# Adjusts the success/fail counts so that the two length validations
# together weigh as much as a single validation.
#
# Only applies when +validations+ holds exactly one
# LengthClusterValidationOutput and exactly one LengthRankValidationOutput:
# * both match their expected value -> one success is removed
# * neither matches                 -> one fail is removed
# * they disagree                   -> half a success and half a fail
#                                      are removed
#
# @param validations [Array] validation reports of the current query
# @param scores [Hash] counts under :successes and :fails, updated in place
# @return [Hash] the same +scores+ hash
def length_validation_scores(validations, scores)
  cluster_reports = validations.select do |report|
    report.class == LengthClusterValidationOutput
  end
  rank_reports = validations.select do |report|
    report.class == LengthRankValidationOutput
  end
  return scores unless cluster_reports.length == 1 && rank_reports.length == 1

  cluster_ok = (cluster_reports[0].result == cluster_reports[0].expected)
  rank_ok    = (rank_reports[0].result == rank_reports[0].expected)

  case [cluster_ok, rank_ok]
  when [true, true]
    scores[:successes] -= 1
  when [false, false]
    scores[:fails] -= 1
  else
    scores[:successes] -= 0.5
    scores[:fails] -= 0.5
  end
  scores
end

#remove_identical_hits(prediction, hits) ⇒ `Object`

Removes identical hits (100% coverage and >99% identity). Params: prediction: Sequence object; hits: Array of Sequence objects. Output: the array of hit Sequence objects with the identical hits removed (the array passed in is modified in place and returned).

# File 'lib/genevalidator/validation.rb', line 156

# Removes hits that are identical to the prediction from +hits+.
#
# A hit counts as identical when none of its HSPs has a percent identity
# below 99 (an HSP without identity data counts as low identity) and its
# HSPs together cover every position of the prediction.
#
# @param prediction query object; its +length_protein+ sets how many
#   positions must be covered
# @param hits [Array] hit objects exposing +hsp_list+; modified in place
# @return [Array] the same +hits+ array, without the identical hits
def remove_identical_hits(prediction, hits)
  identical_hits = hits.select do |hit|
    # Test for nil before comparing: nil < 99 would raise NoMethodError.
    low_identity = hit.hsp_list.any? do |hsp|
      hsp.pidentity.nil? || hsp.pidentity < 99
    end
    next false if low_identity

    # Mark each query position that is covered by at least one HSP.
    coverage = Array.new(prediction.length_protein, 0)
    hit.hsp_list.each do |hsp|
      match_from = hsp.match_query_from
      match_to   = hsp.match_query_to
      len        = match_to - match_from + 1
      coverage[(match_from - 1)..(match_to - 1)] = Array.new(len, 1)
    end

    # Require that only 1s remain. A bare "all values equal" test would
    # also accept all-zero coverage (a hit without any HSP).
    coverage.uniq == [1]
  end

  identical_hits.each { |hit| hits.delete(hit) }
  hits
end

#validate(prediction, hits, current_idx) ⇒ `Object`

Validates one query and creates the validation report. Params: prediction: Sequence object; hits: Array of Sequence objects; current_idx: the index number of the query.

# File 'lib/genevalidator/validation.rb', line 136

# Runs the whole validation pipeline for one query: filters the hits,
# builds and runs the validation tests, stores their reports in a new
# Output object (@run_output), scores them and emits the output.
#
# The steps depend on each other and must stay in this order.
#
# @param prediction the query; also supplies +definition+ for the Output
# @param hits [Array] hits for the query
# @param current_idx index number of the query, passed on to Output.new
def validate(prediction, hits, current_idx)
  # Drop hits identical to the prediction before building any test.
  hits = remove_identical_hits(prediction, hits)
  vals = create_validation_tests(prediction, hits)
  # Exits the process if the test list is malformed.
  check_validations(vals)
  vals.each(&:run)
  # The hit count passed here is taken after identical hits were removed.
  @run_output = Output.new(current_idx, hits.length, prediction.definition)
  @run_output.validations = vals.map(&:validation_report)
  # Exits the process if no validation ran or a report has the wrong class.
  check_validations_output(vals)

  compute_scores
  generate_run_output
end

Class: GeneValidator::Validate

Overview

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Validate

Instance Method Details

#check_validations(vals) ⇒ Object

#check_validations_output(vals) ⇒ Object

#compute_scores ⇒ Object

#create_validation_tests(prediction, hits) ⇒ Object

#generate_run_output ⇒ Object

#generate_run_overview ⇒ Object

#init_nucleotide_only_validations(val, prediction, hits) ⇒ Object

#length_validation_scores(validations, scores) ⇒ Object

#remove_identical_hits(prediction, hits) ⇒ Object

#validate(prediction, hits, current_idx) ⇒ Object