Method: Nimbus::ClassificationTree#estimate_importances

Defined in:
lib/nimbus/classification_tree.rb

#estimate_importances(oob_ids) ⇒ Object

Estimation of importance for every SNP.

The importance of any SNP in the tree is calculated using the OOB sample. For every SNP, every individual in the sample is pushed down the tree, but with the value of that SNP swapped for that of another individual in the sample.

That way the difference between the generalization error and the error frequency with the SNP value modified can be estimated for any given SNP.

This method computes importance estimations for every SNP used in the tree (for any other SNP it would be 0).



90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/nimbus/classification_tree.rb', line 90

# Estimates an importance value for each distinct SNP in @used_snps from the
# out-of-bag (OOB) sample.
#
# For each SNP, every OOB individual is classified through
# traverse_with_permutation, which receives the individual's own snp_list, the
# SNP under evaluation and the snp_list of the individual found at the same
# position of a freshly shuffled copy of oob_ids. The fraction of predictions
# that differ from @id_to_fenotype, minus @generalization_error and rounded to
# 5 decimals, is stored as that SNP's importance.
#
# @param oob_ids [Array] ids of the OOB individuals; used to index both
#   individuals and @id_to_fenotype
# @return [Hash, nil] SNP => importance (the same object kept in @importances);
#   nil when @generalization_error is nil and
#   generalization_error_from_oob(oob_ids) returns nil as well
def estimate_importances(oob_ids)
  if @generalization_error.nil?
    # NOTE(review): presumably generalization_error_from_oob also stores its
    # result in @generalization_error, which is read below; confirm in its definition.
    return nil if generalization_error_from_oob(oob_ids).nil?
  end

  sample_size = oob_ids.size
  @importances = {}
  @used_snps.uniq.each do |snp|
    # A new random pairing of individuals is drawn for every SNP.
    donor_ids = oob_ids.shuffle
    misses = oob_ids.zip(donor_ids).count do |id, donor_id|
      prediction = traverse_with_permutation(@structure, individuals[id].snp_list, snp, individuals[donor_id].snp_list)
      @id_to_fenotype[id] != prediction
    end
    @importances[snp] = ((misses.to_f / sample_size) - @generalization_error).round(5)
  end
  @importances
end