Module: ClassifierReborn::ClassifierValidator
- Defined in:
- lib/classifier-reborn/validators/classifier_validator.rb
Class Method Summary
- .build_run_report(conf_mat) ⇒ Object
- .conf_mat_to_tab(conf_mat) ⇒ Object
- .conf_tab_derivations(tab) ⇒ Object
- .cross_validate(classifier, sample_data, fold = 10, *options) ⇒ Object
- .divide(dividend, divisor) ⇒ Object
- .empty_conf_mat(categories) ⇒ Object
- .evaluate(classifier, test_data) ⇒ Object
- .generate_report(*conf_mats) ⇒ Object
- .print_conf_mat(conf_mat) ⇒ Object
- .print_conf_tab(conf_tab) ⇒ Object
- .print_derivations(derivations) ⇒ Object
- .print_run_report(stats, prefix = '', print_header = false) ⇒ Object
- .validate(classifier, training_data, test_data, *options) ⇒ Object
Class Method Details
.build_run_report(conf_mat) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 70
#
# Summarises one confusion matrix into overall run statistics.
#
# @param conf_mat [Hash] nested hash of actual => { predicted => count }
# @return [Hash] :total, :correct, :incorrect counts and the :accuracy ratio
#   (correct / total, as computed by divide)
def build_run_report(conf_mat)
  counts = { correct: 0, incorrect: 0 }
  conf_mat.each do |actual, row|
    row.each do |predicted, count|
      # Diagonal cells (actual == predicted) are the correct predictions.
      outcome = actual == predicted ? :correct : :incorrect
      counts[outcome] += count
    end
  end
  total = counts[:correct] + counts[:incorrect]
  {
    total: total,
    correct: counts[:correct],
    incorrect: counts[:incorrect],
    accuracy: divide(counts[:correct], total)
  }
end
.conf_mat_to_tab(conf_mat) ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 85
#
# Converts a multi-class confusion matrix into one binary (one-vs-rest)
# confusion table per class.
#
# Every key of +conf_mat+ is treated in turn as the "positive" class. Each
# cell of the matrix is then counted as:
#   * :p / :n - the prediction was / was not the positive class
#   * :t / :f - the prediction agrees / disagrees with the actual label about
#               whether the sample belongs to the positive class
#
# So tab[:p][:t] is TP, tab[:p][:f] is FP, tab[:n][:t] is TN and
# tab[:n][:f] is FN for that class.
#
# @param conf_mat [Hash] nested hash of actual => { predicted => count }
# @return [Hash] positive class => { p: { t:, f: }, n: { t:, f: } }
def conf_mat_to_tab(conf_mat)
  conf_tab = Hash.new { |h, k| h[k] = { p: { t: 0, f: 0 }, n: { t: 0, f: 0 } } }
  conf_mat.each_key do |positive|
    conf_mat.each do |actual, cols|
      cols.each do |predicted, v|
        predicted_positive = positive == predicted
        actually_positive = positive == actual
        # Truth is judged relative to the positive class, not by exact label
        # equality: confusing two *other* classes is still a true negative
        # from this class's point of view.
        outcome = predicted_positive == actually_positive ? :t : :f
        conf_tab[positive][predicted_positive ? :p : :n][outcome] += v
      end
    end
  end
  conf_tab
end
.conf_tab_derivations(tab) ⇒ Object
134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 134
#
# Derives the standard binary-classification statistics from one confusion
# table.
#
# @param tab [Hash] table of the form { p: { t:, f: }, n: { t:, f: } } where
#   p/n is the predicted side and t/f says whether that prediction was right
# @return [Hash] raw counts (population, condition positive/negative, TP, TN,
#   FP, FN) followed by prevalence, specificity, recall, precision, accuracy
#   and F1 score; every ratio is computed with divide
def conf_tab_derivations(tab)
  true_pos = tab[:p][:t]
  false_pos = tab[:p][:f]
  true_neg = tab[:n][:t]
  false_neg = tab[:n][:f]

  positives = true_pos + false_neg
  negatives = true_neg + false_pos
  total = positives + negatives

  {
    total_population: total,
    condition_positive: positives,
    condition_negative: negatives,
    true_positive: true_pos,
    true_negative: true_neg,
    false_positive: false_pos,
    false_negative: false_neg,
    prevalence: divide(positives, total),
    specificity: divide(true_neg, negatives),
    recall: divide(true_pos, positives),
    precision: divide(true_pos, true_pos + false_pos),
    accuracy: divide(true_pos + true_neg, total),
    f1_score: divide(2 * true_pos, 2 * true_pos + false_pos + false_neg)
  }
end
.cross_validate(classifier, sample_data, fold = 10, *options) ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 7
#
# Runs k-fold cross validation and prints the resulting report.
#
# The samples are shuffled IN PLACE, cut into +fold+ equally sized partitions
# (samples left over after fold * (length / fold) are not used), and each
# partition is used once as test data while the others are used for training.
# The classifier is reset once all folds have run.
#
# @param classifier [String, Object] a classifier instance, or the name of a
#   constant under ClassifierReborn that is instantiated with no arguments
# @param sample_data [Array] records passed on to validate
# @param fold [Integer] number of partitions
# @return [Object] whatever generate_report returns
# @raise [ArgumentError] when there are fewer samples than folds
def cross_validate(classifier, sample_data, fold = 10, *)
  classifier = ClassifierReborn.const_get(classifier).new if classifier.is_a?(String)
  sample_data.shuffle!
  partition_size = sample_data.length / fold
  # each_slice would raise an opaque "invalid slice size" ArgumentError here;
  # raise the same class with a message that names the real problem.
  unless partition_size.positive?
    raise ArgumentError, "cannot split #{sample_data.length} samples into #{fold} folds"
  end

  # Materialise the partitions once instead of re-running the slice
  # enumerator on every iteration.
  partitions = sample_data.each_slice(partition_size).first(fold)
  conf_mats = fold.times.map do |i|
    training_data = partitions.dup
    test_data = training_data.delete_at(i)
    # Non-destructive flatten: flatten! returns nil when there is nothing to
    # flatten (e.g. fold == 1), which would hand nil to validate.
    validate(classifier, training_data.flatten(1), test_data)
  end
  classifier.reset
  generate_report(conf_mats)
end
.divide(dividend, divisor) ⇒ Object
166 167 168 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 166
#
# Floating-point division that yields 0.0 instead of failing or producing
# NaN/Infinity when the divisor is zero.
#
# @param dividend [Numeric]
# @param divisor [Numeric]
# @return [Float] dividend / divisor, or 0.0 when divisor is zero
def divide(dividend, divisor)
  return 0.0 if divisor.zero?

  dividend / divisor.to_f
end
.empty_conf_mat(categories) ⇒ Object
162 163 164 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 162
#
# Builds a square confusion matrix with every count initialised to zero.
#
# @param categories [Array] class labels, used both as row (actual) keys and
#   as column (predicted) keys
# @return [Hash] actual => { predicted => 0 }, with an independent row hash
#   per actual class
def empty_conf_mat(categories)
  categories.each_with_object({}) do |actual, matrix|
    matrix[actual] = categories.each_with_object({}) { |predicted, row| row[predicted] = 0 }
  end
end
.evaluate(classifier, test_data) ⇒ Object
31 32 33 34 35 36 37 38 39 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 31
#
# Classifies every test record with an already trained classifier and tallies
# the outcomes in a confusion matrix.
#
# @param classifier [Object] must respond to categories and classify
# @param test_data [Array] records whose first element is the expected
#   category and whose last element is the input handed to classify
# @return [Hash] confusion matrix of actual => { predicted => count }
def evaluate(classifier, test_data)
  blank_matrix = empty_conf_mat(classifier.categories.sort)
  test_data.each_with_object(blank_matrix) do |rec, conf_mat|
    # Normalise the expected label ("some_label" -> "Some label") before it
    # is used as a row key of the matrix.
    actual = rec.first.tr('_', ' ').capitalize
    predicted = classifier.classify(rec.last)
    # Records the classifier could not classify are left out of the matrix.
    next if predicted.nil?

    conf_mat[actual][predicted] += 1
  end
end
.generate_report(*conf_mats) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 41
#
# Prints the full validation report for one or more confusion matrices: a
# per-run summary table, the accumulated confusion matrix, and the per-class
# confusion tables with their derived statistics.
#
# @param conf_mats [Array<Hash>] one or more confusion matrices; nested
#   arrays are flattened. A single matrix is reported as-is, several are
#   summed cell by cell into a fresh matrix keyed by the first matrix's
#   sorted keys.
# @return [Object] the return value of print_conf_tab
def generate_report(*conf_mats)
  conf_mats.flatten!
  multiple_runs = conf_mats.length > 1
  accumulated_conf_mat =
    if conf_mats.length == 1
      conf_mats.first
    else
      empty_conf_mat(conf_mats.first.keys.sort)
    end

  header = 'Run Total Correct Incorrect Accuracy'
  separator = '-' * header.length

  puts
  puts ' Run Report '.center(header.length, '-')
  puts header
  puts separator

  if multiple_runs
    # One summary row per run, numbered from 1, while folding each run's
    # counts into the accumulated matrix.
    conf_mats.each.with_index(1) do |conf_mat, run_number|
      print_run_report(build_run_report(conf_mat), run_number)
      conf_mat.each do |actual, cols|
        cols.each { |predicted, v| accumulated_conf_mat[actual][predicted] += v }
      end
    end
    puts separator
  end

  print_run_report(build_run_report(accumulated_conf_mat), 'All')
  puts
  print_conf_mat(accumulated_conf_mat)
  puts
  print_conf_tab(conf_mat_to_tab(accumulated_conf_mat))
end
.print_conf_mat(conf_mat) ⇒ Object
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 102 def print_conf_mat(conf_mat) header = ['Predicted ->'] + conf_mat.keys + %w[Total Recall] cell_size = header.map(&:length).max header = header.map { |h| h.rjust(cell_size) }.join(' ') puts ' Confusion Matrix '.center(header.length, '-') puts header puts '-' * header.length predicted_totals = conf_mat.keys.map { |predicted| [predicted, 0] }.to_h correct = 0 conf_mat.each do |k, rec| actual_total = rec.values.reduce(:+) puts ([k.ljust(cell_size)] + rec.values.map { |v| v.to_s.rjust(cell_size) } + [actual_total.to_s.rjust(cell_size), divide(rec[k], actual_total).round(5).to_s.rjust(cell_size)]).join(' ') rec.each do |cat, val| predicted_totals[cat] += val correct += val if cat == k end end total = predicted_totals.values.reduce(:+) puts '-' * header.length puts (['Total'.ljust(cell_size)] + predicted_totals.values.map { |v| v.to_s.rjust(cell_size) } + [total.to_s.rjust(cell_size), ''.rjust(cell_size)]).join(' ') puts (['Precision'.ljust(cell_size)] + predicted_totals.keys.map { |k| divide(conf_mat[k][k], predicted_totals[k]).round(5).to_s.rjust(cell_size) } + ['Accuracy ->'.rjust(cell_size), divide(correct, total).round(5).to_s.rjust(cell_size)]).join(' ') end |
.print_conf_tab(conf_tab) ⇒ Object
125 126 127 128 129 130 131 132 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 125
#
# Prints, for every positive class, a heading followed by the statistics
# derived from that class's confusion table and a blank line.
#
# @param conf_tab [Hash] positive class => confusion table, as accepted by
#   conf_tab_derivations
# @return [Hash] conf_tab itself (the result of each)
def print_conf_tab(conf_tab)
  conf_tab.each do |positive, tab|
    puts "# Positive class: #{positive}"
    print_derivations(conf_tab_derivations(tab))
    puts
  end
end
.print_derivations(derivations) ⇒ Object
155 156 157 158 159 160 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 155
#
# Prints each derived statistic on its own line as "Label : value".
#
# Keys are humanised (underscores become spaces, first letter capitalised)
# and padded to the length of the longest key so the separators line up.
#
# @param derivations [Hash] statistic name => value
# @return [Hash] derivations itself (the result of each)
def print_derivations(derivations)
  label_width = derivations.keys.map(&:length).max
  derivations.each do |name, value|
    label = name.to_s.tr('_', ' ').capitalize.ljust(label_width)
    puts "#{label} : #{value}"
  end
end
.print_run_report(stats, prefix = '', print_header = false) ⇒ Object
97 98 99 100 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 97
#
# Prints one row of the run report, optionally preceded by the column header.
#
# @param stats [Hash] must provide :total, :correct, :incorrect and a numeric
#   :accuracy (as returned by build_run_report)
# @param prefix [#to_s] run label shown in the first column, e.g. a run
#   number or 'All'
# @param print_header [Boolean] when true, the column header is printed
#   before the row
# @return [nil] the result of puts
def print_run_report(stats, prefix = '', print_header = false)
  label = prefix.to_s
  # Use the stringified label for the width as well: callers pass Integer run
  # numbers, which do not respond to #length.
  puts "#{'Run'.rjust([3, label.length].max)} Total Correct Incorrect Accuracy" if print_header
  # Accuracy is rounded to 5 decimals and zero-padded on the right so every
  # row shows at least 7 characters (e.g. 0.8 -> 0.80000).
  accuracy = stats[:accuracy].round(5).to_s.ljust(7, '0')
  puts "#{label.rjust(3)} #{stats[:total].to_s.rjust(9)} #{stats[:correct].to_s.rjust(9)} #{stats[:incorrect].to_s.rjust(9)} #{accuracy.rjust(9)}"
end
.validate(classifier, training_data, test_data, *options) ⇒ Object
22 23 24 25 26 27 28 29 |
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 22
#
# Trains a classifier from scratch on the training data and evaluates it on
# the test data.
#
# @param classifier [String, Object] a classifier instance, or the name of a
#   constant under ClassifierReborn that is instantiated with no arguments
# @param training_data [Array] records whose first and last elements are the
#   two arguments passed to classifier.train
# @param test_data [Array] records handed to evaluate
# @return [Hash] the confusion matrix produced by evaluate
def validate(classifier, training_data, test_data, *)
  if classifier.is_a?(String)
    classifier = ClassifierReborn.const_get(classifier).new
  end

  # Discard anything the classifier learned earlier so runs are independent.
  classifier.reset
  training_data.each { |rec| classifier.train(rec.first, rec.last) }

  evaluate(classifier, test_data)
end