Module: ClassifierReborn::ClassifierValidator

Defined in:
lib/classifier-reborn/validators/classifier_validator.rb

Class Method Summary

Class Method Details

.build_run_report(conf_mat) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 70

# Summarises one confusion matrix as correct/incorrect counts plus accuracy.
#
# conf_mat - nested hash of the form { actual => { predicted => count } }.
#
# A cell counts as correct when its actual and predicted keys are equal and
# as incorrect otherwise.
#
# Returns a hash with the keys :total, :correct, :incorrect and :accuracy,
# where :accuracy is divide(correct, total).
def build_run_report(conf_mat)
  tally = { hit: 0, miss: 0 }
  conf_mat.each do |actual_label, row|
    row.each do |predicted_label, count|
      bucket = actual_label == predicted_label ? :hit : :miss
      tally[bucket] += count
    end
  end
  hits = tally[:hit]
  misses = tally[:miss]
  overall = hits + misses
  { total: overall, correct: hits, incorrect: misses, accuracy: divide(hits, overall) }
end

.conf_mat_to_tab(conf_mat) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 85

# Converts a multi-class confusion matrix into one 2x2 confusion table per
# class, treating each class in turn as the "positive" class and every other
# class as "negative" (one-vs-rest).
#
# conf_mat - nested hash of the form { actual => { predicted => count } }.
#
# Returns a hash keyed by class:
#   { positive => { p: { t: TP, f: FP }, n: { t: TN, f: FN } } }
# :p / :n records whether the positive class was predicted; :t / :f records
# whether that positive-vs-rest decision agrees with the actual class.
# The returned hash keeps its default block, so reading an unknown key
# inserts and returns a zeroed table.
def conf_mat_to_tab(conf_mat)
  conf_tab = Hash.new { |h, k| h[k] = { p: { t: 0, f: 0 }, n: { t: 0, f: 0 } } }
  conf_mat.each_key do |positive|
    conf_mat.each do |actual, cols|
      actual_positive = positive == actual
      cols.each do |predicted, v|
        predicted_positive = positive == predicted
        # Correctness is judged from the positive class's point of view.
        # Comparing actual with predicted directly would count a mix-up
        # between two *other* classes as a false negative for this class,
        # although the sample is neither actually nor predicted positive
        # (i.e. it is a true negative). With two classes both rules agree.
        outcome = actual_positive == predicted_positive ? :t : :f
        conf_tab[positive][predicted_positive ? :p : :n][outcome] += v
      end
    end
  end
  conf_tab
end

.conf_tab_derivations(tab) ⇒ Object



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 134

# Derives summary statistics from a single 2x2 confusion table.
#
# tab - hash of the form { p: { t: TP, f: FP }, n: { t: TN, f: FN } }.
#
# Returns a hash of the raw counts (:total_population, :condition_positive,
# :condition_negative, :true_positive, :true_negative, :false_positive,
# :false_negative) followed by the ratios :prevalence, :specificity, :recall,
# :precision, :accuracy and :f1_score. Every ratio goes through divide.
def conf_tab_derivations(tab)
  tp = tab[:p][:t]
  fp = tab[:p][:f]
  tn = tab[:n][:t]
  fn = tab[:n][:f]

  actual_pos = tp + fn
  actual_neg = tn + fp
  population = actual_pos + actual_neg

  counts = {
    total_population: population,
    condition_positive: actual_pos,
    condition_negative: actual_neg,
    true_positive: tp,
    true_negative: tn,
    false_positive: fp,
    false_negative: fn
  }
  ratios = {
    prevalence: divide(actual_pos, population),
    specificity: divide(tn, actual_neg),
    recall: divide(tp, actual_pos),
    precision: divide(tp, tp + fp),
    accuracy: divide(tp + tn, population),
    f1_score: divide(2 * tp, 2 * tp + fp + fn)
  }
  counts.merge(ratios)
end

.cross_validate(classifier, sample_data, fold = 10, *options) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 7

# Runs k-fold cross validation and prints the combined report.
#
# classifier  - a classifier object, or a String naming a constant under
#               ClassifierReborn; in the String case the class is
#               instantiated with the options array as its single argument.
# sample_data - array of samples; each sample is forwarded to validate, which
#               reads the category from sample.first and the text from
#               sample.last.
# fold        - number of partitions (default 10). Each partition serves once
#               as the test set while the remaining ones form the training set.
# options     - extra arguments, only used when classifier is a String.
#
# The samples are shuffled into a copy, so the caller's array is left
# untouched. Partitions hold sample_data.length / fold samples each; samples
# beyond fold * partition size are not used in any run.
#
# Returns whatever generate_report returns for the per-fold confusion matrices.
# Raises ArgumentError when there are fewer samples than folds.
def cross_validate(classifier, sample_data, fold = 10, *options)
  classifier = ClassifierReborn.const_get(classifier).new(options) if classifier.is_a?(String)
  partition_size = sample_data.length / fold
  if partition_size < 1
    raise ArgumentError, "cannot split #{sample_data.length} samples into #{fold} folds"
  end

  partitions = sample_data.shuffle.each_slice(partition_size).first(fold)
  conf_mats = fold.times.map do |i|
    training_partitions = partitions.dup
    test_data = training_partitions.delete_at(i)
    # Non-bang flatten: flatten! returns nil when it changes nothing, which
    # happens with a single fold (no training partitions are left).
    validate(classifier, training_partitions.flatten(1), test_data)
  end
  classifier.reset
  generate_report(conf_mats)
end

.divide(dividend, divisor) ⇒ Object



166
167
168
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 166

# Floating-point division that yields 0.0 instead of failing or returning
# Infinity/NaN when the divisor is zero.
#
# Returns dividend / divisor as a Float, or 0.0 when divisor.zero? is true.
def divide(dividend, divisor)
  return 0.0 if divisor.zero?

  dividend / divisor.to_f
end

.empty_conf_mat(categories) ⇒ Object



162
163
164
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 162

# Builds a zero-filled square confusion matrix for the given categories.
#
# Returns a nested hash { actual => { predicted => 0 } } with one row and one
# column per category, in the order given. Each row is its own hash.
def empty_conf_mat(categories)
  categories.each_with_object({}) do |actual, matrix|
    matrix[actual] = categories.each_with_object({}) do |predicted, row|
      row[predicted] = 0
    end
  end
end

.evaluate(classifier, test_data) ⇒ Object



31
32
33
34
35
36
37
38
39
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 31

# Classifies every test record and tallies the outcomes in a confusion matrix.
#
# classifier - object responding to categories and classify.
# test_data  - collection of records; record.first is the expected category
#              and record.last is the text handed to classifier.classify.
#
# The expected category is normalised (underscores become spaces, then
# capitalize) before being used as the row key — presumably to match the
# classifier's own category naming; confirm against the classifier.
# Records for which classify returns nil are not counted.
#
# Returns the matrix { actual => { predicted => count } } whose rows and
# columns are the classifier's sorted categories.
def evaluate(classifier, test_data)
  matrix = empty_conf_mat(classifier.categories.sort)
  test_data.each do |sample|
    expected = sample.first.tr('_', ' ').capitalize
    guess = classifier.classify(sample.last)
    next if guess.nil?

    matrix[expected][guess] += 1
  end
  matrix
end

.generate_report(*conf_mats) ⇒ Object



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 41

# Prints the complete validation report for one or more confusion matrices.
#
# conf_mats - one or more confusion matrices, passed individually or as
#             (nested) arrays; the argument list is flattened first.
#
# Output, in order: the "Run Report" table (one row per matrix when there is
# more than one, then an "All" row for the combined matrix), the combined
# confusion matrix, and the per-class confusion table derivations.
# With a single matrix that matrix itself is used as the combined matrix;
# otherwise the matrices are summed cell by cell into a fresh zeroed matrix.
#
# Returns the result of print_conf_tab.
def generate_report(*conf_mats)
  conf_mats.flatten!
  single_run = conf_mats.length == 1
  combined = single_run ? conf_mats.first : empty_conf_mat(conf_mats.first.keys.sort)

  column_titles = 'Run     Total   Correct Incorrect  Accuracy'
  rule = '-' * column_titles.length
  puts
  puts ' Run Report '.center(column_titles.length, '-')
  puts column_titles
  puts rule

  if conf_mats.length > 1
    conf_mats.each.with_index(1) do |run_mat, run_number|
      print_run_report(build_run_report(run_mat), run_number)
      run_mat.each do |actual, row|
        row.each { |predicted, count| combined[actual][predicted] += count }
      end
    end
    puts rule
  end

  print_run_report(build_run_report(combined), 'All')
  puts
  print_conf_mat(combined)
  puts
  print_conf_tab(conf_mat_to_tab(combined))
end


102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 102

# Prints a confusion matrix as a fixed-width text table.
#
# conf_mat - nested hash { actual => { predicted => count } }; the keys are
#            used as row and column titles and are padded with ljust/rjust.
#
# Layout: a header row ("Predicted ->", one column per class, "Total",
# "Recall"); one row per actual class with its counts, row total and recall;
# a "Total" row of column sums; and a "Precision" row that ends with the
# overall accuracy. All cells share the width of the longest header title and
# ratios are rounded to 5 decimal places.
def print_conf_mat(conf_mat)
  labels = conf_mat.keys
  titles = ['Predicted ->'] + labels + %w[Total Recall]
  width = titles.map(&:length).max
  heading = titles.map { |title| title.rjust(width) }.join(' ')
  rule = '-' * heading.length
  right = ->(value) { value.to_s.rjust(width) }

  puts ' Confusion Matrix '.center(heading.length, '-')
  puts heading
  puts rule

  column_sums = labels.map { |label| [label, 0] }.to_h
  diagonal = 0
  conf_mat.each do |actual, row|
    row_sum = row.values.reduce(:+)
    cells = [actual.ljust(width)]
    cells.concat(row.values.map { |count| right.call(count) })
    cells << right.call(row_sum)
    cells << right.call(divide(row[actual], row_sum).round(5))
    puts cells.join(' ')

    row.each do |predicted, count|
      column_sums[predicted] += count
      diagonal += count if predicted == actual
    end
  end

  grand_total = column_sums.values.reduce(:+)
  puts rule

  total_row = ['Total'.ljust(width)]
  total_row.concat(column_sums.values.map { |sum| right.call(sum) })
  total_row << right.call(grand_total) << right.call('')
  puts total_row.join(' ')

  precision_row = ['Precision'.ljust(width)]
  precision_row.concat(
    column_sums.keys.map do |label|
      right.call(divide(conf_mat[label][label], column_sums[label]).round(5))
    end
  )
  precision_row << right.call('Accuracy ->')
  precision_row << right.call(divide(diagonal, grand_total).round(5))
  puts precision_row.join(' ')
end


125
126
127
128
129
130
131
132
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 125

# Prints the derived statistics of every per-class confusion table.
#
# conf_tab - hash { positive_class => table }, where each table is passed to
#            conf_tab_derivations.
#
# For each class this prints a "# Positive class: <class>" heading, the
# derivations via print_derivations, and a blank line.
# Returns conf_tab (the result of each).
def print_conf_tab(conf_tab)
  conf_tab.each do |positive_class, table|
    puts "# Positive class: #{positive_class}"
    print_derivations(conf_tab_derivations(table))
    puts
  end
end


155
156
157
158
159
160
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 155

# Prints one "Label : value" line per derivation.
#
# derivations - hash of name => value. Each name is shown with underscores
#               replaced by spaces and capitalised, left-justified to the
#               length of the longest name so the colons line up.
#
# Returns derivations (the result of each).
def print_derivations(derivations)
  label_width = derivations.keys.map(&:length).max
  derivations.each do |name, value|
    label = name.to_s.tr('_', ' ').capitalize.ljust(label_width)
    puts "#{label} : #{value}"
  end
end


97
98
99
100
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 97

# Prints one row of the run report table, optionally preceded by its header.
#
# stats        - hash with :total, :correct, :incorrect and :accuracy
#                (the shape returned by build_run_report).
# prefix       - run label shown in the first column; anything responding to
#                to_s, e.g. an Integer run number or the string 'All'.
# print_header - when true, the column header line is printed first.
#
# The accuracy is rounded to 5 decimals and right-padded with zeros to at
# least 7 characters so the column lines up.
def print_run_report(stats, prefix = '', print_header = false)
  # Convert once: the header width previously called prefix.length directly,
  # which raises for Integer run numbers even though the row used prefix.to_s.
  label = prefix.to_s
  puts "#{'Run'.rjust([3, label.length].max)}     Total   Correct Incorrect  Accuracy" if print_header

  accuracy = stats[:accuracy].round(5).to_s.ljust(7, '0')
  cells = [stats[:total], stats[:correct], stats[:incorrect], accuracy].map { |cell| cell.to_s.rjust(9) }
  puts "#{label.rjust(3)} #{cells.join(' ')}"
end

.validate(classifier, training_data, test_data, *options) ⇒ Object



22
23
24
25
26
27
28
29
# File 'lib/classifier-reborn/validators/classifier_validator.rb', line 22

# Trains a classifier from scratch on the training data and evaluates it on
# the test data.
#
# classifier    - a classifier object, or a String naming a constant under
#                 ClassifierReborn; in the String case the class is
#                 instantiated with the options array as its single argument.
# training_data - collection of records; record.first is the category and
#                 record.last the text passed to classifier.train.
# test_data     - records passed on to evaluate.
# options       - extra arguments, only used when classifier is a String.
#
# The classifier is reset before training.
# Returns the confusion matrix produced by evaluate.
def validate(classifier, training_data, test_data, *options)
  if classifier.is_a?(String)
    classifier_class = ClassifierReborn.const_get(classifier)
    classifier = classifier_class.new(options)
  end
  classifier.reset
  training_data.each { |sample| classifier.train(sample.first, sample.last) }
  evaluate(classifier, test_data)
end