Class: NaiveBayes
- Inherits:
-
Object
- Object
- NaiveBayes
- Defined in:
- lib/twss-classifier/naive-bayes.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#classify(sentence) ⇒ Object
Returns the probability that `sentence` is a TWSS sentence.
-
#initialize(ngram_size, pos_training_examples, neg_training_examples) ⇒ NaiveBayes
constructor
A new instance of NaiveBayes.
- #puts_best_neg_predictors ⇒ Object
- #puts_best_pos_predictors ⇒ Object
- #test(threshold, pos_test_examples, neg_test_examples) ⇒ Object
- #train ⇒ Object
- #yamlize(filename) ⇒ Object
Constructor Details
#initialize(ngram_size, pos_training_examples, neg_training_examples) ⇒ NaiveBayes
Returns a new instance of NaiveBayes.
14 15 16 17 18 |
# File 'lib/twss-classifier/naive-bayes.rb', line 14
# Records the n-gram size and the two training corpora on the instance.
# Nothing is computed here; the values are only stored.
#
# @param ngram_size [Object] n-gram size, kept in @ngram_size
# @param pos_training_examples [Array] positive training examples
# @param neg_training_examples [Object] negative training examples
#   (presumably an Array as well; not shown here)
def initialize(ngram_size, pos_training_examples, neg_training_examples)
  @ngram_size, @pos_training_examples, @neg_training_examples =
    ngram_size, pos_training_examples, neg_training_examples
end
Class Method Details
.load_yaml(filename) ⇒ Object
77 78 79 |
# File 'lib/twss-classifier/naive-bayes.rb', line 77
# Reads +filename+ and deserializes the YAML document it contains.
#
# SECURITY: full YAML deserialization can instantiate arbitrary Ruby objects.
# Only call this on files you trust (e.g. ones written by #yamlize).
#
# @param filename [String] path of the YAML file to read
# @return [Object] the object described by the YAML document
def self.load_yaml(filename)
  yaml = File.read(filename)
  # Psych 4 (bundled with Ruby 3.1+) made YAML.load behave like safe_load,
  # which rejects custom classes such as a dumped classifier. unsafe_load
  # restores full deserialization; older Psych versions do not have it, so
  # fall back to plain load there.
  YAML.respond_to?(:unsafe_load) ? YAML.unsafe_load(yaml) : YAML.load(yaml)
end
Instance Method Details
#classify(sentence) ⇒ Object
Returns the probability that `sentence` is a TWSS sentence.
63 64 65 66 67 68 69 |
# File 'lib/twss-classifier/naive-bayes.rb', line 63
# Returns the probability that `sentence` is a TWSS sentence.
#
# Each n-gram of the sentence contributes its [positive, negative] pair from
# @probs; n-grams missing from @probs contribute the neutral pair [0.5, 0.5].
# The result is P = prod(pos) / (prod(pos) + prod(neg)).
#
# The products are accumulated as sums of logarithms. This avoids relying on
# Array#product (which in core Ruby is a Cartesian product, not a
# multiplication) and avoids floating-point underflow to 0.0 / 0.0 = NaN when
# many small probabilities are multiplied.
#
# @param sentence [Object] input passed to to_ngrams together with @ngram_size
# @return [Float] value in 0.0..1.0; 0.5 when there is no evidence either
#   way (no n-grams, or both classes have zero probability)
def classify(sentence)
  log_pos = 0.0
  log_neg = 0.0

  to_ngrams(sentence, @ngram_size).each do |ngram|
    pos_p, neg_p = @probs[ngram] || [0.5, 0.5]
    # Math.log(0) is -Infinity, so a zero probability rules its class out.
    log_pos += Math.log(pos_p)
    log_neg += Math.log(neg_p)
  end

  # pos / (pos + neg) == 1 / (1 + neg / pos) == 1 / (1 + e^(log_neg - log_pos))
  log_ratio = log_neg - log_pos
  # -Infinity - -Infinity is NaN: both classes are impossible, stay neutral.
  return 0.5 if log_ratio.nan?

  1.0 / (1.0 + Math.exp(log_ratio))
end
#puts_best_neg_predictors ⇒ Object
89 90 91 92 93 94 95 |
# File 'lib/twss-classifier/naive-bayes.rb', line 89
# Prints up to 500 n-grams ranked by how strongly they indicate the negative
# class, strongest first. Each line is tab-separated: the n-gram padded to 20
# characters, its negative share neg / (pos + neg), and its count in
# @pos_counts.
#
# Only n-grams whose @pos_counts entry exceeds 10 are listed; n-grams absent
# from @pos_counts are treated as having a count of zero.
#
# NOTE(review): both the filter and the printed count use @pos_counts, the
# same as puts_best_pos_predictors. Confirm whether @neg_counts was intended
# here; the behaviour is left unchanged.
#
# @return [Array] the [ngram, [pos_p, neg_p]] pairs that were printed
def puts_best_neg_predictors
  # Filter first so that only the surviving n-grams have to be sorted.
  frequent = @probs.select { |ngram, _pair| @pos_counts[ngram].to_f > 10 }
  ranked = frequent.sort_by { |_ngram, pair| pair[1] / pair.sum }.reverse

  ranked.first(500).each do |ngram, pair|
    puts [ngram.ljust(20), pair[1] / pair.sum, @pos_counts[ngram]].join("\t")
  end
end
#puts_best_pos_predictors ⇒ Object
81 82 83 84 85 86 87 |
# File 'lib/twss-classifier/naive-bayes.rb', line 81
# Prints up to 500 n-grams ranked by how strongly they indicate the positive
# class, strongest first. Each line is tab-separated: the n-gram padded to 20
# characters, its positive share pos / (pos + neg), and its count in
# @pos_counts.
#
# Only n-grams whose @pos_counts entry exceeds 10 are listed; n-grams absent
# from @pos_counts are treated as having a count of zero.
#
# @return [Array] the [ngram, [pos_p, neg_p]] pairs that were printed
def puts_best_pos_predictors
  # Filter first so that only the surviving n-grams have to be sorted.
  frequent = @probs.select { |ngram, _pair| @pos_counts[ngram].to_f > 10 }
  ranked = frequent.sort_by { |_ngram, pair| pair[0] / pair.sum }.reverse

  ranked.first(500).each do |ngram, pair|
    puts [ngram.ljust(20), pair[0] / pair.sum, @pos_counts[ngram]].join("\t")
  end
end
#test(threshold, pos_test_examples, neg_test_examples) ⇒ Object
37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/twss-classifier/naive-bayes.rb', line 37
# Scores every test example with #classify and tallies a confusion matrix.
#
# A positive example is a true positive only when its score is strictly above
# +threshold+; a negative example is a true negative only when its score is
# strictly below it. A score exactly equal to the threshold therefore counts
# as an error for either kind of example.
#
# @param threshold [Numeric] decision boundary compared against #classify
# @param pos_test_examples [#each] examples expected to be positive
# @param neg_test_examples [#each] examples expected to be negative
# @return [Array<Integer>] counts in the order [tp, fp, tn, fn]
def test(threshold, pos_test_examples, neg_test_examples)
  tally = { tp: 0, fp: 0, tn: 0, fn: 0 }

  pos_test_examples.each do |example|
    tally[classify(example) > threshold ? :tp : :fn] += 1
  end

  neg_test_examples.each do |example|
    tally[classify(example) < threshold ? :tn : :fp] += 1
  end

  tally.values_at(:tp, :fp, :tn, :fn)
end
#train ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/twss-classifier/naive-bayes.rb', line 20
# Builds @probs from the two training corpora.
#
# For every n-gram seen in either corpus, @probs maps the n-gram to
# [share of the positive corpus, share of the negative corpus], where a share
# is that n-gram's count divided by the sum of all counts in the corpus.
# Also populates @pos_counts and @neg_counts.
#
# @return [Array] the n-grams that were processed
def train
  # Per-corpus n-gram => count tables (the original comment describes the
  # counts as smoothed; get_ngram_counts is defined elsewhere).
  @pos_counts = get_ngram_counts(@pos_training_examples)
  @neg_counts = get_ngram_counts(@neg_training_examples)

  pos_denominator = @pos_counts.values.sum
  neg_denominator = @neg_counts.values.sum

  # Turn the raw counts into per-corpus proportions.
  @probs = {}
  vocabulary = @pos_counts.keys | @neg_counts.keys
  vocabulary.each do |ngram|
    @probs[ngram] = [
      @pos_counts[ngram].to_f / pos_denominator,
      @neg_counts[ngram].to_f / neg_denominator
    ]
  end
end
#yamlize(filename) ⇒ Object
71 72 73 74 75 |
# File 'lib/twss-classifier/naive-bayes.rb', line 71
# Writes this object's YAML representation (to_yaml) to +filename+,
# replacing any existing content of that file.
#
# @param filename [String] path of the file to write
def yamlize(filename)
  File.open(filename, "w") { |io| io.puts(to_yaml) }
end