Class: Ankusa::NaiveBayesClassifier
- Inherits:
-
Object
- Object
- Ankusa::NaiveBayesClassifier
- Includes:
- Classifier
- Defined in:
- lib/ankusa/naive_bayes.rb
Instance Attribute Summary
Attributes included from Classifier
Instance Method Summary collapse
-
#classifications(text, classnames = nil) ⇒ Object
`classnames` is an array of the class names to look at; when nil, the classifier's `@classnames` list is used.
- #classify(text, classes = nil) ⇒ Object
-
#log_likelihoods(text, classnames = nil) ⇒ Object
`classnames` is an array of the class names to look at; when nil, the classifier's `@classnames` list is used.
Methods included from Classifier
Instance Method Details
#classifications(text, classnames = nil) ⇒ Object
`classnames` is an array of the class names to look at; when nil, the classifier's `@classnames` list is used.
12 13 14 15 16 17 18 19 20 21 22 |
# File 'lib/ankusa/naive_bayes.rb', line 12
#
# Turns the per-class log-likelihoods of +text+ into probabilities that sum to 1.
#
# text       - the document to score; handed to log_likelihoods untouched.
# classnames - optional array of class names to look at; handed to
#              log_likelihoods untouched (including nil).
#
# Returns the hash produced by log_likelihoods, mutated in place so that each
# value is the normalized probability of its class. An empty hash is returned
# unchanged.
def classifications(text, classnames=nil)
  result = log_likelihoods text, classnames
  return result if result.empty?

  # Shift every log-likelihood by the largest one before exponentiating. The
  # shift cancels out in the normalization below, but without it Math.exp
  # underflows to 0.0 for every class on long texts, the sum becomes 0 and all
  # probabilities turn into NaN.
  max_ll = result.values.max
  result.keys.each { |k| result[k] = Math.exp(result[k] - max_ll) }

  # normalize to get probs
  sum = result.values.inject(0.0) { |x, y| x + y }
  result.keys.each { |k| result[k] = result[k] / sum }
  result
end
#classify(text, classes = nil) ⇒ Object
6 7 8 9 |
# File 'lib/ankusa/naive_bayes.rb', line 6
#
# Picks the class with the highest log-likelihood for +text+.
#
# text    - the document to classify; handed to log_likelihoods untouched.
# classes - optional array of class names to look at; handed to
#           log_likelihoods untouched (including nil).
#
# Returns the key of the highest-scoring entry in the hash returned by
# log_likelihoods, or nil when that hash is empty.
def classify(text, classes=nil)
  # return the most probable class; max_by finds it in one pass without
  # sorting every candidate first
  best = log_likelihoods(text, classes).max_by { |_name, log_likelihood| log_likelihood }
  # best is a [name, log_likelihood] pair, or nil when there were no classes
  best && best.first
end
#log_likelihoods(text, classnames = nil) ⇒ Object
`classnames` is an array of the class names to look at; when nil, the classifier's `@classnames` list is used.
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/ankusa/naive_bayes.rb', line 25
#
# Computes, for each class, the log of (word likelihoods * class prior) for +text+.
#
# text       - the document to score; it is wrapped in TextHash, which is
#              iterated as (word, count) pairs.
# classnames - optional array of class names to look at; when nil, @classnames
#              is used.
#
# Returns a Hash (default value 0) mapping each class name in classnames to its
# log-likelihood. Values stay in log space; nothing is exponentiated here.
def log_likelihoods(text, classnames=nil)
  classnames ||= @classnames
  result = Hash.new 0

  # Sum log P(word | class) once per occurrence of each word.
  TextHash.new(text).each { |word, count|
    probs = get_word_probs(word, classnames)
    classnames.each { |k| result[k] += (Math.log(probs[k]) * count) }
  }

  # Add the log prior. Each class's document count gets +1 and the total gets
  # +classnames.length, so a class with no documents still has a non-zero prior.
  # The fold is seeded with 0 so that having no matching entries in
  # doc_count_totals yields 0 instead of nil (which used to raise on the `+`).
  selected_docs = doc_count_totals.inject(0) { |sum, (k, v)|
    classnames.include?(k) ? sum + v : sum
  }
  doc_count_total = (selected_docs + classnames.length).to_f
  classnames.each { |k|
    result[k] += Math.log((@storage.get_doc_count(k) + 1).to_f / doc_count_total)
  }

  result
end