Class: Ankusa::NaiveBayesClassifier
- Inherits:
-
Object
- Object
- Ankusa::NaiveBayesClassifier
- Includes:
- Classifier
- Defined in:
- lib/ankusa/naive_bayes.rb
Instance Attribute Summary
Attributes included from Classifier
Instance Method Summary collapse
-
#classifications(text, classnames = nil) ⇒ Object
classnames is an optional array of the class names to consider.
- #classify(text, classes = nil) ⇒ Object
-
#log_likelihoods(text, classnames = nil) ⇒ Object
classnames is an optional array of the class names to consider.
Methods included from Classifier
Instance Method Details
#classifications(text, classnames = nil) ⇒ Object
classnames is an optional array of the class names to consider
13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/ankusa/naive_bayes.rb', line 13
#
# Turns the per-class log-likelihoods of +text+ into normalized probabilities.
#
# text       - passed through unchanged to #log_likelihoods.
# classnames - optional array of class names to consider; passed through
#              unchanged to #log_likelihoods.
#
# Returns the hash produced by #log_likelihoods, updated in place so that each
# class maps to a Float probability. A class whose score equals INFTY gets 0.0;
# if every class scores INFTY, every probability is 0.0.
def classifications(text, classnames=nil)
  result = log_likelihoods text, classnames

  # A score equal to INFTY marks a class that receives zero probability, so it
  # must not take part in choosing the shift below.
  finite_scores = result.values.reject { |score| score == INFTY }

  # Shift by the largest finite score before exponentiating. The shift cancels
  # out in the normalization, but keeps Math.exp from underflowing to 0.0 for
  # every class when the log-likelihoods are very negative.
  # (peak is nil only when no finite score exists, in which case it is unused.)
  peak = finite_scores.max
  result.keys.each { |k|
    result[k] = (result[k] == INFTY) ? 0.0 : Math.exp(result[k] - peak)
  }

  # normalize to get probs; with no probability mass at all there is nothing
  # to normalize, and dividing would fail
  sum = result.values.inject(0.0) { |x, y| x + y }
  result.keys.each { |k| result[k] = result[k] / sum } unless sum.zero?
  result
end
#classify(text, classes = nil) ⇒ Object
7 8 9 10 |
# File 'lib/ankusa/naive_bayes.rb', line 7
#
# Picks the most probable class for +text+.
#
# text    - passed through unchanged to #log_likelihoods.
# classes - optional array of class names to consider; passed through
#           unchanged to #log_likelihoods.
#
# Returns the class name with the highest log-likelihood, or nil when
# #log_likelihoods yields no scores at all.
def classify(text, classes=nil)
  scores = log_likelihoods(text, classes)

  # #classifications maps a score equal to INFTY to probability zero, so such
  # a class must never beat one with a real score. Only when every class is in
  # that state do we fall back to ranking them all.
  candidates = scores.reject { |_, score| score == INFTY }
  candidates = scores if candidates.empty?

  # return the most probable class (a single pass instead of a full sort)
  best = candidates.max_by { |_, score| score }
  best && best.first
end
#log_likelihoods(text, classnames = nil) ⇒ Object
classnames is an optional array of the class names to consider; when it is nil, @classnames is used
26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 |
# File 'lib/ankusa/naive_bayes.rb', line 26
#
# Computes an unnormalized log-likelihood score for each class given +text+.
#
# text       - handed to TextHash.new, which is iterated as (word, count) pairs.
# classnames - optional array of class names to score; when nil, @classnames
#              is used.
#
# Returns a Hash (default value 0) mapping each class name to its score: the
# sum of count * log(word probability) over the words of the text, plus the
# log of the class prior. A word whose probability for a class is not
# positive contributes INFTY instead.
def log_likelihoods(text, classnames=nil)
  classnames ||= @classnames
  result = Hash.new 0

  TextHash.new(text).each { |word, count|
    probs = get_word_probs(word, classnames)
    classnames.each { |k|
      # log likelihood should be infinity if we've never seen the klass
      result[k] += probs[k] > 0 ? (Math.log(probs[k]) * count) : INFTY
    }
  }

  # add the prior
  doc_counts = doc_count_totals.select { |k, _| classnames.include? k }.map { |_, v| v }
  # Seed the fold with 0: without a seed, inject returns nil when none of the
  # requested classes has an entry in doc_count_totals, and the addition fails.
  # Each class count gets +1 below, hence classnames.length in the denominator.
  doc_count_total = (doc_counts.inject(0) { |x, y| x + y } + classnames.length).to_f
  classnames.each { |k|
    result[k] += Math.log((@storage.get_doc_count(k) + 1).to_f / doc_count_total)
  }

  result
end