Class: Ankusa::NaiveBayesClassifier

Inherits:
Object
  • Object
show all
Includes:
Classifier
Defined in:
lib/ankusa/naive_bayes.rb

Instance Attribute Summary

Attributes included from Classifier

#classnames

Instance Method Summary collapse

Methods included from Classifier

#initialize, #train, #untrain

Instance Method Details

#classifications(text, classnames = nil) ⇒ Object

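Returns a hash mapping each class name to its normalized probability for the given text. `classnames` is an optional array of the class names to consider; when omitted, the classifier's own `classnames` are used.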



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/ankusa/naive_bayes.rb', line 12

# Computes normalized class probabilities for +text+.
#
# text       - the text to classify (passed through to #log_likelihoods).
# classnames - optional array of class names to consider; passed through to
#              #log_likelihoods unchanged.
#
# Returns the hash produced by #log_likelihoods with every value replaced by
# that class's share of the total likelihood, so the values sum to 1.0. An
# empty hash is returned unchanged.
def classifications(text, classnames = nil)
  result = log_likelihoods text, classnames
  return result if result.empty?

  # Shift by the largest log-likelihood before exponentiating. The shift
  # cancels out in the normalization below, but it keeps Math.exp from
  # underflowing to 0.0 for every class when the log-likelihoods are very
  # negative (which used to make the sum 0 and every probability NaN).
  max_log = result.values.max
  result.keys.each { |k| result[k] = Math.exp(result[k] - max_log) }

  # normalize to get probs
  sum = result.values.inject(0.0) { |x, y| x + y }
  result.keys.each { |k| result[k] = result[k] / sum }
  result
end

#classify(text, classes = nil) ⇒ Object



6
7
8
9
# File 'lib/ankusa/naive_bayes.rb', line 6

# Returns the most probable class for +text+.
#
# text    - the text to classify (passed through to #log_likelihoods).
# classes - optional array of class names to consider; passed through to
#           #log_likelihoods unchanged.
#
# Returns the key with the highest log-likelihood, or nil when
# #log_likelihoods yields no classes at all.
def classify(text, classes = nil)
  # A single max_by pass replaces sorting every class just to take the head,
  # and yields nil (instead of raising on nil.first) when nothing was scored.
  best = log_likelihoods(text, classes).max_by { |_, score| score }
  best && best.first
end

#log_likelihoods(text, classnames = nil) ⇒ Object

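Returns a hash mapping each class name to its log-likelihood (word log-probabilities plus the log prior) for the given text. `classnames` is an optional array of the class names to consider; when omitted, the classifier's own `classnames` are used.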



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/ankusa/naive_bayes.rb', line 25

# Computes an unnormalized log-likelihood of +text+ for each class.
#
# text       - the text to score; it is handed to TextHash, which is iterated
#              as (word, count) pairs.
# classnames - optional array of class names to consider; defaults to
#              @classnames.
#
# Returns a Hash (default value 0) mapping each class name to the sum of its
# count-weighted word log-probabilities plus its log prior.
def log_likelihoods(text, classnames = nil)
  classnames ||= @classnames
  result = Hash.new 0

  TextHash.new(text).each { |word, count|
    probs = get_word_probs(word, classnames)
    classnames.each { |k| result[k] += (Math.log(probs[k]) * count) }
  }

  # add the log prior; each class gets one extra document as smoothing, so
  # the denominator grows by the number of classes considered
  doc_counts = doc_count_totals.select { |k, _| classnames.include? k }.map { |_, v| v }
  # Seed the fold with 0: without it an empty selection (none of the requested
  # classes has a recorded total) folds to nil and the addition below raises.
  doc_count_total = (doc_counts.inject(0) { |x, y| x + y } + classnames.length).to_f
  classnames.each { |k|
    result[k] += Math.log((@storage.get_doc_count(k) + 1).to_f / doc_count_total)
  }

  result
end