Class: StuffClassifier::Bayes
- Inherits: Base
  - Object
    - Base
      - StuffClassifier::Bayes
- Defined in:
- lib/stuff-classifier/bayes.rb
Instance Attribute Summary collapse
Attributes inherited from Base
#name
Attributes included from Tokenizer
#stemming
Instance Method Summary
collapse
Methods inherited from Base
#cat_count, #categories, #incr_cat, #incr_word, open, #save_state, #total_count, #train, #word_count, #word_prob, #word_weighted_average
Methods included from Tokenizer
#each_word, #ignore_words, #ignore_words=, #stemming?
Constructor Details
#initialize(name, opts = {}) ⇒ Bayes
Returns a new instance of Bayes.
7
8
9
10
|
# File 'lib/stuff-classifier/bayes.rb', line 7
# Creates a Bayes classifier.
#
# Both arguments are handed to the superclass initializer untouched; the
# only state added here is an empty per-category threshold table.
#
# @param name forwarded as-is to the superclass initializer
# @param opts [Hash] forwarded as-is to the superclass initializer
def initialize(name, opts={})
  # A bare `super` re-sends this method's own arguments (name, opts).
  super
  @thresholds = {}
end
|
Instance Attribute Details
#thresholds=(value) ⇒ Object
5
6
7
|
# File 'lib/stuff-classifier/bayes.rb', line 5
# Replaces the threshold table wholesale.
#
# #classify looks entries up with `@thresholds[category]`, so the value
# should answer `[]` with a category key (a Hash in the default setup).
#
# @param value the new threshold table
# @return the object that was assigned
def thresholds=(value)
  @thresholds = value
end
|
Instance Method Details
#cat_scores(text) ⇒ Object
24
25
26
27
28
29
30
|
# File 'lib/stuff-classifier/bayes.rb', line 24
# Scores +text+ against every known category.
#
# Each category returned by #categories is paired with the value of
# #text_prob for that category, and the pairs are ordered from the
# highest score to the lowest.
#
# @param text the text to score
# @return [Array<Array>] `[category, score]` pairs, best score first
def cat_scores(text)
  by_category = categories.each_with_object({}) do |category, table|
    table[category] = text_prob(text, category)
  end

  # Descending order: the right-hand score is compared against the left.
  by_category.to_a.sort { |(_, left), (_, right)| right <=> left }
end
|
#classify(text, default = nil) ⇒ Object
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
|
# File 'lib/stuff-classifier/bayes.rb', line 32
# Picks the most probable category for +text+.
#
# The winner is the category with the highest score from #cat_scores,
# provided that score is strictly greater than zero. The winner is only
# accepted when no other category's score, multiplied by the winner's
# threshold (1.0 when none is configured), exceeds the winning score.
#
# @param text the text to classify
# @param default value returned when there is no acceptable winner
# @return the winning category, or +default+
def classify(text, default=nil)
  ranked = cat_scores(text)

  winner = nil
  top_score = 0.0
  ranked.each do |category, score|
    if score > top_score
      top_score = score
      winner = category
    end
  end
  return default unless winner

  margin = @thresholds[winner] || 1.0

  # A rival that comes within the margin makes the result ambiguous.
  ambiguous = ranked.any? do |category, score|
    next false if category == winner
    score * margin > top_score
  end

  ambiguous ? default : winner
end
|
#doc_prob(text, category) ⇒ Object
12
13
14
15
16
|
# File 'lib/stuff-classifier/bayes.rb', line 12
# Combined word score of +text+ for +category+.
#
# Every word produced by #each_word is mapped to its
# #word_weighted_average for the category, and the results are
# multiplied together starting from 1 (so an empty word list yields 1).
#
# @param text the text whose words are scored
# @param category the category to score against
# @return [Numeric] the product of the per-word weighted averages
def doc_prob(text, category)
  weights = each_word(text).map { |word| word_weighted_average(word, category) }
  weights.reduce(1, :*)
end
|
#text_prob(text, category) ⇒ Object
18
19
20
21
22
|
# File 'lib/stuff-classifier/bayes.rb', line 18
# Score of +category+ for +text+: the category's share of the total
# count multiplied by #doc_prob for the text.
#
# @param text the text to score
# @param category the category to score against
# @return [Float] share of the total count times #doc_prob; 0.0 when the
#   total count is zero
def text_prob(text, category)
  total = total_count
  # With a zero total there is no share to compute; dividing would raise
  # ZeroDivisionError for Integers or yield NaN/Infinity for Floats.
  return 0.0 if total.zero?

  # fdiv always divides as floats, so Integer counts are not truncated.
  prior = cat_count(category).fdiv(total)
  prior * doc_prob(text, category)
end
|