Class: StuffClassifier::Base
- Inherits:
-
Object
- Object
- StuffClassifier::Base
show all
- Includes:
- Tokenizer
- Defined in:
- lib/stuff-classifier/base.rb
Class Attribute Summary collapse
Instance Attribute Summary collapse
Attributes included from Tokenizer
#stemming
Class Method Summary
collapse
Instance Method Summary
collapse
Methods included from Tokenizer
#each_word, #ignore_words, #ignore_words=, #stemming?
Constructor Details
#initialize(name, opts = {}) ⇒ Base
Returns a new instance of Base.
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
|
# File 'lib/stuff-classifier/base.rb', line 5
# Builds a classifier with empty word and category counters, then asks the
# storage backend to either load or purge any state kept for it.
#
# @param name [Object] identifier stored in @name
# @param opts [Hash] optional settings
# @option opts [Object] :stemming value stored in @stemming; true when the
#   key is absent (an explicit nil or false is kept as given)
# @option opts [Object] :purge_state when truthy the storage is asked to
#   purge this classifier's state instead of loading it
# @option opts [Object] :storage backend to use; falls back to
#   StuffClassifier::Base.storage when nil or absent
def initialize(name, opts = {})
  @name = name
  @stemming = opts.fetch(:stemming, true)
  @ignore_words = nil

  # Counters start empty; the storage backend receives self below.
  @wcount = {}
  @ccount = {}

  @storage = opts[:storage] || StuffClassifier::Base.storage
  if opts[:purge_state]
    @storage.purge_state(self)
  else
    @storage.load_state(self)
  end
end
|
Instance Attribute Details
#name ⇒ Object
Returns the value of attribute name.
3
4
5
|
# File 'lib/stuff-classifier/base.rb', line 3
# Reader for the classifier's name.
#
# @return [Object] the value currently held in @name
def name
  @name
end
|
Class Method Details
.open(name) ⇒ Object
89
90
91
92
93
94
95
96
97
|
# File 'lib/stuff-classifier/base.rb', line 89
# Creates an instance for +name+. Without a block the instance is returned.
# With a block the instance is yielded, then +save_state+ is called on it
# and the result of +save_state+ is returned.
#
# @param name [Object] passed straight through to +new+
# @yield [inst] the newly created instance
# @return [Object] the instance (no block) or the result of +save_state+
def open(name)
  inst = self.new(name)
  return inst unless block_given?

  yield inst
  inst.save_state
end
|
Instance Method Details
#cat_count(category) ⇒ Object
38
39
40
|
# File 'lib/stuff-classifier/base.rb', line 38
# Looks up the count recorded for +category+ in @ccount.
#
# @param category [Object] key into @ccount
# @return [Float] the stored count as a float, or 0.0 when nothing is stored
def cat_count(category)
  (@ccount[category] || 0).to_f
end
|
#categories ⇒ Object
46
47
48
|
# File 'lib/stuff-classifier/base.rb', line 46
# Lists the keys of @ccount.
#
# @return [Array] the keys of @ccount
def categories
  @ccount.keys
end
|
#incr_cat(category) ⇒ Object
28
29
30
31
|
# File 'lib/stuff-classifier/base.rb', line 28
# Adds one to the count stored for +category+ in @ccount, starting from
# zero when nothing is stored yet.
#
# @param category [Object] key into @ccount
# @return [Integer] the updated count
def incr_cat(category)
  @ccount[category] = (@ccount[category] || 0) + 1
end
|
#incr_word(word, category) ⇒ Object
22
23
24
25
26
|
# File 'lib/stuff-classifier/base.rb', line 22
# Adds one to the count stored for +word+ under +category+ in @wcount,
# creating the per-word hash and the counter on first use.
#
# @param word [Object] outer key into @wcount
# @param category [Object] inner key within that word's hash
# @return [Integer] the updated count
def incr_word(word, category)
  per_category = (@wcount[word] ||= {})
  per_category[category] = (per_category[category] || 0) + 1
end
|
#save_state ⇒ Object
77
78
79
|
# File 'lib/stuff-classifier/base.rb', line 77
# Hands this object to the storage backend's +save_state+.
#
# @return [Object] whatever @storage.save_state returns
def save_state
  @storage.save_state(self)
end
|
#total_count ⇒ Object
42
43
44
|
# File 'lib/stuff-classifier/base.rb', line 42
# Adds up every value in @ccount.
#
# @return [Float] the sum of the values in @ccount; 0.0 when it is empty
def total_count
  per_category = @ccount.values
  per_category.reduce(0, :+).to_f
end
|
#train(category, text) ⇒ Object
50
51
52
53
|
# File 'lib/stuff-classifier/base.rb', line 50
# Records one training sample: each word yielded by +each_word+ for +text+
# is counted under +category+, then the category counter is incremented.
#
# @param category [Object] category the sample belongs to
# @param text [Object] passed to +each_word+
# @return [Object] the result of +incr_cat+
def train(category, text)
  each_word(text) do |token|
    incr_word(token, category)
  end
  incr_cat(category)
end
|
#word_count(word, category) ⇒ Object
33
34
35
36
|
# File 'lib/stuff-classifier/base.rb', line 33
# Looks up the count stored for +word+ under +category+ in @wcount.
#
# @param word [Object] outer key into @wcount
# @param category [Object] inner key within that word's hash
# @return [Float] the stored count as a float, or 0.0 when either level
#   is missing
def word_count(word, category)
  per_category = @wcount[word]
  stored = per_category && per_category[category]
  stored ? stored.to_f : 0.0
end
|
#word_prob(word, cat) ⇒ Object
55
56
57
58
|
# File 'lib/stuff-classifier/base.rb', line 55
# Ratio of the word's count in +cat+ to the count of +cat+ itself.
#
# @param word [Object] word to look up via +word_count+
# @param cat [Object] category to look up via +cat_count+
# @return [Float] word_count / cat_count, or 0.0 when the category count
#   is zero (avoids dividing by zero)
def word_prob(word, cat)
  in_category = cat_count(cat)
  if in_category == 0
    0.0
  else
    word_count(word, cat) / in_category
  end
end
|
#word_weighted_average(word, cat, opts = {}) ⇒ Object
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
|
# File 'lib/stuff-classifier/base.rb', line 60
# Blends an assumed probability with the observed one, weighting the
# observed value by how often +word+ was counted across all categories:
#
#   (weight * assumed_prob + totals * basic_prob) / (weight + totals)
#
# @param word [Object] word being scored
# @param cat [Object] category being scored
# @param opts [Hash] optional settings
# @option opts [#call] :func callable taking (word, cat) that supplies the
#   basic probability; +word_prob+ is used when absent
# @option opts [Numeric] :weight weight of the assumed probability (1.0
#   when absent)
# @option opts [Numeric] :assumed_prob prior probability (0.5 when absent)
# @return [Numeric] the weighted average
def word_weighted_average(word, cat, opts = {})
  prob_source = opts[:func]
  weight = opts[:weight] || 1.0
  assumed_prob = opts[:assumed_prob] || 0.5

  basic_prob = if prob_source
                 prob_source.call(word, cat)
               else
                 word_prob(word, cat)
               end

  # Occurrences of the word summed over every known category.
  totals = categories.inject(0) { |acc, c| acc + word_count(word, c) }

  (weight * assumed_prob + totals * basic_prob) / (weight + totals)
end
|