Class: Adocca::Thomas::Bayes
- Inherits:
-
Object
- Object
- Adocca::Thomas::Bayes
show all
- Defined in:
- lib/thomas.rb
Constant Summary
collapse
- @@updates =
{}
Instance Attribute Summary collapse
Instance Method Summary
collapse
Constructor Details
#initialize(model) ⇒ Bayes
Returns a new instance of Bayes.
48
49
50
|
# File 'lib/thomas.rb', line 48
# Creates a classifier bound to the given model.
#
# @param model [Object] stored in @model; other methods of this class call
#   find_by_corpus_and_word on it and hand it to Update.new
def initialize(model)
@model = model
end
|
Dynamic Method Handling
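This class handles dynamic methods through the method_missing method: any call whose name contains "_or_" (for example spam_or_ham(text)) is split on "_or_" into corpus names and answered via #guess.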
#method_missing(meth, *args) ⇒ Object
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
|
# File 'lib/thomas.rb', line 52
# Treats any call whose name contains "_or_" as a classification request.
#
# The method name is split on "_or_" to obtain the candidate corpus names,
# which are passed to #guess together with the original arguments. The
# corpus with the highest probability wins.
#
# @param meth [Symbol, String] name of the method that was not found
# @param args [Array] forwarded unchanged to #guess after the corpus list
# @return [Array] two elements: the best-scoring corpus name, and its
#   probability divided by the second-best probability
#
# Names without "_or_" are passed to super (the default handler normally
# raises NoMethodError).
def method_missing(meth, *args)
  name = meth.to_s
  return super unless name.include?("_or_")

  # -1 is a sentinel below any value compared against it. NOTE: if fewer than
  # two corpora yield a probability, the runner-up stays at -1, so the ratio
  # is the negated best probability (or 1 when nothing was scored at all).
  # This is kept as-is for backward compatibility.
  best_corpus = "none"
  best_prob = -1
  runner_up_prob = -1

  guess(name.split("_or_"), *args).each do |corpus, prob|
    if prob > best_prob
      # The previous leader becomes the runner-up.
      runner_up_prob = best_prob
      best_corpus = corpus
      best_prob = prob
    elsif prob > runner_up_prob
      runner_up_prob = prob
    end
  end

  [best_corpus, best_prob / runner_up_prob]
end

# Keeps respond_to? and method in agreement with method_missing: every name
# containing "_or_" is reported as handled.
#
# @param meth [Symbol, String] method name being queried
# @param include_private [Boolean] passed through to super
# @return [Boolean]
def respond_to_missing?(meth, include_private = false)
  meth.to_s.include?("_or_") || super
end
|
Instance Attribute Details
#model ⇒ Object
Returns the value of attribute model.
44
45
46
|
# File 'lib/thomas.rb', line 44
# Reader for the model this classifier was constructed with.
#
# @return [Object] the current value of @model
def model
@model
end
|
Instance Method Details
#get_occ(corpus, word) ⇒ Object
115
116
117
118
|
# File 'lib/thomas.rb', line 115
# Looks up how often +word+ has been recorded for +corpus+.
#
# @param corpus [Object] corpus identifier passed to the model finder
# @param word [Object] token passed to the model finder
# @return [Object] the +amount+ of the record returned by
#   @model.find_by_corpus_and_word, or 0 when the finder returns nil
def get_occ(corpus, word)
  record = @model.find_by_corpus_and_word(corpus, word)
  return 0 if record.nil?

  record.amount
end
|
#guess(corpi, text, options = {}) ⇒ Object
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
|
# File 'lib/thomas.rb', line 97
# Scores +text+ against each corpus in +corpi+.
#
# Every token produced by tokenize(text, options) is looked up through
# get_prob_by_corpus. Tokens it flags as having no occurrences are ignored,
# and a repeated token is counted only once because results are keyed by
# token. Each corpus starts at 1.0 and is multiplied by the per-token
# probability reported for it.
#
# @param corpi [Array] corpus names to score
# @param text [Object] input handed to tokenize
# @param options [Hash] handed to tokenize unchanged
# @return [Hash] corpus name => product of its per-token probabilities
def guess(corpi, text, options = {})
  probs_for_word = {}
  tokenize(text, options).each do |token|
    per_corpus, seen_anywhere = get_prob_by_corpus(corpi, token)
    probs_for_word[token] = per_corpus if seen_anywhere
  end

  scores = {}
  corpi.each { |name| scores[name] = 1.0 }

  probs_for_word.each do |_token, per_corpus|
    per_corpus.each { |name, prob| scores[name] *= prob }
  end

  scores
end
|
#train(corpus, text, options = {}) ⇒ Object
73
74
75
76
77
78
79
80
81
82
83
|
# File 'lib/thomas.rb', line 73
# Records one additional occurrence in +corpus+ for every token of +text+.
#
# For each token from tokenize(text, options), the pending Update stored
# under @@updates[corpus][token] is adjusted by 1. If none is pending, a new
# Update is created with an initial delta of 1. flush_updates is called
# after every token.
#
# @param corpus [Object] key into the class-level @@updates table
# @param text [Object] input handed to tokenize
# @param options [Hash] handed to tokenize unchanged
def train(corpus, text, options = {})
  tokenize(text, options).each do |token|
    # Looked up on every pass because flush_updates runs between tokens.
    # NOTE(review): presumably flushing may reset @@updates; confirm.
    pending = (@@updates[corpus] ||= {})
    queued = pending[token]
    if queued
      queued.adjust(1)
    else
      pending[token] = Update.new(@model, corpus, token, 1)
    end
    flush_updates
  end
end
|
#untrain(corpus, text, options = {}) ⇒ Object
85
86
87
88
89
90
91
92
93
94
95
|
# File 'lib/thomas.rb', line 85
# Removes one occurrence in +corpus+ for every token of +text+.
#
# For each token from tokenize(text, options), the pending Update stored
# under @@updates[corpus][token] is adjusted by -1. If none is pending, a new
# Update is created with an initial delta of -1. flush_updates is called
# after every token.
#
# @param corpus [Object] key into the class-level @@updates table
# @param text [Object] input handed to tokenize
# @param options [Hash] handed to tokenize unchanged
def untrain(corpus, text, options = {})
  tokenize(text, options).each do |token|
    # Looked up on every pass because flush_updates runs between tokens.
    # NOTE(review): presumably flushing may reset @@updates; confirm.
    pending = (@@updates[corpus] ||= {})
    queued = pending[token]
    if queued
      queued.adjust(-1)
    else
      pending[token] = Update.new(@model, corpus, token, -1)
    end
    flush_updates
  end
end
|