Class: Adocca::Thomas::Bayes

Inherits:
Object
  • Object
show all
Defined in:
lib/thomas.rb

Constant Summary collapse

@@updates =
{}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model) ⇒ Bayes

Returns a new instance of Bayes.



48
49
50
# File 'lib/thomas.rb', line 48

# Builds a classifier bound to the given model.
#
# @param model [Object] stored in +@model+; elsewhere in this class it is
#   queried with +find_by_corpus_and_word+ and handed to +Update.new+
def initialize(model)
  @model = model
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(meth, *args) ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/thomas.rb', line 52

# Answers dynamically named classification calls such as
# +spam_or_ham(text)+.
#
# The method name is split on "_or_" to obtain the corpus names, the
# remaining arguments are forwarded to #guess, and the corpus with the
# highest returned probability wins. Names that do not contain "_or_" are
# handed to +super+.
#
# @param meth [Symbol] name of the method that was not found
# @param args [Array] forwarded unchanged to #guess after the corpus list
# @return [Array] two elements: the winning corpus name and the winning
#   probability divided by the runner-up probability. When #guess yields
#   fewer than two entries the runner-up is still the -1 placeholder, so
#   the ratio carries no meaning in that case.
def method_missing(meth, *args)
  name = meth.to_s
  return super unless name =~ /_or_/

  # -1 placeholders sit below any probability so the first entry always wins.
  best_corpus = "none"
  best_prob = -1
  runner_up_prob = -1
  guess(name.split("_or_"), *args).each do |corpus, prob|
    if prob > best_prob
      # The previous leader becomes the runner-up.
      runner_up_prob = best_prob
      best_corpus = corpus
      best_prob = prob
    elsif prob > runner_up_prob
      runner_up_prob = prob
    end
  end
  [best_corpus, best_prob / runner_up_prob]
end

# Keeps +respond_to?+ and +method+ in agreement with #method_missing: any
# name containing "_or_" is reported as handled.
#
# @param meth [Symbol] method name being queried
# @param include_private [Boolean] passed through to +super+
# @return [Boolean]
def respond_to_missing?(meth, include_private = false)
  meth.to_s =~ /_or_/ ? true : super
end

Instance Attribute Details

#model ⇒ Object

Returns the value of attribute model.



44
45
46
# File 'lib/thomas.rb', line 44

# Reader for the model this classifier was constructed with.
#
# @return [Object] the current value of +@model+
def model
  @model
end

Instance Method Details

#get_occ(corpus, word) ⇒ Object



115
116
117
118
# File 'lib/thomas.rb', line 115

# Looks up the stored count for a word within a corpus.
#
# @param corpus [Object] corpus key passed to the model lookup
# @param word [Object] word key passed to the model lookup
# @return [Object] the +amount+ of the record returned by
#   +@model.find_by_corpus_and_word+, or 0 when the lookup returns nil
def get_occ(corpus, word)
  record = @model.find_by_corpus_and_word(corpus, word)
  return 0 if record.nil?

  record.amount
end

#guess(corpi, text, options = {}) ⇒ Object



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
# File 'lib/thomas.rb', line 97

# Scores a text against each of the given corpora.
#
# Every token produced by +tokenize+ is looked up with
# +get_prob_by_corpus+; tokens whose second return value is falsy are
# ignored. Results are keyed by token, so a token repeated in the text
# contributes once. Each corpus starts at 1.0 and is multiplied by the
# per-token probability reported for it.
#
# @param corpi [#each] corpus names to score
# @param text [Object] passed to +tokenize+
# @param options [Hash] passed to +tokenize+
# @return [Hash] corpus name => product of the retained probabilities
def guess(corpi, text, options = {})
  probs_for_token = {}
  tokenize(text, options).each do |token|
    probs, seen = get_prob_by_corpus(corpi, token)
    next unless seen

    probs_for_token[token] = probs
  end

  scores = {}
  corpi.each { |name| scores[name] = 1.0 }

  probs_for_token.each_value do |probs|
    probs.each { |name, factor| scores[name] *= factor }
  end
  scores
end

#train(corpus, text, options = {}) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
# File 'lib/thomas.rb', line 73

# Records one additional occurrence of every token of +text+ for +corpus+.
#
# For each token from +tokenize+, an Update already queued in
# +@@updates[corpus]+ is adjusted by +1; otherwise a new Update with an
# initial amount of 1 is queued. +flush_updates+ is invoked after every
# token (its behavior is defined elsewhere in the file).
#
# @param corpus [Object] key under which updates are queued
# @param text [Object] passed to +tokenize+
# @param options [Hash] passed to +tokenize+
# @return [Object] whatever +each+ returns for the tokenized collection
def train(corpus, text, options = {})
  tokenize(text, options).each do |token|
    # Re-resolved on every pass; flush_updates is not visible here and may
    # alter @@updates between tokens.
    queued = (@@updates[corpus] ||= {})
    pending = queued[token]
    if pending
      pending.adjust(1)
    else
      queued[token] = Update.new(@model, corpus, token, 1)
    end
    flush_updates
  end
end

#untrain(corpus, text, options = {}) ⇒ Object



85
86
87
88
89
90
91
92
93
94
95
# File 'lib/thomas.rb', line 85

# Removes one occurrence of every token of +text+ from +corpus+.
#
# For each token from +tokenize+, an Update already queued in
# +@@updates[corpus]+ is adjusted by -1; otherwise a new Update with an
# initial amount of -1 is queued. +flush_updates+ is invoked after every
# token (its behavior is defined elsewhere in the file).
#
# @param corpus [Object] key under which updates are queued
# @param text [Object] passed to +tokenize+
# @param options [Hash] passed to +tokenize+
# @return [Object] whatever +each+ returns for the tokenized collection
def untrain(corpus, text, options = {})
  tokenize(text, options).each do |token|
    # Re-resolved on every pass; flush_updates is not visible here and may
    # alter @@updates between tokens.
    queued = (@@updates[corpus] ||= {})
    pending = queued[token]
    if pending
      pending.adjust(-1)
    else
      queued[token] = Update.new(@model, corpus, token, -1)
    end
    flush_updates
  end
end