Class: OmniCat::Classifiers::Bayes

Inherits:
Strategy
  • Object
show all
Defined in:
lib/omnicat/classifiers/bayes.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(bayes_hash = {}) ⇒ Bayes

Returns a new instance of Bayes.



9
10
11
12
13
14
15
16
17
# File 'lib/omnicat/classifiers/bayes.rb', line 9

# Builds a Bayes classifier from an optional attribute hash.
#
# bayes_hash - Hash of initial state. Two keys are read here:
#              :categories - name => category attributes; each value is
#                            wrapped in a BayesInternals::Category
#              :k_value    - helper value; falls back to 1.0 when absent
#                            or falsy
#
# The hash is first handed to the parent initializer unchanged.
def initialize(bayes_hash = {})
  super
  # A missing :categories key simply means there is nothing to rebuild.
  bayes_hash.fetch(:categories, {}).each do |name, category|
    @categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new(category)
  end
  @k_value = bayes_hash[:k_value] || 1.0
end

Instance Attribute Details

#k_value ⇒ Object

Numeric - Helper value for skipping some Bayes algorithm errors (defaults to 1.0)



7
8
9
# File 'lib/omnicat/classifiers/bayes.rb', line 7

# Returns the helper value stored in @k_value (described in this class as a
# value for skipping some Bayes algorithm errors). The constructor sets it
# to the supplied :k_value, or 1.0 when none is given.
def k_value
  @k_value
end

Instance Method Details

#add_category(category_name) ⇒ Object

Adds a new classification category

Parameters

  • category_name - Name for category

Examples

# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")


30
31
32
33
34
35
36
37
38
# File 'lib/omnicat/classifiers/bayes.rb', line 30

# Registers a new, empty category under the given name.
#
# category_name - Name for the category; must not already be registered.
#
# Returns the newly created BayesInternals::Category.
# Raises StandardError when category_exists? reports the name as taken.
def add_category(category_name)
  # Reject duplicates up front so the category count is never bumped twice.
  if category_exists?(category_name)
    raise StandardError, "Category with name '#{category_name}' is already exists!"
  end

  increment_category_count
  @categories[category_name] =
    ::OmniCat::Classifiers::BayesInternals::Category.new(name: category_name)
end

#classify(doc_content) ⇒ Object

Classify the given document

Parameters

  • doc_content - The document for classification

Returns

  • result - OmniCat::Result object

Examples

# Classify a document
bayes.classify("good documentation")
=>


106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# File 'lib/omnicat/classifiers/bayes.rb', line 106

# Scores a document against every known category.
#
# doc_content - The document text to classify.
#
# Returns an ::OmniCat::Result holding one Score per category (keyed by the
# category's name), with percentages calculated; returns nil when
# classifiable? is false.
def classify(doc_content)
  return unless classifiable?

  doc = ::OmniCat::Doc.new(content: doc_content)
  result = ::OmniCat::Result.new
  @categories.each do |_name, category|
    score = Score.new(key: category.name, value: doc_probability(category, doc))
    result.add_score(score)
  end

  # The top-scoring category is handed to auto_train before the percentages
  # are computed, matching the established call order.
  winner = @categories[result.top_score.key]
  auto_train(winner, doc)
  result.calculate_percentages
  result
end

#train(category_name, doc_content) ⇒ Object

Train the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

Examples

# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")


54
55
56
57
58
59
60
61
62
63
# File 'lib/omnicat/classifiers/bayes.rb', line 54

# Feeds a document into a category and refreshes the priors.
#
# category_name - Name of an already-added category (checked through
#                 category_must_exist).
# doc_content   - Document text.
#
# Returns whatever update_priors returns.
def train(category_name, doc_content)
  category_must_exist(category_name)
  doc = add_doc(category_name, doc_content)
  doc.tokens.each do |token, count|
    increment_token_counts(category_name, token, count)
    # to_i turns a missing (nil) entry into 0 before adding.
    token_table = @categories[category_name].tokens
    token_table[token] = token_table[token].to_i + count
  end
  increment_doc_counts(category_name)
  update_priors
end

#untrain(category_name, doc_content) ⇒ Object

Untrain the desired category with a document

Parameters

  • category_name - Name of the category from added categories list

  • doc_content - Document text

Examples

# Untrain the desired category
bayes.untrain("positive", "clear documentation")
bayes.untrain("positive", "good, very well")
bayes.untrain("negative", "bad dog")
bayes.untrain("neutral", "how is the management gui")


79
80
81
82
83
84
85
86
87
88
89
# File 'lib/omnicat/classifiers/bayes.rb', line 79

# Removes a document's contribution from a category and refreshes the priors.
#
# category_name - Name of an already-added category (checked through
#                 category_must_exist).
# doc_content   - Document text.
#
# Returns whatever update_priors returns.
def untrain(category_name, doc_content)
  category_must_exist(category_name)
  doc = remove_doc(category_name, doc_content)
  doc.tokens.each do |token, count|
    token_table = @categories[category_name].tokens
    remaining = token_table[token].to_i - count
    token_table[token] = remaining
    # Entries that reach exactly zero are dropped from the table.
    token_table.delete(token) if remaining.zero?
    decrement_token_counts(category_name, token, count)
  end
  decrement_doc_counts(category_name)
  update_priors
end