Class: OmniCat::Classifiers::Bayes

Inherits:
Base
  • Object
show all
Defined in:
lib/omnicat/classifiers/bayes.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from Base

#add_categories, #classify_batch, #train_batch

Methods inherited from Base

#to_hash

Constructor Details

#initialize(bayes_hash = {}) ⇒ Bayes

Returns a new instance of Bayes.



12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/omnicat/classifiers/bayes.rb', line 12

# Builds a classifier, optionally restoring its state from a hash.
#
# bayes_hash - Hash of saved state (default: {}). Keys read here are
#              :categories, :category_count, :doc_count, :k_value,
#              :token_count and :uniq_token_count.
def initialize(bayes_hash = {})
  self.categories = ::OmniCat::Hash.new
  if bayes_hash.key?(:categories)
    # Wrap every stored category description in a Category object.
    bayes_hash[:categories].each do |category_name, category_hash|
      categories[category_name] =
        ::OmniCat::Classifiers::BayesInternals::Category.new(category_hash)
    end
  end

  # k_value falls back to 1.0 when the key is absent (or nil/false).
  self.k_value = bayes_hash[:k_value] || 1.0

  # Counters are coerced with to_i, so a missing key yields 0.
  self.category_count   = bayes_hash[:category_count].to_i
  self.doc_count        = bayes_hash[:doc_count].to_i
  self.token_count      = bayes_hash[:token_count].to_i
  self.uniq_token_count = bayes_hash[:uniq_token_count].to_i
end

Instance Attribute Details

#categories ⇒ Object

::OmniCat::Hash - Hash of categories



5
6
7
# File 'lib/omnicat/classifiers/bayes.rb', line 5

# Reader for the classifier's category table.
#
# Returns the object held in @categories. The constructor assigns an
# ::OmniCat::Hash here, keyed by category name, whose values are
# BayesInternals::Category instances.
def categories
  @categories
end

#category_count ⇒ Object

Integer - Total category count



6
7
8
# File 'lib/omnicat/classifiers/bayes.rb', line 6

# Reader for the number of categories.
#
# Returns the value held in @category_count. The constructor coerces it
# with to_i, and add_category increments it by one per new category.
def category_count
  @category_count
end

#doc_count ⇒ Object

Integer - Total document count



7
8
9
# File 'lib/omnicat/classifiers/bayes.rb', line 7

# Reader for the document counter.
#
# Returns the value held in @doc_count, which the constructor coerces
# with to_i. Presumably the number of documents trained so far (train
# calls increment_doc_counts, whose body is not shown here).
def doc_count
  @doc_count
end

#k_value ⇒ Object

Numeric - Helper value for skipping some Bayes algorithm errors (defaults to 1.0)



10
11
12
# File 'lib/omnicat/classifiers/bayes.rb', line 10

# Reader for the k helper value.
#
# Returns the value held in @k_value. The constructor takes it from
# bayes_hash[:k_value] and falls back to 1.0 when that is nil or missing.
def k_value
  @k_value
end

#token_count ⇒ Object

Integer - Total token count



8
9
10
# File 'lib/omnicat/classifiers/bayes.rb', line 8

# Reader for the token counter.
#
# Returns the value held in @token_count, which the constructor coerces
# with to_i. Presumably the total number of tokens seen in training;
# the code that updates it is not shown here.
def token_count
  @token_count
end

#uniq_token_count ⇒ Object

Integer - Total unique token count



9
10
11
# File 'lib/omnicat/classifiers/bayes.rb', line 9

# Reader for the unique-token counter.
#
# Returns the value held in @uniq_token_count, which the constructor
# coerces with to_i. Presumably the number of distinct tokens seen in
# training; the code that updates it is not shown here.
def uniq_token_count
  @uniq_token_count
end

Instance Method Details

#add_category(name) ⇒ Object

Adds a new classification category

Parameters

  • name - Name for category

Examples

# Create a classification category
bayes = Bayes.new
bayes.add_category("positive")


37
38
39
40
41
42
43
44
45
# File 'lib/omnicat/classifiers/bayes.rb', line 37

# Registers a new, empty classification category.
#
# name - Name for the category.
#
# Returns the newly created Category object.
# Raises StandardError if a category with this name already exists.
def add_category(name)
  raise StandardError, "Category with name '#{name}' is already exists!" if category_exists?(name)

  # Bump the counter first, then store the fresh category under its name.
  self.category_count += 1
  categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end

#classify(doc) ⇒ Object

Classify the given document

Parameters

  • doc - The document for classification

Returns

  • result - OmniCat::Result object

Examples

# Classify a document
bayes.classify("good documentation")
=>


90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/omnicat/classifiers/bayes.rb', line 90

# Scores the document against every category and reports the best match.
#
# doc - The document to classify.
#
# Returns an ::OmniCat::Result with per-category scores, their total, the
# name of the highest-scoring category and its floored percentage share
# of the total.
# Raises StandardError when fewer than two categories are registered.
def classify(doc)
  raise StandardError, "At least 2 categories needed for classification process!" if category_count < 2

  result = ::OmniCat::Result.new
  best_score = -1_000_000 # starting threshold a score must exceed to win
  categories.each do |category_name, category|
    probability = doc_probability(category, doc)
    result.scores[category_name] = probability
    # Strict comparison: on a tie the category seen first stays selected.
    if probability > best_score
      best_score = probability
      result.category[:name] = category_name
    end
    result.total_score += probability
  end

  # A zero total is replaced by 1 so the division below is well defined.
  result.total_score = 1 if result.total_score == 0
  winning_score = result.scores[result.category[:name]]
  result.category[:percentage] = (winning_score * 100.0 / result.total_score).floor
  result
end

#train(category_name, doc) ⇒ Object

Train the desired category with a document

Parameters

  • category_name - Name of the category from the added categories list

  • doc - Document text

Examples

# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")


61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/omnicat/classifiers/bayes.rb', line 61

# Trains an existing category with a document.
#
# category_name - Name of a category that has already been added.
# doc           - Document; must respond to tokenize_with_counts, which is
#                 iterated as (token, count) pairs.
#
# Returns the collection produced by doc.tokenize_with_counts.
# Raises StandardError if the category does not exist.
def train(category_name, doc)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  increment_doc_counts(category_name)
  update_priors
  doc.tokenize_with_counts.each do |token, count|
    increment_token_counts(category_name, token, count)
    # Add this document's occurrences to the category's running tally;
    # to_i turns a not-yet-seen token (nil) into 0.
    tokens = categories[category_name].tokens
    tokens[token] = tokens[token].to_i + count
  end
end