Class: OmniCat::Classifiers::Bayes
- Defined in:
- lib/omnicat/classifiers/bayes.rb
Instance Attribute Summary collapse
-
#categories ⇒ Object
::OmniCat::Hash - Hash of categories.
-
#category_count ⇒ Object
Integer - Total category count.
-
#doc_count ⇒ Object
Integer - Total document count.
-
#k_value ⇒ Object
Numeric (defaults to 1.0) - Helper value for skipping some Bayes algorithm errors.
-
#token_count ⇒ Object
Integer - Total token count.
-
#uniq_token_count ⇒ Object
Integer - Total uniq token count.
Instance Method Summary collapse
-
#add_category(name) ⇒ Object
Allows adding new classification category.
-
#classify(doc) ⇒ Object
Classify the given document.
-
#initialize(bayes_hash = {}) ⇒ Bayes
constructor
A new instance of Bayes.
-
#train(category_name, doc) ⇒ Object
Train the desired category with a document.
Methods inherited from Base
#add_categories, #classify_batch, #train_batch
Methods inherited from Base
Constructor Details
#initialize(bayes_hash = {}) ⇒ Bayes
Returns a new instance of Bayes.
12 13 14 15 16 17 18 19 20 21 22 23 24 |
# File 'lib/omnicat/classifiers/bayes.rb', line 12
#
# Builds a Bayes classifier, optionally restoring state from a hash.
#
# bayes_hash - Hash of previously exported state (default: {}). Keys read:
#              :categories, :category_count, :doc_count, :k_value,
#              :token_count and :uniq_token_count. Missing counters become
#              0 (via nil.to_i); a missing :k_value becomes 1.0.
def initialize(bayes_hash = {})
  self.categories = ::OmniCat::Hash.new

  # `|| {}` covers both an absent :categories key and an explicit nil value;
  # the latter used to raise NoMethodError on nil.each.
  (bayes_hash[:categories] || {}).each do |name, category|
    categories[name] =
      ::OmniCat::Classifiers::BayesInternals::Category.new(category)
  end

  self.category_count = bayes_hash[:category_count].to_i
  self.doc_count = bayes_hash[:doc_count].to_i
  self.k_value = bayes_hash[:k_value] || 1.0
  self.token_count = bayes_hash[:token_count].to_i
  self.uniq_token_count = bayes_hash[:uniq_token_count].to_i
end
Instance Attribute Details
#categories ⇒ Object
::OmniCat::Hash - Hash of categories
5 6 7 |
# File 'lib/omnicat/classifiers/bayes.rb', line 5
#
# Returns the ::OmniCat::Hash of categories held in @categories.
def categories
  @categories
end
#category_count ⇒ Object
Integer - Total category count
6 7 8 |
# File 'lib/omnicat/classifiers/bayes.rb', line 6
#
# Returns the Integer held in @category_count (total category count).
def category_count
  @category_count
end
#doc_count ⇒ Object
Integer - Total document count
7 8 9 |
# File 'lib/omnicat/classifiers/bayes.rb', line 7
#
# Returns the Integer held in @doc_count.
# NOTE(review): presumably the number of trained documents rather than
# tokens -- confirm against increment_doc_counts.
def doc_count
  @doc_count
end
#k_value ⇒ Object
Numeric (defaults to 1.0) - Helper value for skipping some Bayes algorithm errors
10 11 12 |
# File 'lib/omnicat/classifiers/bayes.rb', line 10
#
# Returns the helper value held in @k_value. The constructor sets it to
# 1.0 when the state hash supplies no :k_value.
def k_value
  @k_value
end
#token_count ⇒ Object
Integer - Total token count
8 9 10 |
# File 'lib/omnicat/classifiers/bayes.rb', line 8
#
# Returns the Integer held in @token_count (total token count).
def token_count
  @token_count
end
#uniq_token_count ⇒ Object
Integer - Total uniq token count
9 10 11 |
# File 'lib/omnicat/classifiers/bayes.rb', line 9
#
# Returns the Integer held in @uniq_token_count (total unique token count).
def uniq_token_count
  @uniq_token_count
end
Instance Method Details
#add_category(name) ⇒ Object
37 38 39 40 41 42 43 44 45 |
# File 'lib/omnicat/classifiers/bayes.rb', line 37
#
# Registers a new, empty classification category.
#
# name - Key under which the category is stored in #categories.
#
# Returns the newly created BayesInternals::Category.
# Raises StandardError if category_exists?(name) is already true.
def add_category(name)
  # Message text is kept verbatim; callers may match on it.
  if category_exists?(name)
    raise StandardError, "Category with name '#{name}' is already exists!"
  end

  self.category_count += 1
  categories[name] = ::OmniCat::Classifiers::BayesInternals::Category.new
end
#classify(doc) ⇒ Object
Classify the given document
Parameters
-
doc
- The document for classification
Returns
-
result
- OmniCat::Result object
Examples
# Classify a document
bayes.classify("good documentation")
=>
90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/omnicat/classifiers/bayes.rb', line 90
#
# Scores a document against every category and reports the best match.
#
# doc - The document for classification; passed unchanged to
#       doc_probability for each category.
#
# Returns an ::OmniCat::Result whose scores hold one entry per category,
# whose total_score is their sum (forced to 1 when the sum is 0 so the
# percentage division is defined) and whose category hash carries the
# winning :name and its floored :percentage of the total.
# Raises StandardError when fewer than two categories exist.
def classify(doc)
  if category_count < 2
    raise StandardError,
          "At least 2 categories needed for classification process!"
  end

  # Start below every possible score so the first category always becomes
  # the provisional winner; a fixed -1000000 floor left the winner nil
  # whenever all scores were at or below it.
  best_score = -Float::INFINITY
  result = ::OmniCat::Result.new

  categories.each do |category_name, category|
    score = doc_probability(category, doc)
    result.scores[category_name] = score
    # Strict `>` keeps the earliest category on ties.
    if score > best_score
      result.category[:name] = category_name
      best_score = score
    end
    result.total_score += score
  end

  result.total_score = 1 if result.total_score == 0
  winning_score = result.scores[result.category[:name]]
  result.category[:percentage] =
    (winning_score * 100.0 / result.total_score).floor
  result
end
#train(category_name, doc) ⇒ Object
Train the desired category with a document
Parameters
-
category_name
- Name of the category from added categories list -
doc
- Document text
Examples
# Train the desired category
bayes.train("positive", "clear documentation")
bayes.train("positive", "good, very well")
bayes.train("negative", "bad dog")
bayes.train("neutral", "how is the management gui")
61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/omnicat/classifiers/bayes.rb', line 61
#
# Trains an existing category with one document.
#
# category_name - Name of a category previously added to #categories.
# doc           - Object responding to tokenize_with_counts, which must
#                 yield token/count pairs.
#
# Returns whatever doc.tokenize_with_counts.each returns.
# Raises StandardError when category_exists?(category_name) is false;
# nothing is counted in that case.
def train(category_name, doc)
  unless category_exists?(category_name)
    raise StandardError,
          "Category with name '#{category_name}' does not exist!"
  end

  increment_doc_counts(category_name)
  update_priors

  doc.tokenize_with_counts.each do |token, count|
    increment_token_counts(category_name, token, count)
    # Looked up after increment_token_counts, as in the original ordering.
    category_tokens = categories[category_name].tokens
    # to_i turns a missing (nil) entry into 0 before adding.
    category_tokens[token] = category_tokens[token].to_i + count
  end
end