Class: Idhja22::Bayes

Inherits:
BinaryClassifier show all
Defined in:
lib/idhja22/bayes.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from BinaryClassifier

train, train_and_validate, train_and_validate_from_csv, train_from_csv, #validate

Instance Attribute Details

#conditional_probabilities ⇒ Object

Returns the value of attribute conditional_probabilities.



3
4
5
# File 'lib/idhja22/bayes.rb', line 3

# Reader for the conditional-probability table stored in
# +@conditional_probabilities+.
#
# #train assigns the result of .calculate_conditional_probabilities through
# the matching writer, and #evaluate reads the table as
# <tt>table[attribute_label][category][attribute_value]</tt>.
#
# @return [Object] whatever is currently held in the instance variable
#   (+nil+ if nothing has been assigned yet)
def conditional_probabilities
  @conditional_probabilities
end

#prior_probabilities ⇒ Object

Returns the value of attribute prior_probabilities.



3
4
5
# File 'lib/idhja22/bayes.rb', line 3

# Reader for the prior-probability table stored in +@prior_probabilities+.
#
# #train assigns the result of .calculate_priors through the matching writer,
# and #evaluate iterates the table as category => prior probability pairs.
#
# @return [Object] whatever is currently held in the instance variable
#   (+nil+ if nothing has been assigned yet)
def prior_probabilities
  @prior_probabilities
end

Class Method Details

.calculate_conditional_probabilities(dataset, attribute_labels_to_use) ⇒ Object



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/idhja22/bayes.rb', line 5

# Builds the table of relative frequencies of each attribute value within
# each category, for every requested attribute.
#
# The result is a three-level hash:
#   attribute label => category => attribute value => frequency
# where frequency is (rows in the category with that value) divided by
# (rows in the category). The innermost hashes default to 0 for values that
# were not observed.
#
# @param dataset [#partition_by_category] source data; each partition must
#   respond to +size+ and +partition(attr_label)+
# @param attribute_labels_to_use [#each] labels of the attributes to tabulate
# @return [Hash] the nested frequency table described above
def calculate_conditional_probabilities(dataset, attribute_labels_to_use)
  # The per-category split does not depend on the attribute, so it is computed
  # once instead of once per attribute label.
  # NOTE(review): assumes partition_by_category is a side-effect-free query.
  datasets_by_category = dataset.partition_by_category

  conditional_probabilities = {}
  attribute_labels_to_use.each do |attr_label|
    per_category = {}
    datasets_by_category.each do |cat, uniform_category_ds|
      category_size = uniform_category_ds.size.to_f
      value_probabilities = Hash.new(0)
      uniform_category_ds.partition(attr_label).each do |attr_value, uniform_value_ds|
        value_probabilities[attr_value] = uniform_value_ds.size.to_f / category_size
      end
      per_category[cat] = value_probabilities
    end
    conditional_probabilities[attr_label] = per_category
  end

  conditional_probabilities
end

.calculate_priors(dataset) ⇒ Object



21
22
23
24
25
26
27
# File 'lib/idhja22/bayes.rb', line 21

# Computes the prior probability of each category as its share of the dataset.
#
# @param dataset [#category_counts, #size] +category_counts+ is iterated as
#   category => count pairs; +size+ is the divisor
# @return [Hash] category => count / dataset size (as Float); lookups of
#   categories that were never counted return 0
def calculate_priors(dataset)
  dataset.category_counts.each_with_object(Hash.new(0)) do |(category, occurrences), priors|
    priors[category] = occurrences.to_f / dataset.size.to_f
  end
end

Instance Method Details

#evaluate(query) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/idhja22/bayes.rb', line 30

# Scores a query with the trained tables and returns the normalised score of
# the 'Y' category.
#
# For every category in +prior_probabilities+ the score is the prior multiplied
# by the stored conditional probability of the query's value for each
# attribute in +conditional_probabilities+. The return value is the 'Y' score
# divided by the sum of all category scores.
#
# If no 'Y' category was present in the priors, its score is treated as 0.0
# rather than failing on a nil division.
#
# @param query [#[]] maps attribute label => attribute value
# @return [Float] score of 'Y' relative to the total over all categories
# @raise [Idhja22::Dataset::Datum::UnknownAttributeValue] if the query's value
#   for some attribute has no stored probability under some category
def evaluate(query)
  nb_values = {}
  total_values = 0

  prior_probabilities.each do |cat, prior_prob|
    nb_value = prior_prob
    conditional_probabilities.each do |attr_label, probs|
      attr_value = query[attr_label]
      # The per-category tables default to 0, so presence must be tested
      # explicitly to tell "never seen" apart from a stored probability.
      unless probs[cat].key?(attr_value)
        raise Idhja22::Dataset::Datum::UnknownAttributeValue,
              "Not seen value #{attr_value} for attribute #{attr_label} in training."
      end
      nb_value *= probs[cat][attr_value]
    end
    total_values += nb_value
    nb_values[cat] = nb_value
  end

  # A category absent from the priors has probability 0 (matching the priors'
  # own default), so fall back to 0.0 instead of dividing nil.
  nb_values.fetch('Y', 0.0) / total_values
end

#train(dataset, attributes_to_use) ⇒ Object



47
48
49
50
51
# File 'lib/idhja22/bayes.rb', line 47

# Fits this classifier to a dataset by filling both probability tables.
#
# The conditional table comes from the class-level
# +calculate_conditional_probabilities+ and the priors from the class-level
# +calculate_priors+; each is stored through its attribute writer.
#
# @param dataset [Object] training data handed to both class-level calculators
# @param attributes_to_use [Object] attribute labels handed to
#   +calculate_conditional_probabilities+
# @return [self] the receiver, so calls can be chained
def train(dataset, attributes_to_use)
  calculator = self.class

  conditionals = calculator.calculate_conditional_probabilities(dataset, attributes_to_use)
  self.conditional_probabilities = conditionals

  priors = calculator.calculate_priors(dataset)
  self.prior_probabilities = priors

  self
end