Module: Dwarf::Information

Extended by:
ExampleManagement
Defined in:
lib/dwarf/information.rb

Class Method Summary

Methods included from ExampleManagement

attribute_map, classification_map, eval_attribute, filter_classifications, invert_with_dups

Class Method Details

.entropy(example_subset, classifications) ⇒ Object



7
8
9
10
11
12
13
14
15
16
17
# File 'lib/dwarf/information.rb', line 7

# Computes the entropy of +example_subset+ with respect to +classifications+.
#
# The logarithm base is the number of classifications reported by
# +filter_classifications+ for this subset (presumably the distinct
# classifications actually present in it -- confirm against
# ExampleManagement).
#
# @param example_subset [#length] the examples to measure
# @param classifications classification data, passed through unchanged to
#   the ExampleManagement helpers
# @return [Float, Object] 0.0 when exactly one classification is reported;
#   otherwise the value +sum_over+ produces from the per-classification
#   terms <tt>-f * log(f, k)</tt>, where +f+ is the classification's share of
#   the subset and +k+ is the number of reported classifications
def entropy(example_subset, classifications)
  seen = filter_classifications(classifications, example_subset)
  class_count = seen.length
  # A single classification carries no uncertainty (and would make the log base 1).
  return 0.0 if class_count == 1

  total = example_subset.length.to_f
  by_classification = invert_with_dups(classification_map(example_subset, classifications))
  counts = occurrences(by_classification)

  sum_over(seen) do |label|
    share = counts[label] / total
    -share * Math.log(share, class_count)
  end
end

.information_gain(example_subset, attribute, classifications) ⇒ Object



19
20
21
22
# File 'lib/dwarf/information.rb', line 19

# Computes the information gain of +attribute+ over +example_subset+ after
# passing the subset through +filter_for_missing_attribute+ (presumably
# dropping examples that lack the attribute -- confirm against the helper).
#
# @param example_subset the examples to evaluate
# @param attribute the attribute whose gain is measured
# @param classifications classification data, forwarded unchanged
# @return [Object] the result of +unfiltered_information_gain+ on the
#   filtered subset
def information_gain(example_subset, attribute, classifications)
  unfiltered_information_gain(
    filter_for_missing_attribute(example_subset, attribute),
    attribute,
    classifications
  )
end

.unfiltered_information_gain(example_subset, attribute, classifications) ⇒ Object



24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/dwarf/information.rb', line 24

# Computes the information gain of +attribute+ over +example_subset+ without
# any pre-filtering: the entropy of the whole subset minus the
# frequency-weighted entropy of each group of examples sharing an attribute
# value.
#
# @param example_subset [#length] the examples to evaluate
# @param attribute the attribute used to group the examples
# @param classifications classification data, forwarded to +entropy+
# @return [Object] the whole-subset entropy minus the value +sum_over+
#   produces from the weighted per-value entropies
def unfiltered_information_gain(example_subset, attribute, classifications)
  total = example_subset.length.to_f
  by_value = invert_with_dups(attribute_map(example_subset, attribute))
  counts = occurrences(by_value)
  baseline = entropy(example_subset, classifications)
  values = attribute_values(example_subset, attribute)

  # Expected entropy remaining once the examples are split on the attribute.
  remainder = sum_over(values) do |value|
    weight = counts[value] / total
    weight * entropy(by_value[value], classifications)
  end

  baseline - remainder
end