Module: Dwarf::Information
Class Method Summary
collapse
attribute_map, classification_map, eval_attribute, filter_classifications, invert_with_dups
Class Method Details
.entropy(example_subset, classifications) ⇒ Object
7
8
9
10
11
12
13
14
15
16
17
|
# File 'lib/dwarf/information.rb', line 7
# Computes the entropy of +example_subset+ with respect to +classifications+.
#
# The logarithm base is the number of distinct classifications that
# filter_classifications reports for the subset, and a subset reporting
# exactly one classification short-circuits to 0.0.
#
# NOTE(review): because the base is taken from the subset itself, values
# computed for subsets with different class counts are on different
# scales - confirm this is intended where such values are combined.
#
# @param example_subset the examples to measure; must respond to #length
# @param classifications the candidate classifications
# @return 0.0 for a single-classification subset, otherwise whatever
#   sum_over yields for the per-classification terms
def entropy(example_subset, classifications)
  present = filter_classifications(classifications, example_subset)
  class_count = present.length
  # Only one classification present: nothing to sum.
  return 0.0 if class_count == 1

  total = example_subset.length.to_f
  by_classification = invert_with_dups(classification_map(example_subset, classifications))
  counts = occurrences(by_classification)

  sum_over(present) do |label|
    share = counts[label] / total
    -share * Math.log(share, class_count)
  end
end
|
19
20
21
22
|
# File 'lib/dwarf/information.rb', line 19
# Information gain of +attribute+ over +example_subset+, computed after the
# subset has been passed through filter_for_missing_attribute for that
# attribute.
#
# @param example_subset the examples to evaluate
# @param attribute the attribute whose gain is measured
# @param classifications the candidate classifications
# @return the result of unfiltered_information_gain on the filtered subset
def information_gain(example_subset, attribute, classifications)
  unfiltered_information_gain(
    filter_for_missing_attribute(example_subset, attribute),
    attribute,
    classifications
  )
end
|
24
25
26
27
28
29
30
31
32
33
34
35
|
# File 'lib/dwarf/information.rb', line 24
# Information gain of +attribute+ over +example_subset+ without any
# pre-filtering: the entropy of the whole subset minus the sum, over each
# value returned by attribute_values, of that value's share of the subset
# times the entropy of the examples grouped under it.
#
# @param example_subset the examples to evaluate; must respond to #length
# @param attribute the attribute to split on
# @param classifications the candidate classifications
# @return the subset entropy less the weighted per-value entropies
def unfiltered_information_gain(example_subset, attribute, classifications)
  total = example_subset.length.to_f
  by_value = invert_with_dups(attribute_map(example_subset, attribute))
  counts = occurrences(by_value)
  baseline = entropy(example_subset, classifications)
  values = attribute_values(example_subset, attribute)

  # Entropy remaining after the split, weighted by how often each value occurs.
  remainder = sum_over(values) do |value|
    weight = counts[value] / total
    weight * entropy(by_value[value], classifications)
  end

  baseline - remainder
end
|