Module: EverydayCliUtils::KmeansUtil

Defined in:
lib/everyday-cli-utils/safe/kmeans.rb

Class Method Summary collapse

Class Method Details

.f_test(clusters, means, cnt, avg) ⇒ Object



10
11
12
13
14
15
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 10

# Ratio of the f_test_ev result to the f_test_uv result for a clustering.
#
# NOTE(review): this looks like an ANOVA-style F statistic (between-cluster
# over within-cluster variance) -- confirm against callers.
#
# @param clusters [Array] clusters; each element must respond to +empty?+.
#   Only non-empty clusters are counted.
# @param means [Array] per-cluster centers, forwarded to both helpers
# @param cnt [Numeric] forwarded to f_test_uv (presumably the total item count -- confirm)
# @param avg [Numeric] forwarded to f_test_ev (presumably the overall mean -- confirm)
# @return [Object] the f_test_ev value divided by the f_test_uv value
def self.f_test(clusters, means, cnt, avg)
  populated   = clusters.reject(&:empty?).size
  explained   = f_test_ev(avg, clusters, populated, means)
  unexplained = f_test_uv(clusters, cnt, populated, means)
  explained / unexplained
end

.f_test2(clusters, means, cnt) ⇒ Object



34
35
36
37
38
39
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 34

# Sums the f_test2_calc value of every non-empty cluster and divides the total
# by (cnt - number of non-empty clusters).
#
# @param clusters [Array] clusters, indexed in parallel with +means+
# @param means [Array] per-cluster centers; its length drives the iteration
# @param cnt [Numeric] value the non-empty cluster count is subtracted from
#   (presumably the total item count -- confirm)
# @return [Float] accumulated per-cluster value divided by (cnt - non-empty count)
def self.f_test2(clusters, means, cnt)
  populated = clusters.reject(&:empty?).size
  # A plain fold keeps naive left-to-right float addition; the running total is
  # also handed to f_test2_calc as its fourth argument.
  total = means.count.times.inject(0.0) do |acc, idx|
    clusters[idx].empty? ? acc : acc + f_test2_calc(clusters, idx, means, acc)
  end
  total / (cnt - populated)
end

.f_test2_calc(clusters, i, means, uv) ⇒ Object



41
42
43
44
45
46
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 41

# Root-mean-square deviation of cluster +i+ from its center: the square root of
# the average squared difference between each member and means[i].
#
# @param clusters [Array<Array<Numeric>>] clusters of numeric values
# @param i [Integer] index of the cluster (and of its center in +means+)
# @param means [Array<Numeric>] per-cluster centers
# @param uv [Object] unused; kept so existing callers keep working
# @return [Float] sqrt(sum((member - means[i]) ** 2) / member count)
def self.f_test2_calc(clusters, i, means, uv)
  members = clusters[i]
  center  = means[i]
  squared = members.inject(0.0) { |acc, value| acc + (value - center) ** 2.0 }
  Math.sqrt(squared / members.count)
end

.f_test_ev(avg, clusters, cnt2, means) ⇒ Object



17
18
19
20
21
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 17

# Size-weighted sum of squared distances between each non-empty cluster's
# center and +avg+, divided by (cnt2 - 1.0).
#
# @param avg [Numeric] reference value every center is compared against
# @param clusters [Array<Array>] clusters, indexed in parallel with +means+
# @param cnt2 [Numeric] divisor base; f_test passes the number of non-empty clusters
# @param means [Array<Numeric>] per-cluster centers
# @return [Float] sum(size_i * (means[i] - avg) ** 2) / (cnt2 - 1.0)
def self.f_test_ev(avg, clusters, cnt2, means)
  weighted = means.count.times.inject(0.0) do |acc, idx|
    members = clusters[idx]
    # Empty clusters contribute nothing; their center is never read.
    term = members.empty? ? 0.0 : members.count * ((means[idx] - avg) ** 2.0)
    acc + term
  end
  weighted / (cnt2 - 1.0)
end

.f_test_uv(clusters, cnt, cnt2, means) ⇒ Object



23
24
25
26
27
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 23

# Threads an accumulator (starting at 0.0) through f_test_uvi for every cluster
# index and divides the final value by (cnt - cnt2).
#
# @param clusters [Array<Array>] clusters, indexed in parallel with +means+
# @param cnt [Numeric] minuend of the divisor (presumably the total item count -- confirm)
# @param cnt2 [Numeric] subtrahend of the divisor; f_test passes the number of non-empty clusters
# @param means [Array<Numeric>] per-cluster centers; its length drives the iteration
# @return [Float] accumulated f_test_uvi value divided by (cnt - cnt2)
def self.f_test_uv(clusters, cnt, cnt2, means)
  total = means.count.times.inject(0.0) { |acc, idx| f_test_uvi(clusters, idx, means, acc) }
  total / (cnt - cnt2)
end

.f_test_uvi(clusters, i, means, uv) ⇒ Object



29
30
31
32
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 29

# Adds the squared deviation of every member of cluster +i+ from means[i] to
# the running total +uv+ and returns the new total.
#
# @param clusters [Array<Array<Numeric>>] clusters of numeric values
# @param i [Integer] index of the cluster (and of its center in +means+)
# @param means [Array<Numeric>] per-cluster centers
# @param uv [Numeric] running total carried in from the caller
# @return [Numeric] +uv+ plus sum((member - means[i]) ** 2); +uv+ unchanged for an empty cluster
def self.f_test_uvi(clusters, i, means, uv)
  members = clusters[i]
  return uv if members.empty?

  center = means[i]
  members.inject(uv) do |acc, value|
    deviation = value - center
    acc + deviation * deviation
  end
end

.find_outliers(avg, cs, i, sensitivity) ⇒ Object



65
66
67
68
69
70
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 65

# Picks the members of cluster cs[i] whose normal-density value, scaled by the
# cluster size, falls below +sensitivity+.
#
# The density is evaluated through +normal+ using the supplied +avg+ and the
# value returned by EverydayCliUtils::MapUtil.std_dev for the cluster.
#
# @param avg [Numeric] center passed to +normal+ for every member
# @param cs [Array<Array<Numeric>>] all clusters
# @param i [Integer] index of the cluster to inspect
# @param sensitivity [Numeric] threshold; members scoring strictly below it are returned
# @return [Array<Numeric>] members of cs[i] flagged as outliers, in original order
def self.find_outliers(avg, cs, i, sensitivity)
  members = cs[i]
  spread  = EverydayCliUtils::MapUtil.std_dev(members)
  size    = members.count
  members.select do |value|
    score = normal(value, avg, spread) * size
    score < sensitivity
  end
end

.get_clusters(collection, means) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 48

# Distributes every item of +collection+ into the bucket of its nearest mean.
#
# Nearness is the absolute difference (means[k] - item).abs; on a tie the mean
# with the lowest index wins because only a strictly smaller distance replaces
# the current best.
#
# @param collection [Enumerable<Numeric>] values to assign
# @param means [Array<Numeric>] cluster centers
# @return [Array<Array<Numeric>>] one array per mean (same order as +means+),
#   each holding the items assigned to it in iteration order
def self.get_clusters(collection, means)
  buckets = Array.new(means.count) { [] }
  collection.each do |item|
    nearest = nil
    best    = nil
    means.count.times do |idx|
      gap = (means[idx] - item).abs
      next unless best.nil? || gap < best

      nearest = idx
      best    = gap
    end
    buckets[nearest] << item
  end
  buckets
end

.normal(x, avg, std) ⇒ Object



5
6
7
8
# File 'lib/everyday-cli-utils/safe/kmeans.rb', line 5

# Evaluates the normal (Gaussian) probability density at +x+.
#
# The standardized distance is computed in floating point, so all-Integer
# arguments are no longer truncated by integer division (previously
# (1 - 0) / 2 evaluated to 0 and the density came out too high).
#
# @param x [Numeric] point at which to evaluate the density
# @param avg [Numeric] mean of the distribution
# @param std [Numeric] standard deviation; an Integer or Float zero is not
#   guarded against and yields NaN
# @return [Float] exp(-z ** 2 / 2) / (std * sqrt(2 * PI)) with z = (x - avg) / std
def self.normal(x, avg, std)
  # to_f forces float division even when x, avg and std are all Integers.
  z = (x - avg).to_f / std
  Math.exp(-(z ** 2.0) / 2.0) / (std * Math.sqrt(2.0 * Math::PI))
end