39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
|
# File 'lib/kmeans/hcluster.rb', line 39
# Agglomerative (bottom-up) hierarchical clustering.
#
# Every row starts out as its own leaf Bicluster whose id is the row's index
# in +rows+. While more than one cluster remains, the two clusters with the
# smallest distance (<tt>1 - Pearson.calc(a.vec, b.vec)</tt>) are replaced by
# a new Bicluster built from the element-wise average of their vectors, the
# two merged clusters, their distance, and a fresh negative id (-1, -2, ...
# in merge order). Ties keep the first pair found in (i, j) scan order.
#
# Assumes every row vector has the same length (not checked here).
#
# @param rows [Array<Array<Numeric>>] one numeric vector per item
# @return [Bicluster, nil] the root of the merge tree, or nil if +rows+ is empty
def hcluster(rows)
  clusters = rows.each_with_index.map do |row, index|
    leaf = Bicluster.new(row)
    leaf.id = index
    leaf
  end

  # Pairwise distances keyed by [id, id]. Ids are never reused, so an entry
  # stays valid for as long as both clusters are still in play.
  distances = {}
  pair_distance = lambda do |a, b|
    key = [a.id, b.id]
    distances.fetch(key) { distances[key] = 1 - Pearson.calc(a.vec, b.vec) }
  end

  next_merged_id = -1

  while clusters.length > 1
    # Seed with the first pair; going through the cache avoids recomputing
    # the same correlation on every merge round.
    lowest_pair = [0, 1]
    closest = pair_distance.call(clusters[0], clusters[1])

    clusters.each_with_index do |candidate, i|
      (i + 1...clusters.length).each do |j|
        d = pair_distance.call(candidate, clusters[j])
        next unless d < closest

        closest = d
        lowest_pair = [i, j]
      end
    end

    first_index, second_index = lowest_pair
    first = clusters[first_index]
    second = clusters[second_index]

    # The merged cluster sits at the midpoint of the two vectors it replaces.
    merged_vec = Array.new(first.vec.length) do |k|
      (first.vec[k] + second.vec[k]) / 2.0
    end

    merged = Bicluster.new(merged_vec, first, second, closest, next_merged_id)
    next_merged_id -= 1

    # second_index > first_index, so remove the higher index first to keep
    # the lower one valid.
    clusters.delete_at(second_index)
    clusters.delete_at(first_index)
    clusters.push(merged)
  end

  clusters.first
end
|