Class: Clusterer::Algorithms
- Inherits: Object
- Inheritance chain: Object → Clusterer::Algorithms
- Defined in:
- lib/clusterer/algorithms.rb
Class Method Summary
- .bisecting_kmeans(documents, k, options = { }) ⇒ Object
- .hierarchical(documents, k, options = { }) ⇒ Object
- .kmeans(documents, k, options = { }) ⇒ Object
Class Method Details
.bisecting_kmeans(documents, k, options = { }) ⇒ Object
65 66 67 68 69 70 71 72 73 |
# File 'lib/clusterer/algorithms.rb', line 65
#
# Clusters +documents+ by repeatedly splitting the largest cluster in two
# with {kmeans} until there are +k+ clusters.
#
# @param documents [Array] the documents to cluster
# @param k [Integer] the number of clusters wanted
# @param options [Hash] optional settings
# @option options [Boolean] :refined when truthy, the bisected clusters are
#   used as seeds for one final {kmeans} run over all documents
# @return [Array<Cluster>] the resulting clusters
def bisecting_kmeans(documents, k, options = {})
  clusters = [Cluster.new(documents)]

  while clusters.size < k
    largest = clusters.max_by { |cluster| cluster.documents.size }
    # Remove exactly the cluster being split. Array#delete would drop every
    # cluster that is == to it, which is too much if Cluster#== compares by
    # content (NOTE(review): presumed, since kmeans' convergence check relies
    # on it -- confirm against Cluster).
    clusters.delete_if { |cluster| cluster.equal?(largest) }
    clusters.concat(kmeans(largest.documents, 2))
  end

  return clusters unless options[:refined]

  kmeans(documents, k, options.merge(:seeds => clusters))
end
.hierarchical(documents, k, options = { }) ⇒ Object
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/clusterer/algorithms.rb', line 75
#
# Agglomerative clustering: starts with one cluster per document and keeps
# merging the most similar pair of clusters until only +k+ remain.
#
# @param documents [Array] the documents to cluster
# @param k [Integer] the number of clusters wanted
# @param options [Hash] optional settings; the hash is not modified
# @option options [Symbol] :similarity_function name of the method called on
#   one cluster with another cluster as argument to score a pair
#   (default +:upgma+)
# @option options [Boolean] :refined when truthy, the merged clusters are
#   used as seeds for a final {kmeans} run over all documents
# @return [Array<Cluster>] the resulting clusters
def hierarchical(documents, k, options = {})
  clusters = documents.collect { |document| Cluster.new([document]) }
  sim_fun = options[:similarity_function] || :upgma
  iter = 0

  while clusters.size > k
    puts "Iteration ....#{iter}"
    # Every unordered pair is scored once; the first best-scoring pair wins.
    keep, absorb = clusters.combination(2).max_by { |a, b| a.send(sim_fun, b) }
    # Remove by identity so that other clusters which merely compare == to
    # the absorbed one (e.g. duplicate documents) are left in place.
    clusters.delete_if { |cluster| cluster.equal?(absorb) }
    keep.merge!(absorb)
    iter += 1
  end

  return clusters unless options[:refined]

  # The similarity function above is called between clusters, so it is
  # blanked out in a copy of the options to make kmeans fall back to its own
  # default; the caller's hash is left untouched.
  kmeans(documents, k, options.merge(:seeds => clusters, :similarity_function => nil))
end
.kmeans(documents, k, options = { }) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/clusterer/algorithms.rb', line 41
#
# Partitions +documents+ into +k+ clusters. Each pass assigns every document
# to the cluster whose centroid scores highest under the similarity function,
# then rebuilds the clusters from those assignments. Iteration stops when the
# clusters no longer change or the iteration limit is reached.
#
# @param documents [Array] objects responding to the similarity function
# @param k [Integer] the number of clusters wanted
# @param options [Hash] optional settings
# @option options [Integer] :maximum_iterations upper bound on passes
#   (default 10)
# @option options [Array<Cluster>] :seeds initial clusters; when absent they
#   come from +random_cluster_seeds+. The array is not modified.
# @option options [Symbol] :similarity_function name of the method called on
#   a document with a cluster centroid as argument
#   (default +:cosine_similarity+)
# @return [Array<Cluster>] the resulting clusters
def kmeans(documents, k, options = {})
  max_iter = options[:maximum_iterations] || 10
  sim_fun = options[:similarity_function] || :cosine_similarity
  clusters = options[:seeds] || random_cluster_seeds(documents, k)
  old_clusters = Array.new(k)
  iter = 0

  # Convergence is detected with Array#!=, i.e. via Cluster#==.
  while iter < max_iter && clusters != old_clusters
    puts "Iteration ....#{iter}"
    old_clusters = clusters
    members = Array.new(k) { [] }

    documents.each do |document|
      best = (0...k).max_by { |i| document.send(sim_fun, old_clusters[i].centroid) }
      members[best] << document
    end

    # A fresh array is built each pass, so a caller-supplied :seeds array is
    # never overwritten.
    clusters = members.map { |docs| Cluster.new(docs) }
    iter += 1
  end

  clusters
end