Class: SameSame::DbscanClusters

Inherits:
Object
  • Object
show all
Defined in:
lib/same_same/dbscan_clusters.rb

Constant Summary collapse

NOISE_ID =

Identifies a set of Noise points.

-1
UNCLASSIFIED_ID =

Identifies a set of Unclassified points.

0

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(unclassified) ⇒ DbscanClusters

Returns a new instance of DbscanClusters.



15
16
17
18
19
20
21
# File 'lib/same_same/dbscan_clusters.rb', line 15

# Sets up an empty cluster registry and files every given point as unclassified.
#
# The registry is a Hash keyed by cluster id; looking up an id that is not
# present yet stores (and returns) a fresh empty Set for it.
#
# @param unclassified [#each] the points to register under UNCLASSIFIED_ID
def initialize(unclassified)
  self.last_id = 0
  self.clusters = Hash.new { |registry, id| registry[id] = Set.new }
  assign_points(unclassified, UNCLASSIFIED_ID)
end

Instance Attribute Details

#clusters ⇒ Object

Returns the value of attribute clusters.



13
14
15
# File 'lib/same_same/dbscan_clusters.rb', line 13

# Reader for the @clusters instance variable.
# The constructor stores a Hash here that is keyed by cluster id and whose
# missing keys default to a new empty Set.
def clusters
  @clusters
end

#last_id ⇒ Object

Returns the value of attribute last_id.



13
14
15
# File 'lib/same_same/dbscan_clusters.rb', line 13

# Reader for the @last_id instance variable.
# The constructor starts it at 0 and get_next_cluster_id raises it by one
# on every call.
def last_id
  @last_id
end

Instance Method Details

#assign_point(p, cluster_id) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/same_same/dbscan_clusters.rb', line 43

# Moves point +p+ into the set registered under +cluster_id+.
#
# A point currently held in the Noise or Unclassified set is removed from
# that set first. A point that is in neither set is treated as already
# clustered: it may only be (re)assigned to UNCLASSIFIED_ID, any other
# target raises.
#
# @param p [Object] the point to assign
# @param cluster_id [Object] id of the destination cluster
# @return [Object] the destination set, after +p+ was appended with <<
# @raise [ArgumentError] if +p+ is neither noise nor unclassified and
#   +cluster_id+ is not UNCLASSIFIED_ID
def assign_point(p, cluster_id)
  # Work out which of the two "movable" buckets (if any) holds the point.
  # Noise is checked before Unclassified, and Unclassified only when the
  # point is not noise.
  previous_id =
    if noise?(p)
      NOISE_ID
    elsif unclassified?(p)
      UNCLASSIFIED_ID
    end

  if previous_id
    remove_point_from_cluster(p, previous_id)
  elsif cluster_id != UNCLASSIFIED_ID
    raise ArgumentError, "Trying to move point that has already been assigned to some other cluster. Point: #{p}, cluster_id=#{cluster_id}"
  end

  clusters[cluster_id] << p
end

#assign_points(points, cluster_id) ⇒ Object



39
40
41
# File 'lib/same_same/dbscan_clusters.rb', line 39

# Assigns each of +points+ to +cluster_id+ via assign_point, in iteration order.
#
# @param points [#each] the points to assign
# @param cluster_id [Object] id of the destination cluster
# @return [Object] whatever +points.each+ returns
def assign_points(points, cluster_id)
  points.each do |point|
    assign_point(point, cluster_id)
  end
end

#assign_to_noise(p) ⇒ Object



23
24
25
# File 'lib/same_same/dbscan_clusters.rb', line 23

# Assigns point +p+ to the Noise set by delegating to assign_point.
#
# @param p [Object] the point to mark as noise
# @return [Object] the result of assign_point
def assign_to_noise(p)
  assign_point(p, NOISE_ID)
end

#cluster_name(id) ⇒ Object



67
68
69
70
71
72
73
# File 'lib/same_same/dbscan_clusters.rb', line 67

# Returns the display label for a cluster id.
#
# @param id [Object] a cluster id
# @return [String] "Noise" for NOISE_ID, "Unclassified" for UNCLASSIFIED_ID,
#   otherwise "Cluster <id>"
def cluster_name(id)
  if NOISE_ID == id
    "Noise"
  elsif UNCLASSIFIED_ID == id
    "Unclassified"
  else
    "Cluster #{id}"
  end
end

#get_next_cluster_id ⇒ Object



84
85
86
# File 'lib/same_same/dbscan_clusters.rb', line 84

# Advances last_id by one and returns the new value.
#
# @return [Object] the incremented id, which is also stored in last_id
def get_next_cluster_id
  next_id = last_id + 1
  self.last_id = next_id
  next_id
end

#noise?(p) ⇒ Boolean

Returns:

  • (Boolean)


31
32
33
# File 'lib/same_same/dbscan_clusters.rb', line 31

# Tells whether point +p+ is currently held in the Noise set.
#
# @param p [Object] the point to look up
# @return [Boolean] the result of point_in_cluster? for NOISE_ID
def noise?(p)
  point_in_cluster?(p, NOISE_ID)
end

#point_in_cluster?(p, cluster_id) ⇒ Boolean

Returns:

  • (Boolean)


35
36
37
# File 'lib/same_same/dbscan_clusters.rb', line 35

# Tells whether point +p+ is a member of the set registered under +cluster_id+.
#
# The set is fetched with clusters[cluster_id]; with the default-block Hash
# built by the constructor, that lookup also registers an empty set for an
# id that has not been seen before.
#
# @param p [Object] the point to look up
# @param cluster_id [Object] id of the cluster to search
# @return [Boolean] result of +include?+ on that cluster's members
def point_in_cluster?(p, cluster_id)
  members = clusters[cluster_id]
  members.include?(p)
end

#remove_point_from_cluster(p, cluster_id) ⇒ Object



75
76
77
78
79
80
81
82
# File 'lib/same_same/dbscan_clusters.rb', line 75

# Deletes point +p+ from the set registered under +cluster_id+.
#
# @param p [Object] the point to remove
# @param cluster_id [Object] id of the cluster to remove it from
# @return [Boolean] whether +p+ was a member before the deletion; false
#   when the lookup yields no cluster at all
def remove_point_from_cluster(p, cluster_id)
  members = clusters[cluster_id]
  return false if members.nil?

  # Record membership before deleting so the caller learns whether anything changed.
  was_member = members.include?(p)
  members.delete(p)
  was_member
end

#to_clusters ⇒ Object



58
59
60
61
62
63
64
# File 'lib/same_same/dbscan_clusters.rb', line 58

# Builds one Cluster per non-empty entry of the registry.
#
# Entries are visited in the registry's iteration order; each Cluster is
# constructed from the entry's points and the label from cluster_name.
#
# @return [Array] the Cluster objects, empty entries omitted
def to_clusters
  clusters.each_with_object([]) do |(id, points), result|
    next if points.empty?

    result << Cluster.new(points, cluster_name(id))
  end
end

#unclassified?(p) ⇒ Boolean

Returns:

  • (Boolean)


27
28
29
# File 'lib/same_same/dbscan_clusters.rb', line 27

# Tells whether point +p+ is currently held in the Unclassified set.
#
# @param p [Object] the point to look up
# @return [Boolean] the result of point_in_cluster? for UNCLASSIFIED_ID
def unclassified?(p)
  point_in_cluster?(p, UNCLASSIFIED_ID)
end