Class: DbClustering::Algorithms::Dbscan

Inherits:
Object
  • Object
show all
Defined in:
lib/algorithms/density_based/dbscan.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(datasource:, distance_metric:, debug: false) ⇒ Dbscan

Returns a new instance of Dbscan.



9
10
11
12
13
14
# File 'lib/algorithms/density_based/dbscan.rb', line 9

# Sets up a DBSCAN run over the given datasource.
#
# @param datasource [Object] source of points; #cluster calls +iterate_all_points+
#   and +neighbors+ on it
# @param distance_metric [Object] passed through unchanged to every
#   +datasource.neighbors+ lookup
# @param debug [Boolean] when truthy, #cluster and #expand_cluster print progress
#   markers to standard output
def initialize(datasource:, distance_metric:, debug: false)
  @datasource, @distance_metric = datasource, distance_metric
  # No clusters exist until #cluster has been run.
  @clusters = []
  @debug = debug
end

Instance Attribute Details

#clusters ⇒ Object

Returns the value of attribute clusters.



7
8
9
# File 'lib/algorithms/density_based/dbscan.rb', line 7

# Clusters collected by the most recent #cluster run.
#
# The list starts out empty in the constructor and is reset to an empty array at
# the beginning of every #cluster call, which then appends each cluster it creates.
#
# @return [Array] the clusters created so far
def clusters
  @clusters
end

#datasource ⇒ Object

Returns the value of attribute datasource.



7
8
9
# File 'lib/algorithms/density_based/dbscan.rb', line 7

# The datasource handed to the constructor.
#
# #cluster walks its points via +iterate_all_points+, and both #cluster and
# #expand_cluster query it via +neighbors+.
#
# @return [Object] the datasource this instance operates on
def datasource
  @datasource
end

Instance Method Details

#cluster(max_distance:, min_neighbors:) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/algorithms/density_based/dbscan.rb', line 16

# Runs one clustering pass over every point of the datasource.
#
# For each point yielded by +@datasource.iterate_all_points+ the neighbours within
# +max_distance+ are looked up. A point with fewer than +min_neighbors+ neighbours
# gets +is_noise = true+ (this happens even if the point was already put into a
# cluster earlier). Any other point that has no cluster yet seeds a new cluster,
# which is recorded in +@clusters+ and grown through #expand_cluster.
#
# When the instance was created with +debug+ enabled, one letter per point
# (E/C/N), a percentage marker roughly every percent, and a final summary line
# are printed to standard output.
#
# @param max_distance [Object] forwarded to +@datasource.neighbors+
# @param min_neighbors [Integer] minimum neighbour count for a point to seed a cluster
# @yield [point, current_index, points_count] once per point, after it has been handled
# @return [nil]
def cluster(max_distance:, min_neighbors:)
  @clusters = []
  last_printed_progress = 0.0

  @datasource.iterate_all_points do |point, current_index, points_count|
    neighbors = @datasource.neighbors(point: point, distance_metric: @distance_metric, max_distance: max_distance)

    if neighbors.count < min_neighbors
      point.is_noise = true
    elsif point.cluster.nil?
      seeded = DbClustering::Models::Cluster.new
      @clusters << seeded
      seeded.add(point)

      expand_cluster(point, neighbors, max_distance)
    end

    yield(point, current_index, points_count) if block_given?

    if @debug
      # Edge is tested before core, exactly one letter is printed per point.
      marker =
        if point.is_edge_point?
          'E'
        elsif point.is_core_point?
          'C'
        else
          'N'
        end
      print marker

      progress = (current_index + 1) * 100 / points_count.to_f

      # Only emit a percentage once more than one further percent has been covered.
      if progress > last_printed_progress + 1
        print "[#{progress.to_i}%]"
        last_printed_progress = progress
      end
    end
  end

  if @debug
    print "\n"
    puts "#{clusters.count} clusters found"
  end
end

#expand_cluster(point, neighbors, max_distance) ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/algorithms/density_based/dbscan.rb', line 58

# Grows the cluster of +point+ by following neighbour links.
#
# Every queued point that has no cluster yet is added to +point.cluster+, and its
# own neighbours within +max_distance+ are appended to the queue unless they are
# already queued. Points that already belong to a cluster are skipped.
#
# The caller's +neighbors+ array is not modified: expansion works on a copy, so an
# array handed out by the datasource is never grown behind its back.
#
# NOTE(review): neighbours are expanded regardless of how many neighbours they
# have themselves; textbook DBSCAN only expands from core points. Confirm whether
# this is intended before relying on cluster boundaries.
#
# @param point [Object] seed point; must already have a cluster assigned
# @param neighbors [Array] initial neighbours of +point+ (left untouched)
# @param max_distance [Object] forwarded to +@datasource.neighbors+
# @return [Array] the given neighbours followed by every point discovered while expanding
def expand_cluster(point, neighbors, max_distance)
  queue = neighbors.dup

  # Array#each also visits elements appended during iteration, which turns the
  # private copy into a simple work queue.
  queue.each do |candidate|
    next unless candidate.cluster.nil?

    point.cluster.add(candidate)
    print "+" if @debug

    reachable = @datasource.neighbors(point: candidate, distance_metric: @distance_metric, max_distance: max_distance)
    reachable.each do |discovered|
      queue << discovered unless queue.include?(discovered)
    end
  end
end