Class: DBSCAN::Clusterer

Inherits:
Object
  • Object
show all
Includes:
RedisGeodata
Defined in:
lib/gtfs_stops_clustering/dbscan.rb

Overview

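DBSCAN-style clusterer: groups points into density-based clusters, using Redis geodata (via RedisGeodata) to look up each point's neighbours.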

Instance Attribute Summary collapse

Attributes included from RedisGeodata

#redis

Instance Method Summary collapse

Methods included from RedisGeodata

#geosearch, #redis_geodata_import

Constructor Details

#initialize(points, stops_redis_geodata, options = {}) ⇒ Clusterer

Returns a new instance of Clusterer.



23
24
25
26
27
28
29
30
31
32
33
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 23

# Builds the clusterer and runs the clustering straight away.
#
# @param points [Enumerable] raw point data; each entry is wrapped by #init_points
# @param stops_redis_geodata [Object] geodata passed unchanged to #redis_geodata_import
# @param options [Hash] settings; this class reads :epsilon, :min_points,
#   :similarity and :labels, and defaults :distance and :labels when they are unset
# @return [Clusterer] a new instance of Clusterer
def initialize(points, stops_redis_geodata, options = {})
  # Work on a copy so the caller's hash is never modified (and may be frozen).
  @options = options.dup
  @options[:distance] ||= :euclidean_distance
  @options[:labels] ||= []

  redis_geodata_import(stops_redis_geodata, @options[:epsilon])
  init_points(points)
  # Key -1 is the bucket for points that do not seed a cluster.
  @clusters = { -1 => [] }

  clusterize!
end

Instance Attribute Details

#clusters ⇒ Object

Returns the value of attribute clusters.



21
22
23
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21

# Reader for the cluster table: a Hash keyed by cluster index whose values are
# arrays of points. The -1 entry collects points whose neighbourhood was too
# small to start a cluster.
#
# @return [Hash] cluster index => array of points
def clusters
  @clusters
end

#options ⇒ Object

Returns the value of attribute options.



21
22
23
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21

# Reader for the options given to the constructor, with the :distance and
# :labels defaults filled in.
#
# @return [Hash] the clusterer settings
def options
  @options
end

#points ⇒ Object

Returns the value of attribute points.



21
22
23
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21

# Reader for the Point objects that #init_points built from the raw input.
#
# @return [Array] the wrapped points, in input order
def points
  @points
end

Instance Method Details

#add_connected(neighbors, current_cluster) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 110

# Expands a cluster outwards from an initial neighbourhood.
#
# Every entry of +neighbors+ that has no cluster yet is assigned to
# +current_cluster+. Unvisited entries are marked visited, and when their own
# neighbourhood reaches options[:min_points] its members are appended to
# +neighbors+ (the array is modified in place), so they get processed too.
#
# @param neighbors [Array] points to process; grows while being walked
# @param current_cluster [Integer] index assigned to newly claimed points
# @return [Array] the points that were assigned to +current_cluster+ by this call
def add_connected(neighbors, current_cluster)
  claimed = []

  # Array#each re-checks the length on every step, so entries appended below
  # are still reached by this same loop.
  neighbors.each do |candidate|
    if !candidate.visited?
      candidate.visit!
      reachable = inmediate_neighbors(candidate)

      if reachable.size >= options[:min_points]
        reachable.each { |extra| neighbors << extra unless neighbors.include?(extra) }
      end
    end

    next if candidate.cluster

    claimed << candidate
    candidate.cluster = current_cluster
  end

  claimed
end

#clusterize! ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 44

# Runs the clustering pass over every point.
#
# Each unvisited point is marked visited and its neighbourhood is looked up.
# A neighbourhood of at least options[:min_points] starts a new cluster
# (indices count up from 0); otherwise the point is placed in the -1 bucket.
#
# @return [Array] the points that were iterated
def clusterize!
  current_cluster = -1
  @points.each do |point|
    next if point.visited?

    point.visit!
    neighbors = inmediate_neighbors(point)

    if neighbors.size >= options[:min_points]
      current_cluster += 1
      create_cluster(current_cluster, point, neighbors)
      update_cluster_info(current_cluster)
    else
      # Provisional: a cluster created later may still claim this point.
      clusters[-1].push(point)
    end
  end

  # A point parked in the -1 bucket can afterwards be reached from a cluster
  # and get a cluster index assigned. Drop those entries so that no point is
  # listed both as unclustered and as a cluster member.
  clusters[-1].reject!(&:cluster)
  @points
end

#create_cluster(cluster_index, point, neighbors) ⇒ Object



62
63
64
65
66
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 62

# Registers a new cluster seeded by +point+.
#
# The seed is assigned to +cluster_index+ first, then #add_connected claims the
# points reachable from +neighbors+. The seed followed by the claimed points is
# stored under +cluster_index+ in the cluster table.
#
# @param cluster_index [Integer] key of the new cluster
# @param point [Object] the seed point
# @param neighbors [Array] the seed's neighbourhood, passed to #add_connected
# @return [Array] the stored member list
def create_cluster(cluster_index, point, neighbors)
  point.cluster = cluster_index
  connected = add_connected(neighbors, cluster_index)
  @clusters[cluster_index] = [point, connected].flatten
end

#init_points(points) ⇒ Object



35
36
37
38
39
40
41
42
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 35

# Wraps every raw entry in a Point and stores the result in @points.
#
# The label for the entry at position i is @options[:labels][i]; entries
# beyond the end of the labels list get nil.
#
# @param points [Enumerable] raw point data
# @return [Array] the Point objects, in input order
def init_points(points)
  @points = points.each_with_index.map do |raw_point, position|
    Point.new(raw_point, @options[:labels][position])
  end
end

#inmediate_neighbors(point) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 98

# Collects the neighbours of +point+ that also carry a similar label.
#
# #geosearch is queried with items[1] followed by items[0] of the point. Each
# returned position is resolved to one of @points through
# Utils.find_inmediate_neighbor; positions that resolve to nothing are skipped.
# A resolved point is kept only when the similarity of the two lower-cased
# labels is strictly greater than options[:similarity].
#
# @param point [Object] the point whose neighbourhood is wanted
# @return [Array] matching points, in the order the search returned them
def inmediate_neighbors(point)
  first_coord = point.items[1]
  second_coord = point.items[0]

  geosearch(first_coord, second_coord).each_with_object([]) do |position, matches|
    candidate = Utils.find_inmediate_neighbor(position, @points)
    next unless candidate

    score = Utils.string_similarity(point.label.downcase, candidate.label.downcase)
    matches.push(candidate) if score > options[:similarity]
  end
end

#labeled_results ⇒ Object



76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 76

# Converts the cluster table into plain hashes.
#
# Every cluster index maps to one row per member. A row carries the member's
# cluster name and position, its label as :stop_name, and items[0] / items[1]
# as :stop_lat / :stop_lon. :stop_id, :stop_code and :parent_station are
# always nil.
#
# @return [Hash] cluster index => array of row hashes
def labeled_results
  @clusters.transform_values do |members|
    members.map do |member|
      {
        stop_id: nil,
        stop_code: nil,
        cluster_name: member.cluster_name,
        cluster_pos: member.cluster_pos,
        stop_name: member.label,
        stop_lat: member.items[0],
        stop_lon: member.items[1],
        parent_station: nil
      }
    end
  end
end

#update_cluster_info(cluster_index) ⇒ Object



68
69
70
71
72
73
74
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 68

# Stamps every member of a cluster with the cluster's name and position.
#
# The name is derived by Utils.find_cluster_name from the capitalized member
# labels, and the position by Utils.find_cluster_position from the member list.
#
# @param cluster_index [Integer] key of the cluster in the cluster table
# @return [Array] the cluster's member list
def update_cluster_info(cluster_index)
  members = @clusters[cluster_index]
  # Nothing to stamp; also keeps the Utils helpers from seeing an empty list.
  return members if members.empty?

  labels = members.map { |e| e.label.capitalize }
  # Both values describe the cluster as a whole, so they are computed once
  # rather than once per member.
  # NOTE(review): assumes the Utils helpers depend only on their arguments — confirm.
  cluster_name = Utils.find_cluster_name(labels)
  cluster_pos = Utils.find_cluster_position(members)

  members.each do |e|
    e.cluster_name = cluster_name
    e.cluster_pos = cluster_pos
  end
end