Class: DBSCAN::Clusterer
Overview
Instance Attribute Summary collapse
Attributes included from RedisGeodata
#redis
Instance Method Summary
collapse
#geosearch, #redis_geodata_import
Constructor Details
#initialize(points, stops_redis_geodata, options = {}) ⇒ Clusterer
Returns a new instance of Clusterer.
23
24
25
26
27
28
29
30
31
32
33
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 23
# Builds a clusterer and runs the clustering immediately.
#
# @param points [Enumerable] raw points, forwarded to init_points
# @param stops_redis_geodata [Object] forwarded to redis_geodata_import
# @param options [Hash] settings; :distance, :labels and :epsilon are read
#   here, :min_points and :similarity are read by other methods of this class
# @return [Clusterer]
def initialize(points, stops_redis_geodata, options = {})
  # Fill defaults on a copy: the caller's hash must not be mutated, and a
  # frozen hash must be accepted.
  settings = options.dup
  settings[:distance] ||= :euclidean_distance
  settings[:labels] ||= []
  redis_geodata_import(stops_redis_geodata, settings[:epsilon])
  @options = settings
  init_points(points)
  # Key -1 collects the points that do not end up starting a cluster.
  @clusters = { -1 => [] }
  clusterize!
end
|
Instance Attribute Details
#clusters ⇒ Object
Returns the value of attribute clusters.
21
22
23
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21
# Reader for the cluster table held in @clusters.
#
# @return [Hash, nil] the value of @clusters
def clusters
  @clusters
end
|
#options ⇒ Object
Returns the value of attribute options.
21
22
23
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21
# Reader for the settings hash held in @options.
#
# @return [Hash, nil] the value of @options
def options
  @options
end
|
#points ⇒ Object
Returns the value of attribute points.
21
22
23
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 21
# Reader for the point list held in @points.
#
# @return [Array, nil] the value of @points
def points
  @points
end
|
Instance Method Details
#add_connected(neighbors, current_cluster) ⇒ Object
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 110
# Expands a cluster outward from its seed neighbourhood.
#
# Every unvisited point is visited; when it has at least
# options[:min_points] neighbours of its own, those neighbours are appended
# to the work list. Every point that has no cluster yet is assigned to
# +current_cluster+.
#
# @param neighbors [Array] seed neighbourhood; used as the work list and
#   extended in place
# @param current_cluster [Integer] index assigned to the absorbed points
# @return [Array] the points newly assigned to +current_cluster+
def add_connected(neighbors, current_cluster)
  cluster_points = []
  # Array#each re-checks the length on every step, so points pushed below are
  # still iterated over; this is what drives the expansion.
  neighbors.each do |point|
    seen_before = point.visited?
    unless seen_before
      point.visit!
      new_points = inmediate_neighbors(point)
      if new_points.size >= options[:min_points]
        new_points.each do |candidate|
          neighbors.push(candidate) unless neighbors.include?(candidate)
        end
      end
    end
    next if point.cluster

    # A point visited earlier without a cluster may sit in the noise list
    # (key -1). Now that it joins a cluster, drop it from there so it is not
    # reported twice. Identity comparison: only this very object is removed.
    clusters[-1]&.delete_if { |noise| noise.equal?(point) } if seen_before
    cluster_points.push(point)
    point.cluster = current_cluster
  end
  cluster_points
end
|
#clusterize! ⇒ Object
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 44
# Main pass over @points: each unvisited point is visited and either seeds a
# new cluster (when it has at least options[:min_points] neighbours) or is
# appended to the list under key -1.
#
# Cluster indices are handed out consecutively, starting at 0.
#
# @return [Array] @points
def clusterize!
  next_index = 0
  @points.each do |point|
    next if point.visited?

    point.visit!
    nearby = inmediate_neighbors(point)
    if nearby.size < options[:min_points]
      clusters[-1].push(point)
      next
    end

    create_cluster(next_index, point, nearby)
    update_cluster_info(next_index)
    next_index += 1
  end
end
|
#create_cluster(cluster_index, point, neighbors) ⇒ Object
62
63
64
65
66
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 62
# Registers a new cluster seeded by +point+.
#
# The seed is tagged with +cluster_index+ first, then add_connected collects
# the remaining members; the flattened list is stored in @clusters.
#
# @param cluster_index [Integer] key under which the cluster is stored
# @param point [Object] seed point; must respond to #cluster=
# @param neighbors [Array] neighbourhood of the seed, passed to add_connected
# @return [Array] the stored member list, seed first
def create_cluster(cluster_index, point, neighbors)
  point.cluster = cluster_index
  connected = add_connected(neighbors, cluster_index)
  @clusters[cluster_index] = [point, connected].flatten
end
|
#init_points(points) ⇒ Object
35
36
37
38
39
40
41
42
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 35
# Wraps every raw point in a Point, pairing it with the label at the same
# position in @options[:labels] (nil when there is no label at that index).
#
# @param points [Enumerable] raw points
# @return [Array<Point>] the list stored in @points
def init_points(points)
  @points = points.map.with_index do |coords, position|
    Point.new(coords, @options[:labels][position])
  end
end
|
#inmediate_neighbors(point) ⇒ Object
98
99
100
101
102
103
104
105
106
107
108
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 98
# Collects the known points that are close to +point+ and carry a similar
# label.
#
# Candidates come from geosearch, called with items[1] then items[0]
# (presumably longitude, latitude; confirm against geosearch). Each returned
# position is resolved to a member of @points via
# Utils.find_inmediate_neighbor; unresolved positions are skipped. A candidate
# is kept when the similarity of the two downcased labels is strictly greater
# than options[:similarity].
#
# @param point [Object] must respond to #items and #label
# @return [Array] matching points, in geosearch order
def inmediate_neighbors(point)
  geosearch(point.items[1], point.items[0]).each_with_object([]) do |position, matches|
    candidate = Utils.find_inmediate_neighbor(position, @points)
    next unless candidate

    score = Utils.string_similarity(point.label.downcase, candidate.label.downcase)
    matches.push(candidate) if score > options[:similarity]
  end
end
|
#labeled_results ⇒ Object
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 76
# Renders @clusters as plain hashes, one per point, grouped by cluster index.
#
# stop_id, stop_code and parent_station are always nil here; stop_lat and
# stop_lon are taken from items[0] and items[1] respectively.
#
# @return [Hash{Integer => Array<Hash>}] cluster index mapped to row hashes
def labeled_results
  @clusters.each_with_object({}) do |(index, members), results|
    results[index] = members.map do |member|
      {
        stop_id: nil,
        stop_code: nil,
        cluster_name: member.cluster_name,
        cluster_pos: member.cluster_pos,
        stop_name: member.label,
        stop_lat: member.items[0],
        stop_lon: member.items[1],
        parent_station: nil
      }
    end
  end
end
|
#update_cluster_info(cluster_index) ⇒ Object
68
69
70
71
72
73
74
|
# File 'lib/gtfs_stops_clustering/dbscan.rb', line 68
# Stamps every member of a cluster with the cluster's name and position.
#
# The name is derived by Utils.find_cluster_name from the capitalized member
# labels, the position by Utils.find_cluster_position from the member list.
#
# @param cluster_index [Integer] key of the cluster in @clusters
# @return [Array] the member list of that cluster
def update_cluster_info(cluster_index)
  members = @clusters[cluster_index]
  return members if members.empty?

  # Both values describe the cluster as a whole, so compute them once rather
  # than once per member.
  # NOTE(review): assumes the Utils helpers are pure — confirm.
  name = Utils.find_cluster_name(members.map { |member| member.label.capitalize })
  position = Utils.find_cluster_position(members)
  members.each do |member|
    member.cluster_name = name
    member.cluster_pos = position
  end
end
|