Module: DistanceMeasures

Included in:
Array
Defined in:
lib/distance_measures/jaccard.rb,
lib/distance_measures/haversine.rb,
lib/distance_measures/cosine_similarity.rb,
lib/distance_measures/tanimoto_coefficient.rb

Overview

Constant Summary collapse

RAD_PER_DEG =

radians per degree, i.e. PI/180 with PI = 3.1415926535

0.017453293
R_MILES =

radius of the great circle in miles

3956
R_KM =

radius of the great circle in kilometers

6371
R =

the great circle distance d will be in whatever units R is in

{
  :miles => R_MILES,
  :km => R_KM,
  # 5280 feet per statute mile
  :feet => R_MILES * 5280,
  # 1000 meters per kilometer
  :meters => R_KM * 1000
}

Instance Method Summary collapse

Instance Method Details

#binary_jaccard_distance(other) ⇒ Object



15
16
17
# File 'lib/distance_measures/jaccard.rb', line 15

# Binary Jaccard distance between this array and +other+: one minus the
# binary Jaccard index of the pair.
#
# @param other [Array] handed unchanged to #binary_jaccard_index
# @return [Numeric] 1 minus the value returned by #binary_jaccard_index
def binary_jaccard_distance(other)
  similarity = binary_jaccard_index(other)
  1 - similarity
end

#binary_jaccard_index(other) ⇒ Object



19
20
21
22
23
24
# File 'lib/distance_measures/jaccard.rb', line 19

# Binary Jaccard index of this array and +other+: the number of non-zero
# entries in the binary intersection divided by the number of non-zero
# entries in the binary union.
#
# The arrays returned by #binary_intersection_with and #binary_union_with
# are only inspected, never modified.
#
# @param other [Array] handed unchanged to #binary_intersection_with and
#   #binary_union_with
# @return [Float] the ratio; not finite (NaN for 0 / 0) when the union has
#   no non-zero entries
def binary_jaccard_index(other)
  # count leaves the helper results untouched, unlike delete_if, which
  # strips the zeros out of the very array the helper handed back.
  shared = binary_intersection_with(other).count { |bit| bit != 0 }
  combined = binary_union_with(other).count { |bit| bit != 0 }

  shared.to_f / combined
end

#cosine_similarity(other) ⇒ Object



2
3
4
5
6
7
# File 'lib/distance_measures/cosine_similarity.rb', line 2

# Cosine similarity of this array and +other+: their dot product divided by
# the product of the two #euclidean_normalize values, passed through
# #handle_nan before being returned.
#
# NOTE(review): #dot_product, #euclidean_normalize and #handle_nan are
# defined outside this file section; presumably the second is the Euclidean
# norm and the third replaces NaN results - confirm against their sources.
#
# @param other [Array] the array to compare against
# @return [Object] whatever #handle_nan returns for the ratio
def cosine_similarity(other)
  numerator = dot_product(other)
  denominator = euclidean_normalize * other.euclidean_normalize
  ratio = numerator / denominator

  handle_nan(ratio)
end

#haversine_distance(other, um = :meters) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
# File 'lib/distance_measures/haversine.rb', line 28

# Great-circle distance between this point and +other+, computed with the
# haversine formula.
#
# Both points are read as [latitude, longitude] pairs in degrees (index 0
# is latitude, index 1 is longitude); degrees are converted to radians with
# RAD_PER_DEG.
#
# @param other [Array<Numeric>] the second point, as [latitude, longitude]
# @param um [Symbol] unit of the result; must be a key of R
#   (:miles, :km, :feet or :meters)
# @return [Float] the distance, in the unit selected by +um+
# @raise [ArgumentError] if +um+ is not a key of R
def haversine_distance(other, um = :meters)
  # Resolve the radius first so an unsupported unit fails with a clear
  # message instead of a NoMethodError on nil after all the trigonometry.
  radius = R.fetch(um) do
    raise ArgumentError, "unknown unit of measure: #{um.inspect}"
  end

  lat1_rad = self[0] * RAD_PER_DEG
  lat2_rad = other[0] * RAD_PER_DEG
  dlat_rad = (other[0] - self[0]) * RAD_PER_DEG
  dlon_rad = (other[1] - self[1]) * RAD_PER_DEG

  a = Math.sin(dlat_rad / 2)**2 +
      Math.cos(lat1_rad) * Math.cos(lat2_rad) * Math.sin(dlon_rad / 2)**2

  # Mathematically a lies in [0, 1]; rounding can leave it marginally
  # outside, which would make Math.sqrt raise Math::DomainError below.
  a = 0.0 if a < 0.0
  a = 1.0 if a > 1.0

  c = 2 * Math.atan2(Math.sqrt(a), Math.sqrt(1 - a))

  radius * c
end

#jaccard_distance(other) ⇒ Object



4
5
6
# File 'lib/distance_measures/jaccard.rb', line 4

# Jaccard distance between this array and +other+: one minus the Jaccard
# index of the pair.
#
# @param other [Array] handed unchanged to #jaccard_index
# @return [Numeric] 1 minus the value returned by #jaccard_index
def jaccard_distance(other)
  similarity = jaccard_index(other)
  1 - similarity
end

#jaccard_index(other) ⇒ Object



8
9
10
11
12
13
# File 'lib/distance_measures/jaccard.rb', line 8

# Jaccard index of this array and +other+: the size of their intersection
# (as reported by #intersection_with) divided by the number of distinct
# elements found across both arrays.
#
# @param other [Array] the array to compare against
# @return [Float] the ratio; NaN when both counts are zero (0.0 / 0.0)
def jaccard_index(other)
  distinct_total = (self | other).size.to_f
  shared_total = intersection_with(other).size.to_f

  shared_total / distinct_total
end

#tanimoto_coefficient(other) ⇒ Object



3
4
5
6
7
8
# File 'lib/distance_measures/tanimoto_coefficient.rb', line 3

# Tanimoto coefficient of this array and +other+:
#
#   dot / (sum_of_squares(self) + sum_of_squares(other) - dot)
#
# where dot is #dot_product converted to a Float. The ratio is passed
# through #handle_nan before being returned.
#
# NOTE(review): #dot_product, #sum_of_squares and #handle_nan are defined
# outside this file section; presumably #handle_nan replaces NaN results -
# confirm against its source.
#
# @param other [Array] the array to compare against
# @return [Object] whatever #handle_nan returns for the ratio
def tanimoto_coefficient(other)
  overlap = dot_product(other).to_f
  denominator = (sum_of_squares + other.sum_of_squares - overlap).to_f

  handle_nan(overlap / denominator)
end