Class: DbClustering::DistanceMetrics::CosineSimilarity

Inherits:
Object
  • Object
show all
Includes:
Math
Defined in:
lib/distance_metrics/cosine_similarity.rb

Instance Method Summary collapse

Constructor Details

#initialize(min_dimensions: 1) ⇒ CosineSimilarity

Returns a new instance of CosineSimilarity.



6
7
8
# File 'lib/distance_metrics/cosine_similarity.rb', line 6

# Builds a cosine-similarity metric.
#
# @param min_dimensions [Integer] stored in @min_dimensions; #correlation
#   compares the component count of the vectors against this value
#   (defaults to 1).
def initialize(min_dimensions: 1)
  @min_dimensions = min_dimensions
end

Instance Method Details

#correlation(vector1, vector2) ⇒ Object



14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
# File 'lib/distance_metrics/cosine_similarity.rb', line 14

# Cosine similarity of two vectors: dot product divided by the product of
# the two Euclidean norms.
# Formula reference: https://en.wikipedia.org/wiki/Cosine_similarity
#
# Each vector is asked for its comparable components through
# +array_for_comparison+, receiving the other vector as the argument.
#
# @param vector1 [#array_for_comparison] first vector
# @param vector2 [#array_for_comparison] second vector
# @return [Float] the similarity; Float::INFINITY when the vectors have
#   fewer than @min_dimensions components. With numeric components, a
#   zero-magnitude vector makes the denominator 0.0, so the result is NaN.
# @raise [RuntimeError] when the two component lists differ in length
def correlation(vector1, vector2)
  components1 = vector1.array_for_comparison(vector2)
  components2 = vector2.array_for_comparison(vector1)

  raise "Vectors with different sizes cannot be compared" unless components1.count == components2.count

  # Sentinel for "not enough dimensions to compare".
  return Float::INFINITY if components1.count < @min_dimensions

  # Plain left-to-right folds (not Array#sum) keep the floating-point
  # accumulation order unchanged.
  dot_product = components1.zip(components2).inject(0) { |acc, (a, b)| acc + a * b }
  magnitude1 = sqrt(components1.inject(0) { |acc, component| acc + component ** 2 })
  magnitude2 = sqrt(components2.inject(0) { |acc, component| acc + component ** 2 })

  dot_product.to_f / (magnitude1 * magnitude2)
end

#distance(vector1, vector2) ⇒ Object



10
11
12
# File 'lib/distance_metrics/cosine_similarity.rb', line 10

# Cosine distance between two vectors: 1.0 minus their cosine similarity.
#
# @param vector1 [Object] first vector, passed through to #correlation
# @param vector2 [Object] second vector, passed through to #correlation
# @return [Float] 1.0 - correlation; Float::INFINITY when #correlation
#   returns Float::INFINITY
def distance(vector1, vector2)
  similarity = correlation(vector1, vector2)

  # #correlation returns Float::INFINITY when the vectors have fewer than
  # @min_dimensions components. Subtracting that from 1.0 would yield
  # -Infinity, i.e. the smallest possible distance, so the sentinel is
  # mapped to an infinite distance instead.
  return Float::INFINITY if similarity == Float::INFINITY

  1.0 - similarity
end