Class: Roseflow::VectorStores::HNSWMemoryStore
- Defined in:
- lib/roseflow/vector_stores/hnsw_memory_store.rb
Overview
HNSWMemoryStore is an in-memory vector store that implements the HNSW algorithm.
Defined Under Namespace
Classes: BoundedPriorityQueue, HNSWNode, PriorityQueue
Constant Summary collapse
- PROBABILITY_FACTORS =
[ 0.5, 1 / Math::E, ].freeze
Instance Attribute Summary collapse
-
#nodes ⇒ Object
Returns the value of attribute nodes.
Class Method Summary collapse
-
.deserialize(serialized_data) ⇒ Object
Deserializes a binary string into a vector store.
Instance Method Summary collapse
- #add_neighbors_to_candidates(closest, level, visited, candidates) ⇒ Object
-
#add_node(node_id, vector) ⇒ HNSWNode
(also: #create_vector)
Adds a new node to the vector store.
-
#cosine_distance(from, to) ⇒ Object
Calculates the cosine distance between two vectors.
-
#delete_node(node_id) ⇒ HNSWNode
(also: #delete_vector)
Deletes a node from the vector store.
-
#distance(from, to) ⇒ Object
Calculates the distance between two vectors.
-
#euclidean_distance(from, to) ⇒ Object
Calculates the euclidean distance between two vectors.
-
#find(node_id) ⇒ HNSWNode?
Finds a node in the vector store.
- #find_closest_candidate(candidates, query) ⇒ Object
- #find_closest_neighbor(query, neighbors) ⇒ Object
-
#find_neighbors(node, query, level) ⇒ Object
Finds the nearest neighbors of a vector.
-
#get_random_level ⇒ Object
Returns a random level for a node.
-
#initialize(similarity_metric, dimensions, m, ef) ⇒ HNSWMemoryStore
constructor
Initializes a new HNSWMemoryStore with the specified similarity metric, dimensions, m and ef.
-
#nearest_neighbors(query, k) ⇒ Object
Finds the k nearest neighbors of a vector.
-
#search_knn(entry_point, query, k, level) ⇒ Object
Finds the k nearest neighbors of a vector.
- #search_level(query, entry_point, level) ⇒ Object
-
#serialize ⇒ Object
Serializes the vector store to a binary string.
- #termination_condition_met?(result, closest, query, k) ⇒ Boolean
-
#update_max_level(level) ⇒ Object
Updates maximum level of the graph.
-
#update_neighbors(node, neighbors, query, level) ⇒ Object
Updates the neighbors of a node.
- #update_result(result, candidate, query, k) ⇒ Object
Methods inherited from Base
#build_vector, #has_embeddings?, #list_vectors, #query, #update_vector
Constructor Details
#initialize(similarity_metric, dimensions, m, ef) ⇒ HNSWMemoryStore
Initializes a new HNSWMemoryStore with the specified similarity metric, dimensions, m and ef.
27 28 29 30 31 32 33 34 35 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 27 def initialize(similarity_metric, dimensions, m, ef) @similarity_metric = similarity_metric @dimensions = dimensions @m = m @ef = ef @max_level = 0 @entrypoint = nil @nodes = {} end |
Instance Attribute Details
#nodes ⇒ Object
Returns the value of attribute nodes.
39 40 41 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 39 def nodes @nodes end |
Class Method Details
.deserialize(serialized_data) ⇒ Object
Deserializes a binary string into a vector store.
114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 114 def self.deserialize(serialized_data) graph = HNSWGraph.decode(serialized_data) hnsw = new(graph.similarity_metric, graph.dimensions, graph.m, graph.ef) # Create nodes graph.nodes.each do |node| hnsw.nodes[node.id] = HNSWNode.new(node.id, node.vector, node.level, graph.m) end # Set neighbors graph.nodes.each do |node| neighbors = node.neighbors.each_slice(graph.m).to_a neighbors.each_with_index do |neighbor_ids, level| neighbor_ids.each_with_index do |neighbor_id, index| hnsw.nodes[node.id].neighbors[level][index] = hnsw.nodes[neighbor_id] if hnsw.nodes.key?(neighbor_id) end end end hnsw.instance_variable_set(:@entrypoint, hnsw.nodes[graph.entrypoint_id]) hnsw.instance_variable_set(:@max_level, graph.max_level) hnsw end |
Instance Method Details
#add_neighbors_to_candidates(closest, level, visited, candidates) ⇒ Object
258 259 260 261 262 263 264 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 258 def add_neighbors_to_candidates(closest, level, visited, candidates) closest.neighbors[level].each do |neighbor| next unless neighbor next if visited.include?(neighbor.id) candidates.add(neighbor) end end |
#add_node(node_id, vector) ⇒ HNSWNode Also known as: create_vector
Adds a new node to the vector store.
46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 46 def add_node(node_id, vector) level = get_random_level node = HNSWNode.new(node_id, vector, level, @m) if @entrypoint.nil? @entrypoint = node return @nodes[node_id] = node end update_max_level(level) current_node = search_level(vector, @entrypoint, @max_level) @max_level.downto(0) do |i| if i <= level neighbors = find_neighbors(current_node, vector, i) update_neighbors(node, neighbors, vector, i) end current_node = search_level(vector, @entrypoint, i - 1) if i > 0 end @nodes[node_id] = node end |
#cosine_distance(from, to) ⇒ Object
Calculates the cosine distance between two vectors.
289 290 291 292 293 294 295 296 297 298 299 300 301 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 289 def cosine_distance(from, to) dot_product = 0 norm_from = 0 norm_to = 0 from.each_with_index do |value, index| dot_product += value * to[index] norm_from += value ** 2 norm_to += to[index] ** 2 end 1 - (dot_product / (Math.sqrt(norm_from) * Math.sqrt(norm_to))) end |
#delete_node(node_id) ⇒ HNSWNode Also known as: delete_vector
Deletes a node from the vector store.
76 77 78 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 76 def delete_node(node_id) @nodes.delete(node_id) end |
#distance(from, to) ⇒ Object
Calculates the distance between two vectors.
267 268 269 270 271 272 273 274 275 276 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 267 def distance(from, to) case @similarity_metric.to_sym when :euclidean euclidean_distance(from, to) when :cosine cosine_distance(from, to) else raise UnsupportedSimilarityMetricError, "Similarity metric #{@similarity_metric} is not supported" end end |
#euclidean_distance(from, to) ⇒ Object
Calculates the euclidean distance between two vectors.
279 280 281 282 283 284 285 286 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 279 def euclidean_distance(from, to) e_distance = 0 from.each_with_index do |value, index| e_distance += (value - to[index]) ** 2 end Math.sqrt(e_distance) end |
#find(node_id) ⇒ HNSWNode?
Finds a node in the vector store.
87 88 89 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 87 def find(node_id) @nodes[node_id] end |
#find_closest_candidate(candidates, query) ⇒ Object
229 230 231 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 229 def find_closest_candidate(candidates, query) candidates.min_by { |c| distance(query, c.vector) } end |
#find_closest_neighbor(query, neighbors) ⇒ Object
192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 192 def find_closest_neighbor(query, neighbors) closest_neighbor = nil closest_distance = Float::INFINITY neighbors.each do |neighbor| next unless neighbor distance = distance(query, neighbor.vector) if distance < closest_distance closest_distance = distance closest_neighbor = neighbor end end [closest_neighbor, closest_distance] end |
#find_neighbors(node, query, level) ⇒ Object
Finds the nearest neighbors of a vector.
141 142 143 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 141 def find_neighbors(node, query, level) search_knn(node, query, @m, level) end |
#get_random_level ⇒ Object
Returns a random level for a node.
304 305 306 307 308 309 310 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 304 def get_random_level level = 0 while rand < PROBABILITY_FACTORS[0] && level < @max_level level += 1 end level end |
#nearest_neighbors(query, k) ⇒ Object
Finds the k nearest neighbors of a vector.
165 166 167 168 169 170 171 172 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 165 def nearest_neighbors(query, k) return [] unless @entrypoint entry_point = @entrypoint (0..@max_level).reverse_each do |level| entry_point = search_level(query, entry_point, level) end search_knn(entry_point, query, k, 0) end |
#search_knn(entry_point, query, k, level) ⇒ Object
Finds the k nearest neighbors of a vector.
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 209 def search_knn(entry_point, query, k, level) visited = Set.new candidates = Set.new([entry_point]) result = [] while candidates.size > 0 closest = find_closest_candidate(candidates, query) candidates.delete(closest) visited.add(closest.id) result = update_result(result, closest, query, k) break if termination_condition_met?(result, closest, query, k) add_neighbors_to_candidates(closest, level, visited, candidates) end result end |
#search_level(query, entry_point, level) ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 174 def search_level(query, entry_point, level) current = entry_point best_distance = distance(query, current.vector) loop do closest_neighbor, closest_distance = find_closest_neighbor(query, current.neighbors[level]) if closest_neighbor && closest_distance < best_distance best_distance = closest_distance current = closest_neighbor else break end end current end |
#serialize ⇒ Object
Serializes the vector store to a binary string.
92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 92 def serialize graph = HNSWGraph.new( entrypoint_id: @entrypoint.id, max_level: @max_level, similarity_metric: @similarity_metric, dimensions: @dimensions, m: @m, ef: @ef, nodes: @nodes.values.map do |node| HNSWGraphNode.new( id: node.id, vector: node.vector, level: node.level, neighbors: node.neighbors.flatten.compact.map(&:id), ) end, ) graph.to_proto end |
#termination_condition_met?(result, closest, query, k) ⇒ Boolean
249 250 251 252 253 254 255 256 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 249 def termination_condition_met?(result, closest, query, k) return false if result.size < k furthest_result_distance = distance(query, result.max_by { |r| distance(query, r.vector) }.vector) closest_distance = distance(query, closest.vector) closest_distance >= furthest_result_distance end |
#update_max_level(level) ⇒ Object
Updates maximum level of the graph.
160 161 162 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 160 def update_max_level(level) @max_level = level if level > @max_level end |
#update_neighbors(node, neighbors, query, level) ⇒ Object
Updates the neighbors of a node.
146 147 148 149 150 151 152 153 154 155 156 157 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 146 def update_neighbors(node, neighbors, query, level) node.neighbors[level] = neighbors[0, @m] neighbors.each do |neighbor| n_distance = distance(neighbor.vector, query) furthest_neighbor_index = neighbor.neighbors[level].index { |n| n.nil? || n_distance < distance(neighbor.vector, n.vector) } next unless furthest_neighbor_index neighbor.neighbors[level].insert(furthest_neighbor_index, node) neighbor.neighbors[level].pop if neighbor.neighbors[level].size > @m end end |
#update_result(result, candidate, query, k) ⇒ Object
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 |
# File 'lib/roseflow/vector_stores/hnsw_memory_store.rb', line 233 def update_result(result, candidate, query, k) if result.size < k result.push(candidate) else furthest_result = result.max_by { |r| distance(query, r.vector) } closest_distance = distance(query, candidate.vector) furthest_result_distance = distance(query, furthest_result.vector) if closest_distance < furthest_result_distance result.delete(furthest_result) result.push(candidate) end end result end |