Class: Anngler::Index
- Inherits:
-
Object
- Object
- Anngler::Index
- Defined in:
- lib/anngler/index.rb
Instance Attribute Summary collapse
-
#bucket_name ⇒ Object
readonly
the name of the bucket to allow multiple different hash tables in redis.
-
#n_features ⇒ Object
readonly
the number of features of the vectors we are storing.
-
#n_projections ⇒ Object
readonly
the number of projections to generate (more = fewer vectors per bucket).
-
#n_trees ⇒ Object
readonly
how many different projections to overlap (more allows for better accuracy but will slow performance).
-
#rng ⇒ Object
readonly
the random number generator for the projection matrices.
-
#seed ⇒ Object
readonly
seed for our random number generator; we ensure this is deterministic by falling back to the bucket name parsed as a base-36 integer if no seed is provided.
-
#storage ⇒ Object
readonly
which storage service to use (either redis or local memory).
-
#trees ⇒ Object
readonly
an n_trees x n_features x n_projections matrix to store our projections.
Instance Method Summary collapse
- #add(vec, label: "") ⇒ Object
-
#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index
constructor
A new instance of Index.
- #query(vec) ⇒ Object
- #remove(vec, label: "") ⇒ Object
Constructor Details
#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index
Returns a new instance of Index.
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/anngler/index.rb', line 32
# Builds a new index and generates its projection trees.
#
# @param bucket_name [String] name used as the prefix of every storage bucket
# @param n_projections [Integer] number of projections to generate
# @param n_features [Integer] number of features of the stored vectors
# @param seed [Integer, nil] seed for the random number generator; when
#   omitted, the bucket name parsed as a base-36 integer is used instead
# @param n_trees [Integer] how many projection sets to overlap
# @param storage [Object] storage backend (in-memory by default)
def initialize(
  bucket_name,
  n_projections,
  n_features,
  seed: nil,
  n_trees: 1,
  storage: Anngler::Storage::MemoryBackend.new
)
  @bucket_name = bucket_name
  @n_projections = n_projections
  @n_features = n_features
  @n_trees = n_trees
  @storage = storage

  # Fall back to a seed derived from the bucket name so that the same
  # bucket always gets the same projections.
  @seed = seed || bucket_name.to_i(36)
  @rng = Random.new(@seed)

  gen_trees
end
Instance Attribute Details
#bucket_name ⇒ Object (readonly)
the name of the bucket to allow multiple different hash tables in redis
18 19 20 |
# File 'lib/anngler/index.rb', line 18
# Reader for the bucket name held in @bucket_name.
def bucket_name
  @bucket_name
end
#n_features ⇒ Object (readonly)
the number of features of the vectors we are storing
9 10 11 |
# File 'lib/anngler/index.rb', line 9
# Reader for the feature count held in @n_features.
def n_features
  @n_features
end
#n_projections ⇒ Object (readonly)
the number of projections to generate (more = fewer vectors per bucket)
12 13 14 |
# File 'lib/anngler/index.rb', line 12
# Reader for the projection count held in @n_projections.
def n_projections
  @n_projections
end
#n_trees ⇒ Object (readonly)
how many different projections to overlap (more allows for better accuracy but will slow performance)
27 28 29 |
# File 'lib/anngler/index.rb', line 27
# Reader for the tree count held in @n_trees.
def n_trees
  @n_trees
end
#rng ⇒ Object (readonly)
the random number generator for the projection matrices
21 22 23 |
# File 'lib/anngler/index.rb', line 21
# Reader for the random number generator held in @rng.
def rng
  @rng
end
#seed ⇒ Object (readonly)
seed for our random number generator; we ensure this is deterministic by falling back to the bucket name parsed as a base-36 integer if no seed is provided
15 16 17 |
# File 'lib/anngler/index.rb', line 15
# Reader for the random number generator seed held in @seed.
def seed
  @seed
end
#storage ⇒ Object (readonly)
which storage service to use (either redis or local memory)
30 31 32 |
# File 'lib/anngler/index.rb', line 30
# Reader for the storage backend held in @storage.
def storage
  @storage
end
#trees ⇒ Object (readonly)
an n_trees x n_features x n_projections matrix to store our projections
24 25 26 |
# File 'lib/anngler/index.rb', line 24
# Reader for the projection data held in @trees.
# NOTE(review): @trees is presumably populated by gen_trees, which is not
# visible here -- confirm.
def trees
  @trees
end
Instance Method Details
#add(vec, label: "") ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/anngler/index.rb', line 52
# Stores a vector, together with an optional label, in every tree.
#
# @param vec the vector to index
# @param label [String] label serialized alongside the vector
# @return the collection of hashes computed for +vec+
def add(vec, label: "")
  tree_hashes = calc_hashes(vec)

  # The vector and label are serialized once and the same payload is
  # written into the matching bucket of each tree.
  payload = pack_data(vec, label)

  tree_hashes.each_with_index do |tree_hash, tree_idx|
    bucket_key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
    @storage.add_vector(bucket_key, payload)
  end
end
#query(vec) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 |
# File 'lib/anngler/index.rb', line 74
# Looks up the stored entries that share a bucket with +vec+ in any tree.
#
# @param vec the query vector
# @return [Array] decoded entries, de-duplicated and ordered by ascending
#   cosine distance between +vec+ and each entry's "vec"
def query(vec)
  # Collect the still-encoded contents of the matching bucket of each tree.
  encoded_hits = calc_hashes(vec).each_with_index.reduce([]) do |found, (tree_hash, tree_idx)|
    bucket_key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
    found + @storage.query_bucket(bucket_key)
  end

  # The same entry can turn up in several trees, so de-duplicate before decoding.
  decoded = encoded_hits.uniq.map { |encoded| unpack_data(encoded) }

  decoded.sort_by { |entry| Helpers.cosine_distance(vec, entry["vec"]) }
end
#remove(vec, label: "") ⇒ Object
64 65 66 67 68 69 70 71 72 |
# File 'lib/anngler/index.rb', line 64
# Removes a vector, together with its label, from every tree.
#
# The label must match the one used when the vector was added, because the
# serialized vector-plus-label payload is what gets removed from storage.
#
# @param vec the vector to remove
# @param label [String] label that was stored alongside the vector
# @return the collection of hashes computed for +vec+
def remove(vec, label: "")
  hashes = calc_hashes(vec)

  # The payload does not depend on the tree, so serialize it once instead
  # of once per tree (mirrors what #add does).
  serialized_data = pack_data(vec, label)

  # remove vector from each tree
  hashes.each_with_index do |hash, i|
    bucket = "#{@bucket_name}:#{i}:#{hash2string(hash)}"
    @storage.remove_vector(bucket, serialized_data)
  end
end