Class: Anngler::Index

Inherits:
Object
  • Object
show all
Defined in:
lib/anngler/index.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(bucket_name, n_projections, n_features, seed: nil, n_trees: 1, storage: Anngler::Storage::MemoryBackend.new) ⇒ Index

Returns a new instance of Index.



32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/anngler/index.rb', line 32

# Sets up an index for one bucket namespace and generates its trees.
#
# @param bucket_name [String] prefix for every storage key built by this index;
#   also the source of the default seed
# @param n_projections [Integer] stored as-is and exposed via #n_projections
# @param n_features [Integer] stored as-is and exposed via #n_features
# @param seed [Integer, nil] seed for the random number generator; when omitted
#   the seed is +bucket_name+ parsed as a base-36 integer
# @param n_trees [Integer] stored as-is and exposed via #n_trees
# @param storage [Object] backend that receives the add/query/remove calls
def initialize(
        bucket_name,
        n_projections,
        n_features,
        seed: nil,
        n_trees: 1,
        storage: Anngler::Storage::MemoryBackend.new
    )
    @bucket_name = bucket_name
    @n_projections = n_projections
    @n_features = n_features
    @n_trees = n_trees
    @storage = storage

    # Fall back to a seed derived from the bucket name so that two indexes opened
    # on the same bucket get the same seed without the caller passing one.
    # NOTE(review): String#to_i(36) stops at the first character that is not a
    # base-36 digit, so names sharing an alphanumeric prefix share a seed.
    @seed = seed || bucket_name.to_i(36)
    @rng = Random.new(@seed)

    # Defined elsewhere in the class; presumably fills @trees using @rng.
    gen_trees
end

Instance Attribute Details

#bucket_nameObject (readonly)

the name of the bucket to allow multiple different hash tables in redis



18
19
20
# File 'lib/anngler/index.rb', line 18

# Returns the bucket name given to the constructor. #add, #query and #remove
# use it as the leading component of every storage bucket key.
def bucket_name
  @bucket_name
end

#n_featuresObject (readonly)

the number of features of the vectors we are storing



9
10
11
# File 'lib/anngler/index.rb', line 9

# Returns the n_features value given to the constructor.
def n_features
  @n_features
end

#n_projectionsObject (readonly)

the number of projections to generate (more = fewer vectors per bucket)



12
13
14
# File 'lib/anngler/index.rb', line 12

# Returns the n_projections value given to the constructor.
def n_projections
  @n_projections
end

#n_treesObject (readonly)

how many different projections to overlap (more allows for better accuracy but will slow performance)



27
28
29
# File 'lib/anngler/index.rb', line 27

# Returns the n_trees value given to the constructor (1 unless overridden).
def n_trees
  @n_trees
end

#rngObject (readonly)

the random number generator for the projection matrices



21
22
23
# File 'lib/anngler/index.rb', line 21

# Returns the Random instance the constructor created from the seed.
def rng
  @rng
end

#seedObject (readonly)

seed for our random number generator; to keep it deterministic we fall back to the bucket name parsed as a base-36 integer if no seed is provided



15
16
17
# File 'lib/anngler/index.rb', line 15

# Returns the seed the random number generator was created with: the seed:
# keyword passed to the constructor, or, when none was given, the bucket name
# parsed as a base-36 integer.
def seed
  @seed
end

#storageObject (readonly)

which storage service to use (either redis or local memory)



30
31
32
# File 'lib/anngler/index.rb', line 30

# Returns the storage backend given to the constructor (a new
# Anngler::Storage::MemoryBackend unless overridden). #add, #query and #remove
# send their add_vector / query_bucket / remove_vector calls to it.
def storage
  @storage
end

#treesObject (readonly)

an n_trees x n_features x n_projections matrix to store our projections



24
25
26
# File 'lib/anngler/index.rb', line 24

# Returns the stored projection trees.
# NOTE(review): @trees is not assigned in the constructor body itself;
# presumably the gen_trees call there populates it - confirm.
def trees
  @trees
end

Instance Method Details

#add(vec, label: "") ⇒ Object



52
53
54
55
56
57
58
59
60
61
62
# File 'lib/anngler/index.rb', line 52

# Stores a vector (together with its label) in every tree of the index.
#
# @param vec [Object] the vector to store; passed to calc_hashes and pack_data
# @param label [String] label serialized alongside the vector
# @return [Object] whatever calc_hashes returned (the result of each_with_index)
def add(vec, label: "")
    tree_hashes = calc_hashes(vec)

    # One serialized payload is shared by every tree.
    payload = pack_data(vec, label)

    # Bucket keys have the form "<bucket_name>:<tree index>:<hash string>".
    tree_hashes.each_with_index do |tree_hash, tree_idx|
        @storage.add_vector("#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}", payload)
    end
end

#query(vec) ⇒ Object



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/anngler/index.rb', line 74

# Finds the stored entries that share a bucket with +vec+ in at least one tree.
#
# @param vec [Object] the query vector; passed to calc_hashes and to
#   Helpers.cosine_distance
# @return [Array] the decoded entries (as returned by unpack_data), de-duplicated
#   and ordered by ascending cosine distance to +vec+
def query(vec)
    # Collect the still-serialized entries from the matching bucket of each tree.
    candidates = []
    calc_hashes(vec).each_with_index do |tree_hash, tree_idx|
        bucket_key = "#{@bucket_name}:#{tree_idx}:#{hash2string(tree_hash)}"
        candidates.concat(@storage.query_bucket(bucket_key))
    end

    # The same entry can come back from several trees; de-duplicate on the
    # serialized form before decoding.
    decoded = candidates.uniq.map { |encoded| unpack_data(encoded) }

    # Closest match first.
    decoded.sort_by { |entry| Helpers.cosine_distance(vec, entry["vec"]) }
end

#remove(vec, label: "") ⇒ Object



64
65
66
67
68
69
70
71
72
# File 'lib/anngler/index.rb', line 64

# Removes a previously added vector from every tree of the index.
#
# The entry handed to the backend is the serialized (vec, label) pair, so the
# label must be the one used in #add for the payloads to be equal.
# NOTE(review): presumably the backend matches entries on that serialized
# payload - confirm against the storage implementation.
#
# @param vec [Object] the vector to remove; passed to calc_hashes and pack_data
# @param label [String] label that was stored alongside the vector
# @return [Object] whatever calc_hashes returned (the result of each_with_index)
def remove(vec, label: "")
    hashes = calc_hashes(vec)

    # The payload does not depend on the tree, so serialize it once up front
    # (as #add does) rather than once per tree.
    serialized_data = pack_data(vec, label)

    #remove vector from each tree
    hashes.each_with_index do |hash, i|
        bucket = "#{@bucket_name}:#{i}:#{hash2string(hash)}"
        @storage.remove_vector(bucket, serialized_data)
    end
end