Class: LSH::Storage::RedisBackend

Inherits:
Object
  • Object
show all
Defined in:
lib/lsh/storage/redis_backend.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(params = {}) ⇒ RedisBackend

Returns a new instance of RedisBackend.



29
30
31
32
33
34
35
36
37
38
# File 'lib/lsh/storage/redis_backend.rb', line 29

# Builds a backend that talks to Redis and keeps vector/projection files
# under a data directory, creating that directory (and its 'projections'
# subdirectory) when they do not exist yet.
#
# @param params [Hash] settings merged over the defaults
# @option params [Hash] :redis ({}) options handed to Redis.new
# @option params [String] :data_dir ("data") directory holding the .dat files
# @option params [Boolean] :cache_vectors (true) whether vectors are kept in
#   the in-memory vector cache
def initialize(params = {})
  # `true` and File.exist? replace the TRUE constant and File.exists?,
  # both of which were removed in Ruby 3.2.
  defaults = {:redis => {}, :data_dir => "data", :cache_vectors => true}
  params = defaults.merge params
  @redis = Redis.new(params[:redis])
  @data_dir = params[:data_dir]
  projections_dir = File.join(@data_dir, 'projections')
  Dir.mkdir(@data_dir) unless File.exist?(@data_dir)
  Dir.mkdir(projections_dir) unless File.exist?(projections_dir)
  @cache_vectors = params[:cache_vectors]
  @vector_cache = {}
end

Instance Attribute Details

#cache_vectors ⇒ Object

Returns the value of attribute cache_vectors.



27
28
29
# File 'lib/lsh/storage/redis_backend.rb', line 27

# Reader for the @cache_vectors flag.
#
# #initialize sets it from params[:cache_vectors]; #load_vector and
# #save_vector only store vectors in the vector cache when it is truthy.
#
# @return the current value of @cache_vectors
def cache_vectors
  @cache_vectors
end

#data_dir ⇒ Object (readonly)

Returns the value of attribute data_dir.



26
27
28
# File 'lib/lsh/storage/redis_backend.rb', line 26

# Reader for the data directory path.
#
# #initialize sets it from params[:data_dir]; vector files are read and
# written directly under it and projection files under its 'projections'
# subdirectory.
#
# @return the current value of @data_dir
def data_dir
  @data_dir
end

#redis ⇒ Object (readonly)

Returns the value of attribute redis.



26
27
28
# File 'lib/lsh/storage/redis_backend.rb', line 26

# Reader for the Redis client created in #initialize via
# Redis.new(params[:redis]).
#
# @return the current value of @redis
def redis
  @redis
end

#vector_cache ⇒ Object

Returns the value of attribute vector_cache.



27
28
29
# File 'lib/lsh/storage/redis_backend.rb', line 27

# Reader for the in-memory vector cache.
#
# The cache is a Hash keyed by vector id; #load_vector and #save_vector add
# entries when @cache_vectors is truthy, and #clear_data! replaces it with
# an empty Hash.
#
# @return the current value of @vector_cache
def vector_cache
  @vector_cache
end

Instance Method Details

#add_vector(vector, vector_id) ⇒ Object



135
136
137
# File 'lib/lsh/storage/redis_backend.rb', line 135

# Stores a vector under the given id by delegating to #save_vector.
#
# @param vector the vector to store, passed through unchanged
# @param vector_id the id to store it under, passed through unchanged
# @return whatever #save_vector returns
# @raise [RuntimeError] from #save_vector when a file for this id already exists
def add_vector(vector, vector_id)
  save_vector(vector, vector_id) # Writes the vector to disk; save_vector raises if its file is already there
end

#add_vector_id_to_bucket(bucket, hash, vector_id) ⇒ Object



139
140
141
# File 'lib/lsh/storage/redis_backend.rb', line 139

# Adds a vector id to the Redis set that belongs to one hash value of a
# bucket. The set is stored under the key "<bucket>:<hash>".
#
# @param bucket bucket key prefix (as produced by #find_bucket)
# @param hash hash value identifying the set inside the bucket
# @param vector_id id to add to the set
# @return the result of the Redis SADD call
def add_vector_id_to_bucket(bucket, hash, vector_id)
  set_key = "#{bucket}:#{hash}"
  @redis.sadd(set_key, vector_id)
end

#clear_data! ⇒ Object



45
46
47
48
49
50
51
# File 'lib/lsh/storage/redis_backend.rb', line 45

# Removes all stored vector data: every Redis key matching "lsh:bucket:*",
# the .dat files directly under the data directory, the vector id counter
# (reset to 0) and the in-memory vector cache.
#
# @return [Hash] the new, empty vector cache
def clear_data!
  bucket_keys = @redis.keys("lsh:bucket:*")
  # DEL is only issued when there is something to delete.
  @redis.del(bucket_keys) unless bucket_keys.empty?

  delete_dat_files_in_dir(@data_dir)

  @redis.set("lsh:max_vector_id", 0)
  @vector_cache = {}
end

#clear_projections! ⇒ Object



53
54
55
56
57
# File 'lib/lsh/storage/redis_backend.rb', line 53

# Removes the stored index definition: the "lsh:parameters" and
# "lsh:buckets" Redis keys and the .dat files in the 'projections'
# subdirectory of the data directory.
#
# @return the result of #delete_dat_files_in_dir
def clear_projections!
  # Drop the memoized copies too; otherwise #parameters and #projections
  # keep returning the values that are being deleted here.
  @parms = nil
  @projections = nil
  @redis.del("lsh:parameters")
  @redis.del("lsh:buckets")
  delete_dat_files_in_dir(File.join(@data_dir, 'projections'))
end

#create_new_bucket ⇒ Object



111
112
113
# File 'lib/lsh/storage/redis_backend.rb', line 111

# Registers one more bucket by incrementing the "lsh:buckets" counter in
# Redis.
#
# @return the result of the Redis INCR call
def create_new_bucket
  counter_key = "lsh:buckets"
  @redis.incr(counter_key)
end

#delete_dat_files_in_dir(dir) ⇒ Object



59
60
61
# File 'lib/lsh/storage/redis_backend.rb', line 59

# Deletes every entry of +dir+ whose name ends in ".dat". Other entries are
# left alone, and subdirectories are not descended into.
#
# @param dir [String] directory to clean
# @return [nil]
def delete_dat_files_in_dir(dir)
  Dir.foreach(dir) do |entry|
    # "." and ".." never carry the suffix, so this single test skips them too.
    next unless entry.end_with?('.dat')

    File.delete(File.join(dir, entry))
  end
end

#find_bucket(i) ⇒ Object



147
148
149
# File 'lib/lsh/storage/redis_backend.rb', line 147

# Builds the Redis key prefix for bucket number +i+.
#
# @param i bucket index
# @return [String] "lsh:bucket:" followed by the string form of +i+
def find_bucket(i)
  'lsh:bucket:' + i.to_s
end

#generate_id ⇒ Object



115
116
117
# File 'lib/lsh/storage/redis_backend.rb', line 115

# Allocates a new vector id by incrementing the "lsh:max_vector_id" counter
# in Redis.
#
# @return [String] the incremented counter value, converted with to_s
def generate_id
  next_id = @redis.incr("lsh:max_vector_id")
  next_id.to_s
end

#has_index? ⇒ Boolean

Returns:

  • (Boolean)


63
64
65
# File 'lib/lsh/storage/redis_backend.rb', line 63

# Tells whether an index is already stored: parameters are present,
# projections are present, and at least one bucket has been created.
#
# @return [Boolean, nil] true/false from the bucket-count comparison, or the
#   falsy value of #parameters / #projections when one of them is missing
def has_index?
  parameters && projections && number_of_buckets > 0
end

#id_to_vector(vector_id) ⇒ Object



143
144
145
# File 'lib/lsh/storage/redis_backend.rb', line 143

# Looks up the vector stored under +vector_id+ by delegating to
# #load_vector.
#
# @param vector_id id of the vector, passed through unchanged
# @return whatever #load_vector returns
def id_to_vector(vector_id)
  load_vector(vector_id)
end

#load_vector(vector_id) ⇒ Object



126
127
128
129
130
131
132
133
# File 'lib/lsh/storage/redis_backend.rb', line 126

# Returns the vector stored under +vector_id+, preferring the in-memory
# cache and otherwise reading "<data_dir>/<vector_id>.dat".
#
# @param vector_id [String] id of the vector (it is concatenated with
#   '.dat', so it must be a String)
# @return the cached vector, or a freshly loaded 1 x parameters[:dim] vector
def load_vector(vector_id)
  cached = @vector_cache[vector_id]
  return cached if cached

  loaded = MathUtil.zeros(1, parameters[:dim])
  loaded.load(File.join(@data_dir, vector_id + '.dat'))
  # Only remember the vector when caching is switched on.
  @vector_cache[vector_id] = loaded if @cache_vectors
  loaded
end

#number_of_buckets ⇒ Object



67
68
69
# File 'lib/lsh/storage/redis_backend.rb', line 67

# Reads the bucket counter stored under "lsh:buckets" in Redis.
#
# @return [Integer] the counter converted with to_i; a missing key (nil)
#   converts to 0
def number_of_buckets
  raw_count = @redis.get("lsh:buckets")
  raw_count.to_i
end

#parameters ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/lsh/storage/redis_backend.rb', line 98

# Returns the index parameters stored as JSON under "lsh:parameters",
# with top-level keys converted to symbols and a :window value of
# 'Infinity' turned back into Float::INFINITY. The result is memoized in
# @parms.
#
# @return [Hash, nil] the parameters, or nil when a TypeError is raised
#   while reading them (JSON.parse raises one when the key is missing and
#   Redis returns nil)
def parameters
  return @parms if @parms

  decoded = JSON.parse(@redis.get("lsh:parameters"))
  settings = decoded.keys.each_with_object({}) do |name, acc|
    acc[name.to_sym] = decoded[name]
  end
  settings[:window] = Float::INFINITY if settings[:window] == 'Infinity'
  @parms = settings
rescue TypeError
  nil
end

#parameters=(parms) ⇒ Object



93
94
95
96
# File 'lib/lsh/storage/redis_backend.rb', line 93

# Stores the index parameters as JSON under "lsh:parameters".
#
# An infinite :window is written as the string 'Infinity' (the reader
# turns it back into Float::INFINITY).
#
# @param parms [Hash] parameters to store; left unmodified
# @return the result of the Redis SET call
def parameters=(parms)
  # Substitute on a copy so the caller's hash keeps its Float::INFINITY.
  storable = parms.dup
  storable[:window] = 'Infinity' if storable[:window] == Float::INFINITY
  # Forget the memoized value so #parameters re-reads what is stored now.
  @parms = nil
  @redis.set "lsh:parameters", storable.to_json
end

#projections ⇒ Object



80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/lsh/storage/redis_backend.rb', line 80

# Returns the projection matrices, loading them from
# "<data_dir>/projections/projection_<i>.dat" on first use and memoizing
# them in @projections.
#
# One matrix of size :number_of_random_vectors x :dim is loaded for each of
# the :number_of_independent_projections given by #parameters.
#
# @return [Array, nil] the matrices, or nil when #parameters is not available
def projections
  return unless parameters

  @projections ||= begin
    settings = parameters
    settings[:number_of_independent_projections].times.map do |index|
      matrix = MathUtil.zeros(settings[:number_of_random_vectors], settings[:dim])
      matrix.load(File.join(@data_dir, 'projections', "projection_#{index}.dat"))
      matrix
    end
  end
end

#projections=(projections) ⇒ Object



71
72
73
74
75
76
77
78
# File 'lib/lsh/storage/redis_backend.rb', line 71

# Writes each projection to
# "<data_dir>/projections/projection_<index>.dat".
#
# @param projections [Array] objects responding to save(path)
# @return the array that was passed in
def projections=(projections)
  # Forget any memoized projections so the next #projections call reloads
  # what is written below instead of returning an outdated set.
  @projections = nil
  # Saving the projections to disk
  # (too slow to serialize and store in Redis for
  # large number of dimensions/projections)
  projections.each_with_index do |projection, i|
    projection.save(File.join(@data_dir, 'projections', "projection_#{i}.dat"))
  end
end

#query_buckets(hashes) ⇒ Object



151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/lsh/storage/redis_backend.rb', line 151

# Collects every vector that shares a bucket entry with the given hashes.
#
# The i-th hash is looked up in the i-th bucket (key
# "<find_bucket(i)>:<hash>"); the ids found across all of those Redis sets
# are united and each id is resolved to its vector.
#
# @param hashes [Array] one hash value per bucket, in bucket order
# @return [Array<Hash>] one { :data => vector, :id => vector_id } entry per
#   matching id; empty when +hashes+ is empty
def query_buckets(hashes)
  keys = hashes.each_with_index.map do |hash, i|
    "#{find_bucket(i)}:#{hash}"
  end
  # SUNION needs at least one key, so Redis is not queried for an empty list.
  return [] if keys.empty?

  @redis.sunion(keys).map do |vector_id|
    {
      :data => load_vector(vector_id),
      :id   => vector_id
    }
  end
end

#reset! ⇒ Object



40
41
42
43
# File 'lib/lsh/storage/redis_backend.rb', line 40

# Wipes everything this backend stores by running #clear_data! followed by
# #clear_projections!.
#
# @return whatever #clear_projections! returns
def reset!
  clear_data!
  clear_projections!
end

#save_vector(vector, vector_id) ⇒ Object



119
120
121
122
123
124
# File 'lib/lsh/storage/redis_backend.rb', line 119

# Writes a vector to "<data_dir>/<vector_id>.dat" and, when caching is
# enabled, remembers it in the in-memory vector cache.
#
# @param vector object responding to save(path)
# @param vector_id [String] id of the vector (it is concatenated with
#   '.dat', so it must be a String)
# @return the vector when it was cached, nil otherwise
# @raise [RuntimeError] when a file for this id already exists
def save_vector(vector, vector_id)
  path = File.join(@data_dir, vector_id + '.dat')
  # File.exist? replaces File.exists?, which was removed in Ruby 3.2.
  raise "File #{path} already exists" if File.exist?(path)

  vector.save(path)
  @vector_cache[vector_id] = vector if @cache_vectors
end