Class: Metricstore::HyperLogLog::Builder

Inherits:
Object
  • Object
show all
Defined in:
lib/metricstore/hyper_log_log.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(error_rate, bucket_updater) ⇒ Builder

bucket_updater must have a method named “call” which takes two arguments:
the bucket index, and an integer value (of which it will track the max
value per bucket).


23
24
25
26
27
28
29
30
31
32
33
34
35
# File 'lib/metricstore/hyper_log_log.rb', line 23

# Sets up a builder for the requested error rate and remembers the callback
# that receives bucket updates.
#
# @param error_rate [Numeric] must be strictly greater than 0 and strictly
#   less than 1
# @param bucket_updater [#call] stored as-is; #add invokes it as
#   call(bucket_index, value)
# @raise [ArgumentError] when error_rate is not strictly between 0 and 1, or
#   when the bit count returned by HyperLogLog.bits_needed plus 10 exceeds
#   HASH_BIT_SIZE
def initialize(error_rate, bucket_updater)
  @error_rate = error_rate
  rate_in_range = @error_rate > 0 && @error_rate < 1
  raise ArgumentError, "error_rate must be between 0 and 1" unless rate_in_range

  @bits = HyperLogLog.bits_needed(error_rate)
  # NOTE(review): the extra 10 bits look like headroom reserved for the part
  # of the hash that is not used as the bucket index -- confirm.
  fits_in_hash = (@bits + 10) <= HASH_BIT_SIZE
  raise ArgumentError, "error_rate is unattainable. be less picky." unless fits_in_hash

  # One bucket per possible value of the @bits-wide index.
  @bucket_count = 1 << @bits
  @alpha = ALPHA[@bucket_count]
  @bucket_updater = bucket_updater
end

Instance Attribute Details

#bucket_count ⇒ Object (readonly)

Returns the value of attribute bucket_count.



18
19
20
# File 'lib/metricstore/hyper_log_log.rb', line 18

# Returns the number of buckets. The constructor sets this to 1 << @bits,
# where @bits is the bit count obtained for the requested error rate.
def bucket_count
  @bucket_count
end

Instance Method Details

#add(item) ⇒ Object



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/metricstore/hyper_log_log.rb', line 37

# Hashes +item+, splits the hash into a bucket index (the top @bits bits of a
# HASH_BIT_SIZE-wide value) and a tail (the remaining low bits), and reports
# the bucket index together with a value derived from the highest set bit of
# the tail to the bucket updater.
#
# @param item [Object] passed to hash_of
# @return [Object] whatever the bucket updater's call returns
# @raise [RuntimeError] "BUG!" if the bucket index is not below @bucket_count
def add(item)
  digest = hash_of(item)
  tail_width = HASH_BIT_SIZE - @bits

  # The top @bits bits select the bucket.
  bucket_index = (digest >> tail_width) & ((1 << @bits) - 1)
  raise("BUG!") if bucket_index >= @bucket_count

  # Everything below the bucket-index bits.
  tail = digest - (bucket_index << tail_width)

  # Walk bit positions from tail_width down to 1, looking for the first set bit.
  # NOTE(review): position tail_width was just cleared by the subtraction
  # above (when @bits >= 1) and position 0 is never examined, so the reported
  # value looks one larger than a 1-based leading-bit rank would be. Values
  # are kept exactly as they were; confirm against the cardinality estimator
  # and any stored bucket data before changing this.
  top_set_bit = tail_width.downto(1).find { |position| tail[position] == 1 }
  value = top_set_bit ? tail_width - top_set_bit + 1 : tail_width

  @bucket_updater.call(bucket_index, value)
end