Class: Hyperll::HyperLogLog

Inherits:
Object
  • Object
show all
Defined in:
lib/hyperll/hyper_log_log.rb

Constant Summary collapse

INT_SIZE =
32
INT_HASH =
0xFFFFFFFF

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(log2m, register_set = nil) ⇒ HyperLogLog

Constructs a new HyperLogLog instance

log2m - accuracy of the counter; larger values are more accurate



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/hyperll/hyper_log_log.rb', line 18

def initialize(log2m, register_set = nil)
  @log2m = log2m
  @count = 2 ** log2m
  @register_set = register_set || RegisterSet.new(@count)

  case log2m
  when 4
    @alphaMM = 0.673 * @count * @count
  when 5
    @alphaMM = 0.697 * @count * @count
  when 6
    @alphaMM = 0.709 * @count * @count
  else
    @alphaMM = (0.7213 / (1 + 1.079 / @count)) * @count * @count
  end
end

Instance Attribute Details

#log2mObject (readonly)

Returns the value of attribute log2m.



8
9
10
# File 'lib/hyperll/hyper_log_log.rb', line 8

def log2m
  @log2m
end

Class Method Details

.unserialize(serialized) ⇒ Object



10
11
12
13
# File 'lib/hyperll/hyper_log_log.rb', line 10

def self.unserialize(serialized)
  log2m, rs_size, *rs_values = serialized.unpack("N*")
  new(log2m, RegisterSet.new(2 ** log2m, rs_values))
end

Instance Method Details

#cardinalityObject



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/hyperll/hyper_log_log.rb', line 45

def cardinality
  register_sum = 0.0
  zeros = 0.0
  @register_set.each do |value|
    register_sum += 1.0 / (1 << value)
    zeros += 1 if value == 0
  end

  estimate = @alphaMM * (1 / register_sum)
  if estimate <= (5.0 / 2.0) * @count
    # small range estimate
    (@count * Math.log(@count / zeros)).round
  else
    estimate.round
  end
end

#merge(*others) ⇒ Object



62
63
64
65
66
67
68
69
70
# File 'lib/hyperll/hyper_log_log.rb', line 62

def merge(*others)
  raise "Cannot merge hyperloglogs of different sizes" unless others.all? { |o| o.log2m == log2m }

  others.each do |other|
    @register_set.merge(other.register_set)
  end

  self
end

#offer(obj) ⇒ Object



35
36
37
# File 'lib/hyperll/hyper_log_log.rb', line 35

def offer(obj)
  offer_hashed(MurmurHash.hash(obj))
end

#offer_hashed(value) ⇒ Object



39
40
41
42
43
# File 'lib/hyperll/hyper_log_log.rb', line 39

def offer_hashed(value)
  j = value >> (INT_SIZE - @log2m)
  r = number_of_leading_zeros(((value << @log2m) & INT_HASH) | (1 << (@log2m - 1)) + 1) + 1
  @register_set.update_if_greater(j, r)
end

#serializeObject



72
73
74
# File 'lib/hyperll/hyper_log_log.rb', line 72

def serialize
  [@log2m, @register_set.size * 4].pack("N*") + @register_set.serialize
end