Class: Hyperll::HyperLogLog
- Inherits:
-
Object
- Object
- Hyperll::HyperLogLog
- Defined in:
- lib/hyperll/hyper_log_log.rb
Constant Summary collapse
- INT_SIZE =
32
- INT_HASH =
0xFFFFFFFF
Instance Attribute Summary collapse
-
#log2m ⇒ Object
readonly
Returns the value of attribute log2m.
Class Method Summary collapse
Instance Method Summary collapse
- #cardinality ⇒ Object
-
#initialize(log2m, register_set = nil) ⇒ HyperLogLog
constructor
Constructs a new HyperLogLog instance.
- #merge(*others) ⇒ Object
- #offer(obj) ⇒ Object
- #offer_hashed(value) ⇒ Object
- #serialize ⇒ Object
Constructor Details
#initialize(log2m, register_set = nil) ⇒ HyperLogLog
Constructs a new HyperLogLog instance
log2m - accuracy of the counter; larger values are more accurate
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 |
# File 'lib/hyperll/hyper_log_log.rb', line 18 def initialize(log2m, register_set = nil) @log2m = log2m @count = 2 ** log2m @register_set = register_set || RegisterSet.new(@count) case log2m when 4 @alphaMM = 0.673 * @count * @count when 5 @alphaMM = 0.697 * @count * @count when 6 @alphaMM = 0.709 * @count * @count else @alphaMM = (0.7213 / (1 + 1.079 / @count)) * @count * @count end end |
Instance Attribute Details
#log2m ⇒ Object (readonly)
Returns the value of attribute log2m.
8 9 10 |
# File 'lib/hyperll/hyper_log_log.rb', line 8 def log2m @log2m end |
Class Method Details
.unserialize(serialized) ⇒ Object
10 11 12 13 |
# File 'lib/hyperll/hyper_log_log.rb', line 10 def self.unserialize(serialized) log2m, rs_size, *rs_values = serialized.unpack("N*") new(log2m, RegisterSet.new(2 ** log2m, rs_values)) end |
Instance Method Details
#cardinality ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/hyperll/hyper_log_log.rb', line 45 def cardinality register_sum = 0.0 zeros = 0.0 @register_set.each do |value| register_sum += 1.0 / (1 << value) zeros += 1 if value == 0 end estimate = @alphaMM * (1 / register_sum) if estimate <= (5.0 / 2.0) * @count # small range estimate (@count * Math.log(@count / zeros)).round else estimate.round end end |
#merge(*others) ⇒ Object
62 63 64 65 66 67 68 69 70 |
# File 'lib/hyperll/hyper_log_log.rb', line 62 def merge(*others) raise "Cannot merge hyperloglogs of different sizes" unless others.all? { |o| o.log2m == log2m } others.each do |other| @register_set.merge(other.register_set) end self end |
#offer(obj) ⇒ Object
35 36 37 |
# File 'lib/hyperll/hyper_log_log.rb', line 35 def offer(obj) offer_hashed(MurmurHash.hash(obj)) end |
#offer_hashed(value) ⇒ Object
39 40 41 42 43 |
# File 'lib/hyperll/hyper_log_log.rb', line 39 def offer_hashed(value) j = value >> (INT_SIZE - @log2m) r = number_of_leading_zeros(((value << @log2m) & INT_HASH) | (1 << (@log2m - 1)) + 1) + 1 @register_set.update_if_greater(j, r) end |
#serialize ⇒ Object
72 73 74 |
# File 'lib/hyperll/hyper_log_log.rb', line 72 def serialize [@log2m, @register_set.size * 4].pack("N*") + @register_set.serialize end |