68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
|
# File 'lib/metricstore/hyper_log_log.rb', line 68
# Estimates the number of distinct elements represented by a set of
# HyperLogLog registers.
#
# The raw estimate is alpha * m^2 / sum(2^-register). It is then corrected
# at both ends of the range:
# * small range (raw <= 2.5 * m): linear counting, m * ln(m / empty),
#   whenever at least one register is still empty;
# * large range (raw > 2^HASH_BIT_SIZE / 30): hash-collision correction,
#   -2^HASH_BIT_SIZE * ln(1 - raw / 2^HASH_BIT_SIZE).
#
# @param buckets [#to_a] the registers; each entry is a number, or nil for
#   a register that has never been written (treated as 0).
# @return [Numeric] the estimated cardinality.
# @raise [RuntimeError] if +buckets+ is empty.
def self.estimate_cardinality(buckets)
  # Normalise once so that nil and an explicit 0 are treated identically in
  # both the harmonic sum and the empty-register count below.
  registers = buckets.to_a.map { |register| register || 0 }
  m = registers.size
  raise("BUG!") unless m > 0

  hash_space = 2 ** HASH_BIT_SIZE
  harmonic_sum = registers.map { |register| 2 ** -register }.inject(:+)
  raw = ALPHA[m] * (m ** 2) / harmonic_sum

  if raw <= 2.5 * m
    empty_registers = registers.count(0)
    # With no empty register left, linear counting is undefined (log of
    # m / 0), so the raw estimate is returned as-is.
    return raw if empty_registers == 0

    return m * Math.log(m.to_f / empty_registers)
  end

  return raw if raw <= hash_space / 30.0

  # Large-range correction. This must be the natural logarithm (the same
  # base as the linear-counting branch); log base 2 would inflate the
  # result by a factor of 1 / ln(2).
  # NOTE(review): if raw reaches hash_space the logarithm's argument is
  # <= 0; that case is not handled here, same as before this change.
  -hash_space * Math.log(1 - raw.to_f / hash_space)
end
|