Class: HyperLogLog
- Inherits:
-
Object
- Object
- HyperLogLog
- Defined in:
- lib/hyper_log_log.rb
Instance Method Summary collapse
- #add(counter_name, value) ⇒ Object
- #count(counter_name) ⇒ Object
-
#initialize(redis, b = 10) ⇒ HyperLogLog
constructor
A new instance of HyperLogLog.
- #intersection(*counter_names) ⇒ Object
- #intersection_helper(counter_names, cache) ⇒ Object
-
#rho(i) ⇒ Object
rho(i) is the position of the first 1 in the binary representation of i, reading from most significant to least significant bits.
- #union(*counter_names) ⇒ Object
- #union_helper(counter_names) ⇒ Object
Constructor Details
#initialize(redis, b = 10) ⇒ HyperLogLog
Returns a new instance of HyperLogLog.
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/hyper_log_log.rb', line 5 def initialize(redis, b=10) raise "Accuracy not supported. Please choose a value of b between 4 and 16" if b < 4 || b > 16 @redis = redis @bits_in_hash = 32 - b @m = (2 ** b).to_i if @m == 16 @alpha = 0.673 elsif @m == 32 @alpha = 0.697 elsif @m == 64 @alpha = 0.709 else @alpha = 0.7213/(1 + 1.079/@m) end end |
Instance Method Details
#add(counter_name, value) ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/hyper_log_log.rb', line 21 def add(counter_name, value) hash = MurmurHash3::V32.murmur3_32_str_hash(value) function_name = (hash % @m).to_s w = hash / @m max_run_of_zeros = @redis.zscore(counter_name, function_name) @redis.zadd(counter_name, [(max_run_of_zeros || 0), rho(w)].max, function_name) end |
#count(counter_name) ⇒ Object
29 30 31 |
# File 'lib/hyper_log_log.rb', line 29 def count(counter_name) union_helper([counter_name]) end |
#intersection(*counter_names) ⇒ Object
37 38 39 |
# File 'lib/hyper_log_log.rb', line 37 def intersection(*counter_names) [intersection_helper(counter_names, {}), 0].max end |
#intersection_helper(counter_names, cache) ⇒ Object
61 62 63 64 65 66 67 68 |
# File 'lib/hyper_log_log.rb', line 61 def intersection_helper(counter_names, cache) sum = union_helper(counter_names) - (1...counter_names.length).map do |k| ((-1) ** (k + 1)) * counter_names.combination(k).map do |group| cache[group] ||= intersection_helper(group, cache) end.inject(0, :+) end.inject(0, :+) ((-1) ** (counter_names.length + 1)) * sum end |
#rho(i) ⇒ Object
rho(i) is the position of the first 1 in the binary representation of i, reading from most significant to least significant bits. Some examples: rho(1…) = 1, rho(001…) = 3, rho(000…0) = @bits_in_hash + 1
73 74 75 76 77 78 79 |
# File 'lib/hyper_log_log.rb', line 73 def rho(i) if i == 0 @bits_in_hash + 1 else @bits_in_hash - Math.log(i, 2).floor end end |
#union(*counter_names) ⇒ Object
33 34 35 |
# File 'lib/hyper_log_log.rb', line 33 def union(*counter_names) union_helper(counter_names) end |
#union_helper(counter_names) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/hyper_log_log.rb', line 41 def union_helper(counter_names) all_estimates = counter_names.map{ |counter_name| @redis.zrange(counter_name, 0, -1, {withscores: true}) } .reduce(:concat) .group_by{ |value, score| value } .map{ |group, counters| 2 ** -counters.map{ |x| x.last }.max } estimate_sum = all_estimates.reduce(:+) || 0 estimate = @alpha * @m * @m * ((estimate_sum + @m - all_estimates.length) ** -1) if estimate <= 2.5 * @m if all_estimates.length == @m estimate.round else # Correction for small sets (@m * Math.log(Float(@m)/(@m - all_estimates.length))).round end elsif estimate <= 2 ** 32 / 30.0 estimate.round else # Correction for large sets (-2**32 * Math.log(1 - estimate/(2.0**32))).round end end |