Class: Wonkavision::Plugins::Aggregation::Measure

Inherits:
Object
  • Object
show all
Defined in:
lib/wonkavision/plugins/analytics/aggregation/measure.rb

Constant Summary collapse

DEFAULT_LOG_BUCKETS =
8
@@LOG2_DIVEDEND =

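Natural logarithm of 2: the divisor that converts a natural logarithm to base 2, i.e. log2(x) = Math.log(x) / Math.log(2). For a sample x, log2 locates j such that, with i = j - 1, 2**i <= x < 2**j.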

Math.log(2)

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options = {}) ⇒ Measure

Create a new aggregate (Measure) that maintains a binary logarithmic histogram by default. Specifying values for all of :low, :high, and :width configures the aggregate to maintain a linear histogram with (high - low) / width buckets instead.



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 63

# Create a new Measure. A binary logarithmic histogram is maintained by
# default; supplying :low, :high and :width together selects a linear
# histogram instead.
#
# @param options [Hash] configuration
# @option options [Numeric] :low lower bound of the histogram range
#   (in logarithmic mode it defaults to 1 and is mapped through
#   to_bucket(to_index(low)))
# @option options [Numeric] :high upper bound of the linear range
# @option options [Numeric] :width bucket width of the linear histogram
# @option options [Integer] :log_buckets number of logarithmic buckets
#   (defaults to DEFAULT_LOG_BUCKETS)
# @raise [ArgumentError] if the linear specification is inconsistent:
#   high <= low, width <= 0, width larger than the range, or a range that
#   is not a multiple of width
def initialize(options = {})
  low = options[:low]
  high = options[:high]
  width = options[:width]
  @log_buckets = options[:log_buckets] || DEFAULT_LOG_BUCKETS
  @count = 0
  @sum = 0.0
  @sum2 = 0.0
  @outliers_low = 0
  @outliers_high = 0

  # A linear histogram is only used when the caller supplies all three of
  # low, high and width; values in [low, high) are then bucketed in
  # multiples of width.
  if !low.nil? && !high.nil? && !width.nil?
    raise ArgumentError, "High bucket must be > Low bucket" if high <= low

    # Without this check an Integer width of 0 surfaces as a
    # ZeroDivisionError from modulo below, and a negative width can pass
    # the remaining checks and be stored.
    raise ArgumentError, "Histogram width must be > 0" unless width > 0

    if high - low < width
      raise ArgumentError, "Histogram width must be <= histogram range"
    end

    unless (high - low).modulo(width).zero?
      raise ArgumentError, "Histogram range (high - low) must be a multiple of width"
    end

    @low = low
    @high = high
    @width = width
  else
    low ||= 1
    # @low is seeded with 1 before to_index is called; presumably to_index
    # reads @low -- verify against its definition before reordering.
    @low = 1
    @low = to_bucket(to_index(low))
    @high = to_bucket(to_index(@low) + log_buckets - 1)
  end

  # Every bucket starts empty.
  @buckets = Array.new(bucket_count, 0)
end

Instance Attribute Details

#countObject (readonly)

The current number of samples



36
37
38
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 36

# Number of samples currently included in the aggregate.
# Starts at 0 in #initialize, is incremented by #add and decremented by
# #reject.
def count
  @count
end

#maxObject (readonly)

The maximum sample value



39
40
41
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 39

# Largest sample value recorded by #add.
# #initialize does not assign @max, so this is nil until the first sample
# is added; #reject overwrites it with Wonkavision::NaN.
def max
  @max
end

#meanObject (readonly)

The current average of all samples



33
34
35
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 33

# Average of all samples, as stored in @mean.
# NOTE(review): none of the visible methods (#initialize, #add, #reject)
# assigns @mean, so this may always return nil -- confirm where it is
# maintained, or whether it should be derived from @sum and @count.
def mean
  @mean
end

#minObject (readonly)

The minimum sample value



42
43
44
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 42

# Smallest sample value recorded by #add.
# #initialize does not assign @min, so this is nil until the first sample
# is added; #reject overwrites it with Wonkavision::NaN.
def min
  @min
end

#outliers_highObject (readonly)

The number of samples falling above the highest valued histogram bucket



51
52
53
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 51

# Count of samples that fell above the highest histogram bucket.
# Initialised to 0 in #initialize; presumably maintained by the outlier?
# helper, which is defined outside this view -- verify.
def outliers_high
  @outliers_high
end

#outliers_lowObject (readonly)

The number of samples falling below the lowest valued histogram bucket



48
49
50
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 48

# Count of samples that fell below the lowest histogram bucket.
# Initialised to 0 in #initialize; presumably maintained by the outlier?
# helper, which is defined outside this view -- verify.
def outliers_low
  @outliers_low
end

#sumObject (readonly)

The sum of all samples



45
46
47
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 45

# Running total of all samples.
# Starts at 0.0 in #initialize; #add adds each sample to it and #reject
# subtracts the rejected sample from it.
def sum
  @sum
end

Class Method Details

.log2(x) ⇒ Object



166
167
168
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 166

# Base-2 logarithm of x.
#
# Delegates to the standard library's Math.log2 instead of dividing two
# natural logarithms: the change-of-base quotient can differ from the exact
# result by a rounding error, which matters when the value is later
# truncated to pick a histogram bucket.
#
# @param x [Numeric] value to take the logarithm of
# @return [Float] log base 2 of x
# @raise [Math::DomainError] if x is negative
def self.log2(x)
  Math.log2(x)
end

Instance Method Details

#add(data) ⇒ Object Also known as: <<

Include a sample in the aggregate



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 107

# Fold a single sample into the aggregate.
#
# Updates the min/max, the sample count, the running sum and sum of
# squares, and -- unless outlier?(data) reports the sample as an outlier --
# the matching histogram bucket.
#
# @param data [Numeric] the sample to include
# @return [Integer, nil] the new count of the bucket that received the
#   sample, or nil when the sample was an outlier
def add(data)
  if @count.zero?
    # The very first sample is both the smallest and the largest seen.
    @min = @max = data
  else
    @max = [data, @max].max
    @min = [data, @min].min
  end

  # Running totals.
  @count += 1
  @sum += data
  @sum2 += data * data

  # Outliers are not stored in any bucket.
  return if outlier?(data)

  @buckets[to_index(data)] += 1
end

#eachObject

Iterate through each bucket in the histogram regardless of its contents



150
151
152
153
154
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 150

# Iterate over every histogram bucket, including empty ones.
#
# Without a block an Enumerator over the same (bucket, count) pairs is
# returned rather than failing with LocalJumpError at the first yield.
#
# @yield [bucket, count] the bucket as returned by to_bucket(index) and the
#   number of samples stored in it
# @return [Array, Enumerator] the underlying bucket array when a block is
#   given, otherwise an Enumerator
def each
  return enum_for(:each) unless block_given?

  @buckets.each_with_index do |count, index|
    yield(to_bucket(index), count)
  end
end

#each_nonzeroObject

Iterate through only the buckets in the histogram that contain samples



158
159
160
161
162
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 158

# Iterate over only those histogram buckets that contain samples.
#
# Without a block an Enumerator over the same (bucket, count) pairs is
# returned rather than failing with LocalJumpError at the first yield.
#
# @yield [bucket, count] the bucket as returned by to_bucket(index) and its
#   non-zero sample count
# @return [Array, Enumerator] the underlying bucket array when a block is
#   given, otherwise an Enumerator
def each_nonzero
  return enum_for(:each_nonzero) unless block_given?

  @buckets.each_with_index do |count, index|
    yield(to_bucket(index), count) if count != 0
  end
end

#log_bucketsObject

The number of buckets in the binary logarithmic histogram (low => 2**0, high => 2**log_buckets). Defaults to DEFAULT_LOG_BUCKETS (8) when the :log_buckets option is not given.



56
57
58
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 56

# Number of buckets used for the binary logarithmic histogram.
# Set in #initialize from options[:log_buckets], falling back to
# DEFAULT_LOG_BUCKETS.
def log_buckets
  @log_buckets
end

#reject(data) ⇒ Object Also known as: >>



128
129
130
131
132
133
134
135
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 128

# Remove a previously added sample from the aggregate.
#
# The count, sum and sum of squares are rolled back and, unless
# outlier?(data, true) reports the sample as an outlier, its histogram
# bucket is decremented. The extremes cannot be recomputed from the running
# totals, so both are set to Wonkavision::NaN.
#
# @param data [Numeric] the sample to remove
# @return [Integer, nil] the new count of the affected bucket, or nil when
#   the sample was an outlier
def reject(data)
  @min = @max = Wonkavision::NaN

  @count -= 1
  @sum -= data
  @sum2 -= data * data

  return if outlier?(data, true)

  @buckets[to_index(data)] -= 1
end

#std_devObject

Calculate the standard deviation



143
144
145
146
# File 'lib/wonkavision/plugins/analytics/aggregation/measure.rb', line 143

# Sample standard deviation of the recorded values, computed from the
# running sum and sum of squares.
#
# @return [Float, Object] the standard deviation, or Wonkavision::NaN when
#   fewer than two samples have been recorded
def std_dev
  return Wonkavision::NaN unless @count > 1

  total = @sum.to_f
  samples = @count.to_f
  variance = (@sum2.to_f - ((total * total) / samples)) / (samples - 1)

  # Floating-point cancellation can leave the variance a hair below zero
  # (e.g. when all samples are equal); Math.sqrt would then raise
  # Math::DomainError, so treat such values as zero spread.
  variance = 0.0 if variance < 0

  Math.sqrt(variance)
end