Class: Loess::Calculator

Inherits:
Object
  • Object
show all
Defined in:
lib/loess.rb

Constant Summary collapse

DEFAULT_ACCURACY =

Sane Defaults as per Apache

1e-12
DEFAULT_BANDWIDTH =

A sensible value is usually 0.25 to 0.5

0.3
DEFAULT_ROBUSTNESS_FACTOR =

number of iterations to refine over 1 or 2 is usually good enough

2

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data = [], options = {}) ⇒ Calculator

data: Accepts array of [x, y] pairs e.g. [ [1, 2], [3, 4], [5, 6], [0, 42] ] For options, refer to defaults above



17
18
19
20
21
22
# File 'lib/loess.rb', line 17

def initialize(data = [], options = {})
  @xval, @yval           = split_up(data)
  self.accuracy          = options[:accuracy] || DEFAULT_ACCURACY
  self.bandwidth         = options[:bandwidth] || DEFAULT_BANDWIDTH
  self.robustness_factor = options[:robustness_factor] || DEFAULT_ROBUSTNESS_FACTOR
end

Instance Attribute Details

#accuracyObject

Returns the value of attribute accuracy.



8
9
10
# File 'lib/loess.rb', line 8

def accuracy
  @accuracy
end

#bandwidthObject

Returns the value of attribute bandwidth.



8
9
10
# File 'lib/loess.rb', line 8

def bandwidth
  @bandwidth
end

#dataObject

Returns the value of attribute data.



8
9
10
# File 'lib/loess.rb', line 8

def data
  @data
end

#robustness_factorObject

Returns the value of attribute robustness_factor.



8
9
10
# File 'lib/loess.rb', line 8

def robustness_factor
  @robustness_factor
end

Class Method Details

.calculate(data) ⇒ Object

Accepts array of [x, y] pairs e.g. [ [1, 2], [3, 4], [5, 6], [0, 42] ]



136
137
138
# File 'lib/loess.rb', line 136

def self.calculate(data)
  new(data).calculate
end

Instance Method Details

#calculate(data = nil) ⇒ Object

Accepts array of [x, y] pairs e.g. [ [1, 2], [3, 4], [5, 6], [0, 42] ]



26
27
28
29
# File 'lib/loess.rb', line 26

def calculate(data = nil)
  @xval, @yval = split_up(data) if data
  smooth(@xval, @yval)
end

#next_non_zero(collection, index) ⇒ Object



128
129
130
131
132
# File 'lib/loess.rb', line 128

def next_non_zero(collection, index)
  collection.each_with_index.detect { |el, i|
    i > index && !el.zero?
  }[1]
end

#smooth(xval, yval, weights = []) ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/loess.rb', line 31

def smooth(xval, yval, weights = [])
  xlength = xval.length
  return unless xlength > 0 && xlength == yval.length
  return yval if xlength == 1 || xlength == 2
  bandwidth_in_points = (bandwidth * xlength).to_i
  fail "bandwidth is way too small" if bandwidth_in_points < 2
  weights = weights.present? ? weights : [1.0] * xlength

  result             = []
  residuals          = []
  robustness_weights = [1] * xlength

  robustness_factor.times do |factor|
    xval.each_with_index do |x, i|
      bandwidth_interval = [0, bandwidth_in_points - 1]
      update_bandwidth_interval(xval, weights, i, bandwidth_interval) if i > 0
      ileft, iright = bandwidth_interval
      edge          = if xval[i] - xval[ileft] > xval[iright] - xval[i]
                        ileft
                      else
                        iright
                      end

      sum_weights   = 0
      sum_x         = 0
      sum_x_squared = 0
      sum_y         = 0
      sum_xy        = 0
      denom         = (1.0 / (xval[edge] - x)).abs

      xval[ileft..iright].each_with_index do |xk, k|
        next unless xk
        yk            = yval[k]
        dist          = (k < i) ? x - xk : xk - x
        w             = tricube(dist * denom) * robustness_weights[k] * weights[k]
        xkw           = xk * w

        # Intentionally avoiding multiple reduce calls here
        # On large data-sets, this severly impacts performance
        sum_weights   += w
        sum_x         += xkw
        sum_x_squared += xk * xkw
        sum_y         += yk * w
        sum_xy        += yk * xkw
      end

      mean_x         = sum_x / sum_weights
      mean_y         = sum_y / sum_weights
      mean_xy        = sum_xy / sum_weights
      mean_x_squared = sum_x_squared / sum_weights

      beta = if Math.sqrt((mean_x_squared - mean_x * mean_x).abs) < accuracy
               0
             else
               (mean_xy - mean_x * mean_y) / (mean_x_squared - mean_x * mean_x)
             end

      alpha        = mean_y - beta * mean_x
      result[i]    = beta * x + alpha
      residuals[i] = (yval[i] - result[i]).abs
    end

    break if factor == robustness_factor - 1

    sorted_residuals = residuals.sort
    median_residual  = sorted_residuals[xlength / 2]

    break if median_residual.abs < accuracy

    xlength.times do |i|
      arg = residuals[i] / (6 * median_residual)
      if arg >= 1
        robustness_weights[i] = 0
      else
        robustness_weights[i] = (1 - arg * arg) ** 2
      end
    end
  end
  result
end

#tricube(x) ⇒ Object



113
114
115
# File 'lib/loess.rb', line 113

def tricube(x)
  (1 - x.abs ** 3) ** 3
end

#update_bandwidth_interval(xval, weights, i, bandwith_interval) ⇒ Object



117
118
119
120
121
122
123
124
125
126
# File 'lib/loess.rb', line 117

def update_bandwidth_interval(xval, weights, i, bandwith_interval)
  left, right = bandwith_interval
  next_right  = next_non_zero(weights, right)
  if next_right < xval.length &&
      xval[next_right] - xval[i] < xval[i] - xval[left]
    next_left            = next_non_zero(weights, left)
    bandwith_interval[0] = next_left
    bandwith_interval[1] = next_right
  end
end