Module: DDescriptive

Included in:
DoubleStatList
Defined in:
lib/colt/double_descriptive.rb

Instance Method Summary collapse

Instance Method Details

#auto_correlation(lag) ⇒ Object


Returns the auto-correlation of a data sequence.


Parameters:

  • lag

    lag between the two measures to auto correlate



79
80
81
# File 'lib/colt/double_descriptive.rb', line 79

# Auto-correlation of the data sequence for the given lag.
#
# @param lag lag between the two measures to auto correlate
# @return the result of DoubleDescriptive.autoCorrelation for this list's
#   data, mean and variance
def auto_correlation(lag)
  avg = mean
  var = variance
  DoubleDescriptive.autoCorrelation(@array_list, lag, avg, var)
end

#correlation(other_val) ⇒ Object


Returns the correlation of two data sequences. That is covariance(data1,data2)/(standardDev1*standardDev2).




88
89
90
# File 'lib/colt/double_descriptive.rb', line 88

# Correlation of this data sequence with another one, i.e.
# covariance(data1, data2) / (standardDev1 * standardDev2).
#
# @param other_val the other list; must respond to standard_deviation and be
#   accepted by #covariance
def correlation(other_val)
  cov = covariance(other_val)
  spread = standard_deviation * other_val.standard_deviation
  cov / spread
end

#covariance(other_val) ⇒ Object


Returns the covariance of two data sequences. That is cov(x,y) = Sum((x-mean(x)) * (y-mean(y))) / size().




97
98
99
# File 'lib/colt/double_descriptive.rb', line 97

# Covariance of two data sequences:
# cov(x, y) = Sum((x - mean(x)) * (y - mean(y))) / size().
#
# Obtained by rescaling the sample covariance (which divides by size - 1)
# with the factor (size - 1) / size.
#
# @param other_val the other list, passed on to #sample_covariance
def covariance(other_val)
  unbiased = sample_covariance(other_val)
  unbiased * (list_size - 1) / list_size
end

#durbin_watsonObject


Durbin-Watson computation.




105
106
107
# File 'lib/colt/double_descriptive.rb', line 105

# Durbin-Watson computation over the data; the result is cached in
# @durbin_watson after the first call.
def durbin_watson
  return @durbin_watson if @durbin_watson

  @durbin_watson = DoubleDescriptive.durbinWatson(@array_list)
end

#frequenciesObject


Computes the frequency (number of occurrences, count) of each distinct value in the given sorted data.




114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
# File 'lib/colt/double_descriptive.rb', line 114

# Frequency (count) of each distinct value in the sorted data.
#
# The Java-side computation runs once; its results are cached in
# @distinct_values and @frequencies as plain Ruby arrays.
#
# @return [Hash] with keys :distinct_values and :frequencies
def frequencies
  if @frequencies.nil?
    values = Java::CernColtListTdouble::DoubleArrayList.new
    counts = Java::CernColtListTint::IntArrayList.new
    DoubleDescriptive.frequencies(sorted_data, values, counts)
    # Trim before reading elements so only filled slots are converted.
    values.trimToSize
    counts.trimToSize
    @distinct_values = values.elements.to_a
    @frequencies = counts.elements.to_a
  end

  { distinct_values: @distinct_values, frequencies: @frequencies }
end

#geometric_meanObject


Returns the geometric mean of a data sequence.




134
135
136
# File 'lib/colt/double_descriptive.rb', line 134

# Geometric mean of the data sequence; cached in @geometric_mean.
def geometric_mean
  unless @geometric_mean
    @geometric_mean = DoubleDescriptive.geometricMean(@array_list)
  end
  @geometric_mean
end

#harmonic_meanObject


Returns the harmonic mean of a data sequence.




142
143
144
# File 'lib/colt/double_descriptive.rb', line 142

# Harmonic mean of the data sequence, computed from the list size and the
# sum of inversions; cached in @harmonic_mean.
def harmonic_mean
  return @harmonic_mean if @harmonic_mean

  count = list_size
  inversions = sum_of_inversions
  @harmonic_mean = DoubleDescriptive.harmonicMean(count, inversions)
end

#kurtosisObject


Returns the kurtosis (aka excess) of a data sequence, which is -3 + moment(data,4,mean) / standardDeviation^4.




151
152
153
154
# File 'lib/colt/double_descriptive.rb', line 151

# Kurtosis (aka excess) of the data sequence, computed from the fourth
# central moment and the standard deviation; cached in @kurtosis.
def kurtosis
  return @kurtosis if @kurtosis

  @kurtosis = DoubleDescriptive.kurtosis(moment4, standard_deviation)
end

#lag1Object


Returns the lag-1 autocorrelation of a dataset. Note that this method has semantics different from autoCorrelation(…, 1).




161
162
163
# File 'lib/colt/double_descriptive.rb', line 161

# Lag-1 autocorrelation of the dataset, computed from the data and its
# mean; cached in @lag1.
def lag1
  unless @lag1
    @lag1 = DoubleDescriptive.lag1(@array_list, mean)
  end
  @lag1
end

#list_sizeObject





169
170
171
# File 'lib/colt/double_descriptive.rb', line 169

# Number of elements in @array_list; cached in @list_size after the first
# call.
def list_size
  return @list_size if @list_size

  @list_size = @array_list.size
end

#maxObject


Returns the largest member of a data sequence.




177
178
179
# File 'lib/colt/double_descriptive.rb', line 177

# Largest member of the data sequence; cached in @max.
def max
  return @max if @max

  @max = DoubleDescriptive.max(@array_list)
end

#meanObject


Returns the arithmetic mean of a data sequence; That is Sum( data ) / data.size()




185
186
187
# File 'lib/colt/double_descriptive.rb', line 185

# Arithmetic mean of the data sequence, i.e. Sum(data) / data.size();
# cached in @mean.
def mean
  unless @mean
    @mean = DoubleDescriptive.mean(@array_list)
  end
  @mean
end

#mean_deviationObject


Returns the mean deviation of a dataset.




193
194
195
# File 'lib/colt/double_descriptive.rb', line 193

# Mean deviation of the dataset, computed from the data and its mean;
# cached in @mean_deviation.
def mean_deviation
  return @mean_deviation if @mean_deviation

  @mean_deviation = DoubleDescriptive.meanDeviation(@array_list, mean)
end

#medianObject


Returns the median of a sorted data sequence.




201
202
203
# File 'lib/colt/double_descriptive.rb', line 201

# Median of the data, taken from the sorted copy of the sequence; cached in
# @median.
def median
  unless @median
    @median = DoubleDescriptive.median(sorted_data)
  end
  @median
end

#minObject


Returns the smallest member of a data sequence.




209
210
211
# File 'lib/colt/double_descriptive.rb', line 209

# Smallest member of the data sequence; cached in @min.
def min
  return @min if @min

  @min = DoubleDescriptive.min(@array_list)
end

#moment(k, c) ⇒ Object


Returns the moment of k-th order with constant c of a data sequence, which is Sum( (data-c)^k ) / data.size().


Parameters:

  • k

    integer

  • c

    double



220
221
222
# File 'lib/colt/double_descriptive.rb', line 220

# Moment of k-th order with constant c of the data sequence, i.e.
# Sum( (data - c)^k ) / data.size(). Not cached.
#
# @param k integer order of the moment
# @param c double constant subtracted from each element
def moment(k, c)
  data = @array_list
  DoubleDescriptive.moment(data, k, c)
end

#moment3Object


The third central moment. That is: moment(data,3,mean)




228
229
230
# File 'lib/colt/double_descriptive.rb', line 228

# Third central moment, i.e. moment(3, mean); cached in @moment3.
def moment3
  return @moment3 if @moment3

  @moment3 = moment(3, mean)
end

#moment4Object





236
237
238
# File 'lib/colt/double_descriptive.rb', line 236

# Fourth central moment, i.e. moment(4, mean); cached in @moment4.
def moment4
  unless @moment4
    @moment4 = moment(4, mean)
  end
  @moment4
end

#pooled_mean(other_val) ⇒ Object


Returns the pooled mean of two data sequences. That is (size1 * mean1 + size2 * mean2) / (size1 + size2).




245
246
247
248
# File 'lib/colt/double_descriptive.rb', line 245

# Pooled mean of this sequence and another one, i.e.
# (size1 * mean1 + size2 * mean2) / (size1 + size2).
#
# The other list's cached statistics are cleared first, so its size and
# mean are recomputed from its current data.
#
# @param other_val the other list; must respond to reset_statistics,
#   list_size and mean
def pooled_mean(other_val)
  other_val.reset_statistics
  own_size = list_size
  own_mean = mean
  DoubleDescriptive.pooledMean(own_size, own_mean, other_val.list_size, other_val.mean)
end

#pooled_variance(other_val) ⇒ Object


Returns the pooled variance of two data sequences. That is: (size1 * variance1 + size2 * variance2) / (size1 + size2)




255
256
257
258
259
# File 'lib/colt/double_descriptive.rb', line 255

# Pooled variance of this sequence and another one, i.e.
# (size1 * variance1 + size2 * variance2) / (size1 + size2).
#
# The other list's cached statistics are cleared first, so its size and
# variance are recomputed from its current data.
#
# @param other_val the other list; must respond to reset_statistics,
#   list_size and variance
def pooled_variance(other_val)
  other_val.reset_statistics
  own_size = list_size
  own_variance = variance
  DoubleDescriptive.pooledVariance(own_size, own_variance,
                                   other_val.list_size, other_val.variance)
end

#productObject


Returns the product of a data sequence, which is Prod( data ) .




265
266
267
# File 'lib/colt/double_descriptive.rb', line 265

# Product of the data sequence, i.e. Prod(data); cached in @product.
def product
  return @product if @product

  @product = DoubleDescriptive.product(@array_list)
end

#quantile(phi) ⇒ Object


Returns the phi-quantile; that is, an element elem for which holds that phi percent of data elements are less than elem.


Parameters:

  • phi

    double



275
276
277
# File 'lib/colt/double_descriptive.rb', line 275

# phi-quantile of the sorted data: an element elem such that phi percent of
# the data elements are less than elem. Not cached.
#
# @param phi double
def quantile(phi)
  ordered = sorted_data
  DoubleDescriptive.quantile(ordered, phi)
end

#quantile_inverse(elmt) ⇒ Object


Returns how many percent of the elements contained in the receiver are <= element.


Parameters:

  • elmt

    double



284
285
286
# File 'lib/colt/double_descriptive.rb', line 284

# Percentage of the elements in the sorted data that are <= elmt.
# Not cached.
#
# @param elmt double
def quantile_inverse(elmt)
  ordered = sorted_data
  DoubleDescriptive.quantileInverse(ordered, elmt)
end

#quantiles(percs) ⇒ Object


Computes the quantiles of the data for the given percentages. Each percentage must be in the interval [0.0,1.0].


Parameters:

  • percs

    the percentages for which quantiles are to be computed. Each percentage must be in the interval [0.0,1.0].



293
294
295
296
297
298
299
# File 'lib/colt/double_descriptive.rb', line 293

# Quantiles of the sorted data for several percentages at once.
#
# @param percs Ruby array of percentages; it is converted to a Java
#   DoubleArrayList before being handed to DoubleDescriptive.quantiles
# @return [Array] the elements of the returned list as a Ruby array
def quantiles(percs)
  java_percs = Java::CernColtListTdouble::DoubleArrayList.new(percs.to_java(Java::double))
  DoubleDescriptive.quantiles(sorted_data, java_percs).elements.to_a
end

#rank_interpolated(elmt) ⇒ Object


Returns the linearly interpolated number of elements in a list less or equal to a given element. The rank is the number of elements <= element. Ranks are of the form 1, 2,…, sortedList.size(). If no element is <= element, then the rank is zero. If the element lies in between two contained elements, then linear interpolation is used and a non integer value is returned.


Parameters:

  • elmt

    double



310
311
312
# File 'lib/colt/double_descriptive.rb', line 310

# Linearly interpolated number of elements in the sorted data that are
# less than or equal to elmt. Not cached.
#
# @param elmt double
def rank_interpolated(elmt)
  ordered = sorted_data
  DoubleDescriptive.rankInterpolated(ordered, elmt)
end

#reset_statisticsObject





36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
# File 'lib/colt/double_descriptive.rb', line 36

# Clears every memoized statistic so that it is recomputed from the current
# contents of the list on next access.
#
# Besides the original set of instance variables this also clears @rms
# (memoized by #rms) and @sum_of_square_deviations (the name under which
# #sum_of_squared_deviations stores its result); both were previously left
# untouched and therefore kept stale values after a reset.
#
# @return [nil]
def reset_statistics

  @distinct_values = nil
  @durbin_watson = nil
  @frequencies = nil
  @geometric_mean = nil
  @kurtosis = nil
  @lag1 = nil
  @max = nil
  @mean = nil
  @mean_deviation = nil
  @median = nil
  @min = nil
  @moment3 = nil
  @moment4 = nil
  @product = nil
  @rms = nil
  @sample_kurtosis = nil
  @sample_kurtosis_standard_error = nil
  @sample_skew = nil
  @sample_skew_standard_error = nil
  @sample_standard_deviation = nil
  @sample_variance = nil
  @sample_weighted_variance = nil
  @list_size = nil
  @skew = nil
  @sorted_data = nil
  @standard_deviation = nil
  @standard_error = nil
  @sum = nil
  @sum_of_inversions = nil
  @sum_of_logarithms = nil
  # Both spellings are cleared: the second one is what
  # #sum_of_squared_deviations actually writes to.
  @sum_of_squared_deviations = nil
  @sum_of_square_deviations = nil
  @sum_of_squares = nil
  @variance = nil
  @weighted_rms = nil

end

#rmsObject


Returns the RMS (Root-Mean-Square) of a data sequence.




318
319
320
# File 'lib/colt/double_descriptive.rb', line 318

# RMS (Root-Mean-Square) of the data sequence, computed from the list size
# and the sum of squares; cached in @rms.
def rms
  return @rms if @rms

  @rms = DoubleDescriptive.rms(list_size, sum_of_squares)
end

#sample_covariance(other_val) ⇒ Object


Returns the sample covariance of two data sequences. That is cov(x,y) = (1/(size()-1)) * Sum((x-mean(x)) * (y-mean(y))) .




327
328
329
330
# File 'lib/colt/double_descriptive.rb', line 327

# Sample covariance of two data sequences:
# cov(x, y) = (1 / (size() - 1)) * Sum((x - mean(x)) * (y - mean(y))).
#
# The other list's cached statistics are cleared before its data is used.
#
# @param other_val the other list; must respond to reset_statistics and
#   array_list
def sample_covariance(other_val)
  other_val.reset_statistics
  theirs = other_val.array_list
  DoubleDescriptive.covariance(@array_list, theirs)
end

#sample_kurtosisObject


Returns the sample kurtosis (aka excess) of a data sequence.




336
337
338
339
# File 'lib/colt/double_descriptive.rb', line 336

# Sample kurtosis (aka excess) of the data sequence, computed from the list
# size, the fourth central moment and the sample variance; cached in
# @sample_kurtosis.
def sample_kurtosis
  return @sample_kurtosis if @sample_kurtosis

  @sample_kurtosis =
    DoubleDescriptive.sampleKurtosis(list_size, moment4, sample_variance)
end

#sample_kurtosis_standard_errorObject


Return the standard error of the sample kurtosis. Ref: R.R. Sokal, F.J. Rohlf, Biometry: the principles and practice of statistics in biological research (W.H. Freeman and Company, New York, 1998, 3rd edition) p. 138.




347
348
349
350
# File 'lib/colt/double_descriptive.rb', line 347

# Standard error of the sample kurtosis, which depends only on the list
# size; cached in @sample_kurtosis_standard_error.
def sample_kurtosis_standard_error
  unless @sample_kurtosis_standard_error
    @sample_kurtosis_standard_error =
      DoubleDescriptive.sampleKurtosisStandardError(list_size)
  end
  @sample_kurtosis_standard_error
end

#sample_skewObject


Returns the sample skew of a data sequence.




356
357
358
359
# File 'lib/colt/double_descriptive.rb', line 356

# Sample skew of the data sequence, computed from the list size, the third
# central moment and the sample variance; cached in @sample_skew.
def sample_skew
  return @sample_skew if @sample_skew

  @sample_skew =
    DoubleDescriptive.sampleSkew(list_size, moment3, sample_variance)
end

#sample_skew_standard_errorObject


Return the standard error of the sample skew. Ref: R.R. Sokal, F.J. Rohlf, Biometry: the principles and practice of statistics in biological research (W.H. Freeman and Company, New York, 1998, 3rd edition) p. 138.




367
368
369
370
# File 'lib/colt/double_descriptive.rb', line 367

# Standard error of the sample skew, which depends only on the list size;
# cached in @sample_skew_standard_error.
def sample_skew_standard_error
  unless @sample_skew_standard_error
    @sample_skew_standard_error =
      DoubleDescriptive.sampleSkewStandardError(list_size)
  end
  @sample_skew_standard_error
end

#sample_standard_deviationObject


Returns the sample standard deviation. Ref: R.R. Sokal, F.J. Rohlf, Biometry: the principles and practice of statistics in biological research (W.H. Freeman and Company, New York, 1998, 3rd edition) p. 53. The standard deviation calculated as the sqrt of the variance underestimates the unbiased standard deviation. It needs to be multiplied by this correction factor: 1) if (n > 30): Cn = 1+1/(4*(n-1)), else 2) Cn = Math.sqrt((n - 1) * 0.5) * Gamma.gamma((n - 1) * 0.5) / Gamma.gamma(n * 0.5). The sample standard deviation is Cn * sqrt(sample variance).




383
384
385
386
# File 'lib/colt/double_descriptive.rb', line 383

# Sample standard deviation, computed from the list size and the sample
# variance; cached in @sample_standard_deviation.
def sample_standard_deviation
  return @sample_standard_deviation if @sample_standard_deviation

  @sample_standard_deviation =
    DoubleDescriptive.sampleStandardDeviation(list_size, sample_variance)
end

#sample_varianceObject


Returns the sample variance of a data sequence.




392
393
394
395
# File 'lib/colt/double_descriptive.rb', line 392

# Sample variance of the data sequence, computed from the list size, the
# sum and the sum of squares; cached in @sample_variance.
def sample_variance
  unless @sample_variance
    @sample_variance =
      DoubleDescriptive.sampleVariance(list_size, sum, sum_of_squares)
  end
  @sample_variance
end

#sample_weighted_variance(weights) ⇒ Object


Returns the sample weighted variance of a data sequence.

That is (sum_of_squared_products - sum_of_products * sum_of_products / 
sum_of_weights) / (sum_of_weights - 1)

where:

sum_of_weights = Sum ( weights[i] )
sum_of_products = Sum ( data[i] * weights[i] )
sum_of_squared_products = Sum( data[i] * data[i] * weights[i] )



407
408
409
410
411
412
413
414
# File 'lib/colt/double_descriptive.rb', line 407

# Sample weighted variance of the data sequence, i.e.
# (sum_of_squared_products - sum_of_products * sum_of_products /
#  sum_of_weights) / (sum_of_weights - 1).
#
# Not cached, since the result depends on the weights passed in.
#
# @param weights Ruby array of weights; converted to a Java DoubleArrayList
def sample_weighted_variance(weights)
  java_weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  weight_total = DoubleDescriptive.sum(java_weights)
  products, squared_products = weighted_sums(java_weights)
  DoubleDescriptive.sampleWeightedVariance(weight_total, products, squared_products)
end

#skewObject


Returns the skew of a data sequence, which is moment(data,3,mean) / standardDeviation^3.




421
422
423
# File 'lib/colt/double_descriptive.rb', line 421

# Skew of the data sequence, computed from the third central moment and the
# standard deviation; cached in @skew.
def skew
  return @skew if @skew

  @skew = DoubleDescriptive.skew(moment3, standard_deviation)
end

#sortObject


Returns a list with the sorted elements




461
462
463
464
465
# File 'lib/colt/double_descriptive.rb', line 461

# Returns the sorted elements as a Ruby array.
#
# Calls #sorted_data first so that @sorted_data is populated, trims that
# list to its size and converts its elements.
def sort
  sorted_data
  list = @sorted_data
  list.trimToSize
  list.elements.to_a
end

#sorted_dataObject





471
472
473
474
475
476
477
478
479
480
481
482
# File 'lib/colt/double_descriptive.rb', line 471

# Returns the data as a sorted Java DoubleArrayList.
#
# A clone of @array_list is sorted on first use and the result is cached in
# @sorted_data; @array_list itself is not reordered by this method's own
# statements.
def sorted_data
  return @sorted_data if @sorted_data

  copy = @array_list.clone
  # elements() hands back the whole backing array, including unused capacity
  # slots beyond size(). Wrapping that array in a new DoubleArrayList would
  # count those slots as data, so shrink the clone to its real size first.
  copy.trimToSize
  list = copy.elements
  comp = Proc.new { |val1, val2| val1 <=> val2 }
  Java::CernColt::Sorting.parallelQuickSort(list, 0, @array_list.size, comp)
  @sorted_data = Java::CernColtListTdouble::DoubleArrayList.new(list)
end

#split(splitters) ⇒ Object


Splits (partitions) a list into sublists such that each sublist contains the elements with a given range. splitters= (a,b,c,…,y,z) defines the ranges [-inf,a), [a,b), [b,c), …, [y,z), [z,inf]. Examples:

data = (1,2,3,4,5,8,8,8,10,11). 
splitters=(2,8) yields 3 bins: (1), (2,3,4,5) (8,8,8,10,11). 
splitters=() yields 1 bin: (1,2,3,4,5,8,8,8,10,11). 
splitters=(-5) yields 2 bins: (), (1,2,3,4,5,8,8,8,10,11). 
splitters=(100) yields 2 bins: (1,2,3,4,5,8,8,8,10,11), ().

Returns:

  • the sublists (an array with length == splitters.size() + 1. Each sublist is returned sorted ascending.



441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
# File 'lib/colt/double_descriptive.rb', line 441

# Splits (partitions) the sorted data into sublists, one per range defined
# by the splitters: [-inf,a), [a,b), ..., [z,inf].
#
# @param splitters Ruby array of splitter values; converted to a Java
#   DoubleArrayList
# @return [Array<Array>] one Ruby array per bin returned by
#   DoubleDescriptive.split
def split(splitters)
  boundaries = Java::CernColtListTdouble::DoubleArrayList.new(splitters.to_java(Java::double))
  partitions = DoubleDescriptive.split(sorted_data, boundaries)

  partitions.to_a.map do |partition|
    # Trim each bin before reading its elements.
    partition.trimToSize
    partition.elements.to_a
  end
end

#standard_deviationObject


Returns the standard deviation from a variance.




488
489
490
# File 'lib/colt/double_descriptive.rb', line 488

# Standard deviation, derived from the variance; cached in
# @standard_deviation.
def standard_deviation
  return @standard_deviation if @standard_deviation

  @standard_deviation = DoubleDescriptive.standardDeviation(variance)
end

#standard_errorObject


Returns the standard error of a data sequence.




496
497
498
# File 'lib/colt/double_descriptive.rb', line 496

# Standard error of the data sequence, computed from the list size and the
# variance; cached in @standard_error.
def standard_error
  unless @standard_error
    @standard_error = DoubleDescriptive.standardError(list_size, variance)
  end
  @standard_error
end

#standardize!Object


Modifies a data sequence to be standardized. Changes each element data as follows: data = (data-mean)/standardDeviation.




505
506
507
# File 'lib/colt/double_descriptive.rb', line 505

# Standardizes the data sequence in place: each element becomes
# (data - mean) / standardDeviation.
#
# Because the underlying list is modified, every memoized statistic is
# discarded afterwards; otherwise #mean, #variance, #sorted_data and the
# rest would keep reporting values of the pre-standardization data.
#
# @return whatever DoubleDescriptive.standardize returns
def standardize!
  # mean and standard_deviation are evaluated on the still-unmodified data.
  result = DoubleDescriptive.standardize(@array_list, mean, standard_deviation)
  reset_statistics
  result
end

#sumObject


Returns the sum of a data sequence.




513
514
515
# File 'lib/colt/double_descriptive.rb', line 513

# Sum of the data sequence; cached in @sum.
def sum
  return @sum if @sum

  @sum = DoubleDescriptive.sum(@array_list)
end

#sum_of_inversions(from = 0, to = list_size - 1) ⇒ Object


Returns the sum of inversions of a data sequence, which is Sum( 1.0 / data).




521
522
523
# File 'lib/colt/double_descriptive.rb', line 521

# Sum of inversions of the data sequence, i.e. Sum( 1.0 / data ), over the
# index range from..to.
#
# Only the default full-range result (0..list_size - 1) is memoized in
# @sum_of_inversions. A sub-range request is computed on demand and never
# stored: caching it would make later calls, including the full-range call
# made by #harmonic_mean, return the sum of the wrong range.
#
# @param from first index of the range (default 0)
# @param to last index of the range (default list_size - 1)
def sum_of_inversions(from = 0, to = list_size - 1)
  unless from == 0 && to == list_size - 1
    return DoubleDescriptive.sumOfInversions(@array_list, from, to)
  end

  @sum_of_inversions ||= DoubleDescriptive.sumOfInversions(@array_list, from, to)
end

#sum_of_logarithms(from = 0, to = list_size - 1) ⇒ Object


Returns the sum of logarithms of a data sequence, which is Sum( Log(data) ).




529
530
531
# File 'lib/colt/double_descriptive.rb', line 529

# Sum of logarithms of the data sequence, i.e. Sum( Log(data) ), over the
# index range from..to.
#
# Only the default full-range result (0..list_size - 1) is memoized in
# @sum_of_logarithms. A sub-range request is computed on demand and never
# stored, so it can neither poison the cache nor be answered with the sum
# of a different range.
#
# @param from first index of the range (default 0)
# @param to last index of the range (default list_size - 1)
def sum_of_logarithms(from = 0, to = list_size - 1)
  unless from == 0 && to == list_size - 1
    return DoubleDescriptive.sumOfLogarithms(@array_list, from, to)
  end

  @sum_of_logarithms ||= DoubleDescriptive.sumOfLogarithms(@array_list, from, to)
end

#sum_of_power_deviations(k, c) ⇒ Object


Returns Sum( (data-c)^k ); optimized for common parameters like c == 0.0 and/or k == -2




538
539
540
# File 'lib/colt/double_descriptive.rb', line 538

# Sum of power deviations of the data sequence, i.e. Sum( (data - c)^k ).
# Not cached.
#
# @param k order of the power
# @param c constant subtracted from each element
def sum_of_power_deviations(k, c)
  data = @array_list
  DoubleDescriptive.sumOfPowerDeviations(data, k, c)
end

#sum_of_powers(k) ⇒ Object


Returns the sum of powers of a data sequence, which is Sum( data^k ).




546
547
548
# File 'lib/colt/double_descriptive.rb', line 546

# Sum of powers of the data sequence, i.e. Sum( data^k ). Not cached.
#
# @param k order of the power
def sum_of_powers(k)
  data = @array_list
  DoubleDescriptive.sumOfPowers(data, k)
end

#sum_of_squared_deviationsObject


Returns the sum of squared mean deviation of a data sequence.




577
578
579
580
# File 'lib/colt/double_descriptive.rb', line 577

# Sum of squared mean deviations of the data sequence, computed from the
# list size and the variance.
#
# The result is memoized in @sum_of_squared_deviations, which is the
# variable #reset_statistics clears. It used to be stored under the
# misspelled name @sum_of_square_deviations, which a reset never touched,
# so the value stayed stale.
def sum_of_squared_deviations
  @sum_of_squared_deviations ||=
    DoubleDescriptive.sumOfSquaredDeviations(list_size, variance)
end

#sum_of_squaresObject


Returns the sum of squares of a data sequence.




586
587
588
# File 'lib/colt/double_descriptive.rb', line 586

# Sum of squares of the data sequence; cached in @sum_of_squares.
def sum_of_squares
  return @sum_of_squares if @sum_of_squares

  @sum_of_squares = DoubleDescriptive.sumOfSquares(@array_list)
end

#trimmed_mean(left = 0, right = 0) ⇒ Object


Returns the trimmed mean of a sorted data sequence.




594
595
596
# File 'lib/colt/double_descriptive.rb', line 594

# Trimmed mean of the sorted data sequence. Not cached.
#
# @param left passed through to DoubleDescriptive.trimmedMean (default 0)
# @param right passed through to DoubleDescriptive.trimmedMean (default 0)
def trimmed_mean(left = 0, right = 0)
  ordered = sorted_data
  average = mean
  DoubleDescriptive.trimmedMean(ordered, average, left, right)
end

#varianceObject


Returns the variance of a data sequence, computed from its size, sum and sum of squares.




602
603
604
605
# File 'lib/colt/double_descriptive.rb', line 602

# Variance of the data sequence, computed from the list size, the sum and
# the sum of squares; cached in @variance.
def variance
  return @variance if @variance

  @variance = DoubleDescriptive.variance(list_size, sum, sum_of_squares)
end

#weighted_mean(weights) ⇒ Object


Returns the weighted mean of a data sequence.




611
612
613
614
# File 'lib/colt/double_descriptive.rb', line 611

# Weighted mean of the data sequence. Not cached, since the result depends
# on the weights passed in.
#
# @param weights Ruby array of weights; converted to a Java DoubleArrayList
def weighted_mean(weights)
  java_weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  DoubleDescriptive.weightedMean(@array_list, java_weights)
end

#weighted_rms(weights) ⇒ Object


Returns the weighted RMS (Root-Mean-Square) of a data sequence.




620
621
622
623
624
625
626
# File 'lib/colt/double_descriptive.rb', line 620

# Weighted RMS (Root-Mean-Square) of the data sequence. Not cached, since
# the result depends on the weights passed in.
#
# @param weights Ruby array of weights; converted to a Java DoubleArrayList
#   and passed to #weighted_sums
def weighted_rms(weights)
  java_weights = Java::CernColtListTdouble::DoubleArrayList.new(weights.to_java(Java::double))
  products, squared_products = weighted_sums(java_weights)
  DoubleDescriptive.weightedRMS(products, squared_products)
end

#weighted_sums(other_val, from = 0, to = list_size - 1) ⇒ Object


Returns the sum of the products with another array, that is Sum( data * other_val ), together with the matching sum of squared products, Sum( data * data * other_val ).


Parameters:

  • other_val:

    ruby array or a CernColtListTdouble::DoubleArrayList (when called internally)



557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
# File 'lib/colt/double_descriptive.rb', line 557

# Accumulates the weighted sums of the data over the index range from..to
# and returns them as a two-element Ruby array. Callers in this module
# destructure the result as sum_of_products and sum_of_squared_products.
#
# @param other_val a Ruby Array (converted to a Java DoubleArrayList) or an
#   existing Java DoubleArrayList of weights
# @param from first index of the range (default 0)
# @param to last index of the range (default list_size - 1)
# @raise [RuntimeError] when other_val is neither of the accepted types
def weighted_sums(other_val, from = 0, to = list_size - 1)
  weights =
    case other_val
    when Array
      Java::CernColtListTdouble::DoubleArrayList.new(other_val.to_java(Java::double))
    when Java::CernColtListTdouble::DoubleArrayList
      other_val
    else
      raise "#{other_val} is not a valid weight array"
    end

  # Two-slot Java double array that the Java call updates in place.
  accumulator = [0.0, 0.0].to_java Java::double
  DoubleDescriptive.incrementalWeightedUpdate(@array_list, weights, from, to, accumulator)
  [accumulator[0], accumulator[1]]
end

#winsorized_mean(left, right) ⇒ Object


Returns the winsorized mean of a sorted data sequence.




632
633
634
# File 'lib/colt/double_descriptive.rb', line 632

# Winsorized mean of the sorted data sequence. Not cached.
#
# @param left passed through to DoubleDescriptive.winsorizedMean
# @param right passed through to DoubleDescriptive.winsorizedMean
def winsorized_mean(left, right)
  ordered = sorted_data
  average = mean
  DoubleDescriptive.winsorizedMean(ordered, average, left, right)
end