Module: Rust::Descriptive

Defined in:
lib/rust/stats/descriptive.rb

Overview

Module containing utilities for descriptive statistics.

Class Method Summary

Class Method Details

.mean(data) ⇒ Object

Computes the arithmetic mean of the given data.

Raises:

  • (TypeError)


12
13
14
15
16
# File 'lib/rust/stats/descriptive.rb', line 12

# Computes the arithmetic mean of +data+ as a Float.
#
# data:: Array whose elements are all Numeric.
#
# Returns the sum of the elements divided by their count. For an empty
# Array the division is 0.0 / 0, which yields Float::NAN.
#
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def mean(data)
    numeric_array = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" unless numeric_array

    total = data.sum
    total.to_f / data.size
end

.median(data) ⇒ Object

Computes the median of the given data.

Raises:

  • (TypeError)


44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/rust/stats/descriptive.rb', line 44

# Computes the median of +data+.
#
# data:: Array whose elements are all Numeric.
#
# Returns Float::NAN for an empty Array, the middle element of the sorted
# data (unchanged, so possibly an Integer) when the size is odd, and the
# Float average of the two middle elements when the size is even.
#
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def median(data)
    unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    ordered = data.sort
    count = ordered.size
    return Float::NAN if count == 0

    middle = count / 2
    return ordered[middle] if count.odd?

    # Even size: average the two elements surrounding the midpoint.
    (ordered[middle - 1] + ordered[middle]) / 2.0
end

.outliers(data, k = 1.5, **opts) ⇒ Object

Returns the outliers in data using Tukey’s fences, with a given k.



105
106
107
# File 'lib/rust/stats/descriptive.rb', line 105

# Returns the outliers of +data+, using +data+ itself as the reference
# distribution.
#
# This simply forwards to outliers_according_to, passing +data+ both as the
# values to filter and as the distribution; +k+ and any keyword options are
# handed over untouched.
def outliers(data, k = 1.5, **opts)
    return outliers_according_to(data, data, k, **opts)
end

.outliers_according_to(data, data_distribution, k = 1.5, **opts) ⇒ Object

Returns the outliers in data using Tukey’s fences with a given k, where the fences are computed from a separate reference distribution (data_distribution) rather than from data itself.



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/rust/stats/descriptive.rb', line 113

# Returns the elements of +data+ that fall outside Tukey's fences computed
# from +data_distribution+.
#
# The first and third quartiles (q1, q3) of +data_distribution+ are obtained
# through Rust::Descriptive.quantile; with iqr = q3 - q1, an element is a
# positive outlier when it is greater than q3 + iqr * k and a negative
# outlier when it is less than q1 - iqr * k.
#
# data::              values to filter.
# data_distribution:: values from which the quartiles are computed.
# k::                 multiplier applied to the interquartile range (default 1.5).
# opts[:side]::       optional; anything responding to to_sym.
#                     :positive, :pos, :p or :+ returns only positive outliers;
#                     :negative, :neg, :n or :- returns only negative outliers;
#                     any other value (or no value) returns both.
#
# Returns an Array with the negative outliers followed by the positive ones
# (or only one of the two groups when a side is selected).
def outliers_according_to(data, data_distribution, k=1.5, **opts)
    quantiles = Rust::Descriptive.quantile(data_distribution, [0.25, 0.75])
    q1 = quantiles[0.25]
    q3 = quantiles[0.75]
    iqr = q3 - q1

    # The fences do not depend on the element under test: compute them once.
    upper_fence = q3 + iqr * k
    lower_fence = q1 - iqr * k

    positive_outliers = data.select { |d| d > upper_fence }
    negative_outliers = data.select { |d| d < lower_fence }

    both_sides = negative_outliers + positive_outliers
    return both_sides unless opts[:side]

    # The short aliases must agree with the long names: pos/p/+ select the
    # upper side, neg/n/- the lower one.
    case opts[:side].to_sym
    when :positive, :pos, :p, :+
        positive_outliers
    when :negative, :neg, :n, :-
        negative_outliers
    else
        both_sides
    end
end

.quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0]) ⇒ Object

Returns the quantiles of the given data, given the percentiles (optional).

Raises:

  • (TypeError)


70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/rust/stats/descriptive.rb', line 70

# Returns the quantiles of +data+ at the requested +percentiles+.
#
# data::        Array whose elements are all Numeric.
# percentiles:: Array of Numeric values, each within [0, 1]
#               (default: minimum, quartiles and maximum).
#
# For every percentile p, a fractional position (size - 1) * p is located in
# the sorted data. When the position lies strictly between two indices that
# hold different values, the result is the linear interpolation of those two
# values; otherwise it is the element at the floored position. With empty
# +data+ every quantile is nil.
#
# Returns a Hash mapping each given percentile to its quantile.
#
# Raises TypeError when +data+ or +percentiles+ is not an Array of Numeric,
# and RuntimeError when a percentile lies outside [0, 1].
def quantile(data, percentiles = [0.0, 0.25, 0.5, 0.75, 1.0])
    unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end
    unless percentiles.is_a?(Array) && percentiles.all? { |e| e.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end
    raise "Percentiles outside the range: #{percentiles}" if percentiles.any? { |e| !e.between?(0, 1) }

    ordered = data.sort
    last_index = [ordered.size - 1, 0].max

    values = percentiles.map do |p|
        # Kept in the "1-based position minus one" form so that the floating
        # point rounding of the position stays exactly as it has always been.
        position = 1 + last_index * p - 1
        below = position.floor
        above = position.ceil

        low_value = ordered[below]
        high_value = ordered[above]

        if position > below && high_value != low_value
            # Interpolate between the two neighbours, weighting by the
            # fractional part of the position.
            fraction = position - below
            (1 - fraction) * low_value + fraction * high_value
        else
            low_value
        end
    end

    percentiles.zip(values).to_h
end

.standard_deviation(data) ⇒ Object Also known as: sd, stddev

Computes the standard deviation of the given data.

Raises:

  • (TypeError)


21
22
23
24
25
# File 'lib/rust/stats/descriptive.rb', line 21

# Computes the standard deviation of +data+, i.e. the square root of the
# value returned by variance for the same data.
#
# data:: Array whose elements are all Numeric.
#
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def standard_deviation(data)
    numeric_array = data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
    raise TypeError, "Expecting Array of numerics" unless numeric_array

    spread = variance(data)
    Math.sqrt(spread)
end

.sum(data) ⇒ Object

Sums the given data.

Raises:

  • (TypeError)


61
62
63
64
65
# File 'lib/rust/stats/descriptive.rb', line 61

# Sums the elements of +data+.
#
# data:: Array whose elements are all Numeric.
#
# Returns the result of Array#sum on +data+ (0 for an empty Array).
#
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def sum(data)
    unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    data.sum
end

.variance(data) ⇒ Object Also known as: var

Computes the variance of the given data.

Raises:

  • (TypeError)


32
33
34
35
36
37
38
# File 'lib/rust/stats/descriptive.rb', line 32

# Computes the variance of +data+: the sum of squared deviations from the
# mean, divided by (size - 1).
#
# data:: Array whose elements are all Numeric.
#
# Returns a Float, or Float::NAN when +data+ has fewer than two elements.
#
# Raises TypeError when +data+ is not an Array or holds a non-Numeric element.
def variance(data)
    unless data.is_a?(Array) && data.all? { |e| e.is_a?(Numeric) }
        raise TypeError, "Expecting Array of numerics"
    end

    count = data.size
    return Float::NAN if count < 2

    center = mean(data)
    squared_deviations = data.map { |v| (v - center) ** 2 }
    squared_deviations.sum.to_f / (count - 1)
end