Module: Daru::Maths::Statistics::DataFrame

Included in:
DataFrame
Defined in:
lib/daru/maths/statistics/dataframe.rb

Instance Method Summary collapse

Instance Method Details

#acf(max_lags) ⇒ Object

Calculate Autocorrelation coefficient

Parameters:

  • max_lags (Integer)

    (nil) Number of initial lags



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#correlation ⇒ Object Also known as: corr

Calculate the correlation between the numeric vectors.



154
155
156
157
158
159
160
161
162
# File 'lib/daru/maths/statistics/dataframe.rb', line 154

# Divides the covariance matrix element-wise by
# (std matrix transposed * std matrix) and wraps the resulting rows in a
# DataFrame whose index and order are both the numeric vectors.
def correlation
  sd_matrix = std.to_matrix
  cov_matrix = cov.to_matrix
  sd_products = sd_matrix.transpose * sd_matrix
  corr_rows = cov_matrix.elementwise_division(sd_products).to_a

  Daru::DataFrame.rows(corr_rows, index: numeric_vectors, order: numeric_vectors)
end

#count ⇒ Object

Count the number of non-nil values in each vector



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#covariance ⇒ Object Also known as: cov

Calculate sample variance-covariance between the numeric vectors.



134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# File 'lib/daru/maths/statistics/dataframe.rb', line 134

# Builds a square matrix over the numeric vectors: the diagonal holds each
# vector's variance, every other cell holds vector_cov of the two vectors.
# The matrix is returned as a DataFrame indexed and ordered by those vectors.
def covariance
  # Store each computed pair under both key orders so the mirrored cell is
  # read from the hash instead of calling vector_cov a second time.
  pair_cov = Hash.new do |memo, (a, b)|
    vector_cov(self[b], self[a]).tap do |value|
      memo[[a, b]] = value
      memo[[b, a]] = value
    end
  end
  names = numeric_vectors

  matrix = names.map do |row_name|
    names.map do |col_name|
      next self[row_name].variance if row_name == col_name

      pair_cov[[col_name, row_name]]
    end
  end

  Daru::DataFrame.rows(matrix, index: numeric_vectors, order: numeric_vectors)
end

#cumsum ⇒ Object

Calculate cumulative sum of each numeric Vector



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#describe(methods = nil) ⇒ Object

Create a summary of mean, standard deviation, count, max and min of each numeric vector in the dataframe in one shot.

Arguments

methods - An array of aggregation methods, given as symbols, to apply to each numeric vector. Defaults to [:count, :mean, :std, :min, :max]. The methods are applied in the order given.



90
91
92
93
94
95
96
97
98
# File 'lib/daru/maths/statistics/dataframe.rb', line 90

# Sends every aggregation named in +methods+ (in order) to each numeric
# vector and collects the results into a new DataFrame whose index is the
# list of aggregation names. Falls back to count, mean, std, min and max
# when +methods+ is not given.
def describe methods=nil
  methods ||= %i[count mean std min max]

  summary = {}
  numeric_vectors.each { |name| summary[name] = methods.map { |agg| self[name].send(agg) } }

  Daru::DataFrame.new(summary, index: methods)
end

#ema(n, wilder) ⇒ Object

Calculate exponential moving average.

Parameters:

  • n (Integer)

    (10) Loopback length.

  • wilder (TrueClass, FalseClass, NilClass)

    (false) If true, 1/n value is used for smoothing; if false, uses 2/(n+1) value.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#max(opts = {}) ⇒ Object

Calculate the maximum value of each numeric vector.



32
33
34
35
36
37
38
# File 'lib/daru/maths/statistics/dataframe.rb', line 32

# Without a :vector option this returns compute_stats(:max). When
# opts[:vector] is set, it instead indexes +row+ with the index entries
# reported by that vector's max_index.
def max opts={}
  target = opts[:vector]
  return compute_stats(:max) unless target

  row[*self[target].max_index.index.to_a]
end

#mean ⇒ Object

Calculate mean of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#median ⇒ Object

Calculate median of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#min ⇒ Object

Calculate the minimum value of each numeric vector



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#mode ⇒ Object

Calculate mode of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#percent_change(periods = 1) ⇒ Object

The percent_change method computes the percent change over the given number of periods for numeric vectors.

Examples:


df = Daru::DataFrame.new({
     'col0' => [1,2,3,4,5,6],
     'col2' => ['a','b','c','d','e','f'],
     'col1' => [11,22,33,44,55,66]
     },
     index: ['one', 'two', 'three', 'four', 'five', 'six'],
     order: ['col0', 'col1', 'col2'])
df.percent_change
#=>
#   <Daru::DataFrame:23513280 @rows: 6 @cols: 2>
#              col0                col1
#   one
#   two        1.0                 1.0
#   three      0.5                 0.5
#   four       0.3333333333333333  0.3333333333333333
#   five       0.25                0.25
#   six        0.2                 0.2

Parameters:

  • periods (Integer) (defaults to: 1)

    (1) number of nils to insert at the beginning.



124
125
126
127
128
129
130
131
# File 'lib/daru/maths/statistics/dataframe.rb', line 124

# Creates an empty DataFrame carrying this frame's @order, @index and
# @name, then fills it with the percent_change(periods) of every vector
# listed by only_numerics, and returns that new frame.
def percent_change periods=1
  numeric_names = only_numerics.vectors.to_a
  result = Daru::DataFrame.new({}, order: @order, index: @index, name: @name)

  # each_with_object returns the frame it was given, i.e. +result+.
  numeric_names.each_with_object(result) do |name, frame|
    frame[name] = self[name].percent_change(periods)
  end
end

#product ⇒ Object

Compute the product of each numeric vector



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#range ⇒ Object

Calculate range of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#rolling_count(n) ⇒ Object

Calculate moving non-missing count

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_max(n) ⇒ Object

Calculate moving max

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_mean(n) ⇒ Object

Calculate moving averages

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_median(n) ⇒ Object

Calculate moving median

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_min(n) ⇒ Object

Calculate moving min

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_std(n) ⇒ Object

Calculate moving standard deviation

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#rolling_variance(n) ⇒ Object

Calculate moving variance

Parameters:

  • n (Integer)

    (10) Loopback length. Default to 10.



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#standardize ⇒ Object

Standardize each Vector



73
74
75
76
77
78
79
80
# File 'lib/daru/maths/statistics/dataframe.rb', line 73

# Generates one method per listed operation; each generated method hands
# its own name plus any positional arguments to apply_method_to_numerics.
%i[
  cumsum standardize acf ema
  rolling_mean rolling_median rolling_max rolling_min
  rolling_count rolling_std rolling_variance rolling_sum
].each { |op| define_method(op) { |*args| apply_method_to_numerics(op, *args) } }

#std ⇒ Object Also known as: sds

Calculate sample standard deviation of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#sum ⇒ Object

Calculate sum of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }

#variance_sample ⇒ Object Also known as: variance

Calculate sample variance of numeric vectors



25
26
27
28
29
# File 'lib/daru/maths/statistics/dataframe.rb', line 25

# Generates one zero-argument method per listed statistic; each generated
# method returns compute_stats called with its own name.
%i[
  mean variance_sample range median mode std sum count min product
].each { |stat| define_method(stat) { compute_stats(stat) } }