Module: Daru::Maths::Statistics::DataFrame

Included in:
DataFrame
Defined in:
lib/daru/maths/statistics/dataframe.rb

Instance Method Summary

Instance Method Details

#correlation ⇒ Object Also known as: corr

Calculate the correlation between the numeric vectors.



94
95
96
97
98
99
100
101
102
# File 'lib/daru/maths/statistics/dataframe.rb', line 94

# Calculate the correlation between the numeric vectors.
#
# The matrix form of #cov is divided element-wise by
# `sd.transpose * sd`, where `sd` is the matrix form of #std.
#
# @return [Object] the result of Daru::DataFrame.rows, with both index and
#   order set to the numeric vectors
def correlation
  sd_matrix = std.to_matrix
  cov_matrix = cov.to_matrix
  # Denominator for every cell: built from the standard deviations only.
  sd_products = sd_matrix.transpose * sd_matrix
  corr_rows = cov_matrix.elementwise_division(sd_products).to_a

  Daru::DataFrame.rows(corr_rows, index: numeric_vectors, order: numeric_vectors)
end

#count ⇒ Object

Count the number of non-nil values in each vector.



21
22
23
# File 'lib/daru/maths/statistics/dataframe.rb', line 21

# Count the number of non-nil values in each vector.
#
# Delegates to compute_stats with the :count statistic.
#
# @return [Object] whatever compute_stats returns for :count
def count
  compute_stats(:count)
end

#covariance ⇒ Object Also known as: cov

Calculate sample variance-covariance between the numeric vectors.



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/daru/maths/statistics/dataframe.rb', line 68

# Calculate sample variance-covariance between the numeric vectors.
#
# Diagonal cells hold each vector's variance. Off-diagonal cells come from
# vector_cov; a pair already computed in the mirrored position is reused
# instead of being computed a second time.
#
# @return [Object] the result of Daru::DataFrame.rows, with both index and
#   order set to the numeric vectors
def covariance
  names = numeric_vectors
  memo = {}

  matrix = names.map do |row_name|
    names.map do |col_name|
      next self[row_name].variance if row_name == col_name

      # The mirrored pair (col, row) was stored when it was visited earlier.
      mirrored = memo[[col_name, row_name]]
      next mirrored unless mirrored.nil?

      memo[[row_name, col_name]] = vector_cov(self[row_name], self[col_name])
    end
  end

  Daru::DataFrame.rows(matrix, index: numeric_vectors, order: numeric_vectors)
end

#describe(methods = nil) ⇒ Object

Create a summary of mean, standard deviation, count, max and min of each numeric vector in the dataframe in one shot.

Arguments

methods - An array of aggregation methods, given as symbols, to apply to each numeric vector. Default is [:count, :mean, :std, :min, :max]. Methods are applied in the order given.



57
58
59
60
61
62
63
64
65
# File 'lib/daru/maths/statistics/dataframe.rb', line 57

# Summarise each numeric vector with a set of aggregation methods.
#
# @param methods [Array<Symbol>, nil] aggregation methods sent to each
#   numeric vector, in this order; when nil, defaults to
#   [:count, :mean, :std, :min, :max]
# @return [Object] the result of Daru::DataFrame.new: one entry per numeric
#   vector, indexed by the method names
def describe(methods = nil)
  methods ||= %i[count mean std min max]

  summary = numeric_vectors.each_with_object({}) do |name, acc|
    acc[name] = methods.map { |stat| self[name].send(stat) }
  end

  Daru::DataFrame.new(summary, index: methods)
end

#max ⇒ Object

Calculate the maximum value of each numeric vector.



26
27
28
# File 'lib/daru/maths/statistics/dataframe.rb', line 26

# Calculate the maximum value of each numeric vector.
#
# Delegates to compute_stats with the :max statistic.
#
# @return [Object] whatever compute_stats returns for :max
def max
  compute_stats(:max)
end

#mean ⇒ Object

Calculate mean of numeric vectors.



6
7
8
# File 'lib/daru/maths/statistics/dataframe.rb', line 6

# Calculate the mean of the numeric vectors.
#
# Delegates to compute_stats with the :mean statistic.
#
# @return [Object] whatever compute_stats returns for :mean
def mean
  compute_stats(:mean)
end

#min ⇒ Object

Calculate the minimum value of each numeric vector.



31
32
33
# File 'lib/daru/maths/statistics/dataframe.rb', line 31

# Calculate the minimum value of each numeric vector.
#
# Delegates to compute_stats with the :min statistic.
#
# @return [Object] whatever compute_stats returns for :min
def min
  compute_stats(:min)
end

#product ⇒ Object

Compute the product of each numeric vector.



36
37
38
# File 'lib/daru/maths/statistics/dataframe.rb', line 36

# Compute the product of each numeric vector.
#
# Delegates to compute_stats with the :product statistic.
#
# @return [Object] whatever compute_stats returns for :product
def product
  compute_stats(:product)
end

#standardize ⇒ Object



40
41
42
43
44
45
46
47
# File 'lib/daru/maths/statistics/dataframe.rb', line 40

# Standardize the numeric part of the frame.
#
# Obtains a frame from only_numerics (requested with clone: true), then
# replaces in place, via map!, each element it yields with that element's
# own #standardize result.
#
# @return [Object] the frame returned by only_numerics, after map! has run
def standardize
  only_numerics(clone: true).tap do |numeric_frame|
    numeric_frame.map! { |vector| vector.standardize }
  end
end

#std ⇒ Object

Calculate sample standard deviation of numeric vectors.



11
12
13
# File 'lib/daru/maths/statistics/dataframe.rb', line 11

# Calculate the sample standard deviation of the numeric vectors.
#
# Delegates to compute_stats with the :std statistic.
#
# @return [Object] whatever compute_stats returns for :std
def std
  compute_stats(:std)
end

#sum ⇒ Object

Calculate the sum of the numeric vectors.



16
17
18
# File 'lib/daru/maths/statistics/dataframe.rb', line 16

# Calculate the sum of the numeric vectors.
#
# Delegates to compute_stats with the :sum statistic.
#
# @return [Object] whatever compute_stats returns for :sum
def sum
  compute_stats(:sum)
end