Module: Utilities::Statistics

Defined in:
lib/utilities/utilities.rb

Instance Method Summary collapse

Instance Method Details

#first_quartile(already_sorted = false) ⇒ Object Also known as: lower_quartile

Return the first quartile of self



87
88
89
90
91
# File 'lib/utilities/utilities.rb', line 87

# Return the first (lower) quartile of self: the median of the lower half of
# the sorted values, i.e. the first (size / 2) elements. The middle element
# is therefore left out when size is odd.
# Returns nil when self holds fewer than 4 elements.
# Pass already_sorted = true to skip the sort.
def first_quartile( already_sorted = false )
  return nil unless size >= 4
  ordered = already_sorted ? self : sort
  lower_half = ordered[0...(size / 2)]
  lower_half.extend(Utilities::Statistics).median( true )
end

#frequences ⇒ Object

Calculate the number of occurrences of each element of the array



59
60
61
# File 'lib/utilities/utilities.rb', line 59

# Count how many times each element occurs in self.
# Returns a Hash mapping element => count; the hash has a default value of 0,
# so looking up an element that never occurred yields 0.
def frequences
  each_with_object(Hash.new(0)) { |item, counts| counts[item] += 1 }
end

#interquartile_range(already_sorted = false) ⇒ Object

Calculate the interquartile range of self



109
110
111
112
113
# File 'lib/utilities/utilities.rb', line 109

# Return the interquartile range of self: last quartile minus first quartile.
# Returns nil when self holds fewer than 4 elements.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values extended with this module.
def interquartile_range( already_sorted = false )
  return nil unless size >= 4
  ordered = sort_and_extend( already_sorted )
  upper = ordered.last_quartile
  lower = ordered.first_quartile
  upper - lower
end

#last_quartile(already_sorted = false) ⇒ Object Also known as: upper_quartile

Return the last quartile of self



95
96
97
98
99
# File 'lib/utilities/utilities.rb', line 95

# Return the last (upper) quartile of self: the median of the upper half of
# the sorted values. The upper half mirrors the lower half used by
# first_quartile: it holds the last (size / 2) elements, so the middle
# element is left out when size is odd.
# Returns nil when self holds fewer than 4 elements.
# Pass already_sorted = true to skip the sort.
def last_quartile( already_sorted = false )
  return nil if size < 4
  a = already_sorted ? self : sort
  # Start at size - size / 2 (not size / 2 + 1) so that an even-sized array
  # keeps both of its upper-half elements, e.g. [1, 2, 3, 4] -> [3, 4].
  upper_half = a[(size - (size / 2))..-1]
  upper_half.extend(Utilities::Statistics).median( true )
end

#mean ⇒ Object Also known as: average

Calculate the mean of the array, as long as all objects respond to the / operator



52
53
54
55
# File 'lib/utilities/utilities.rb', line 52

# Return the arithmetic mean of self as a Float.
# Nested arrays are flattened and nil entries dropped before averaging.
# Returns 0.0 when nothing is left to average.
# to_stat is defined outside this section; presumably it extends the array
# with this module so that sum is available.
def mean
  values = flatten.compact.to_stat
  return 0.0 unless values.size > 0
  values.sum.to_f / values.size
end

#median(already_sorted = false) ⇒ Object Also known as: second_quartile

Return the median of sorted self



78
79
80
81
82
83
# File 'lib/utilities/utilities.rb', line 78

# Return the median of self, or nil when self is empty.
# For an odd size the middle element itself is returned; for an even size
# the result is the Float average of the two middle elements.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values.
def median( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  middle = size / 2
  return ordered[middle] if size.odd?
  (ordered[middle - 1] + ordered[middle]).to_f / 2
end

#midrange(already_sorted = false) ⇒ Object

Return the midrange of sorted self



123
124
125
126
127
# File 'lib/utilities/utilities.rb', line 123

# Return the midrange of self: the Float average of the first and last
# elements of the sorted values. Returns nil when self is empty.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values.
def midrange( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  lowest, highest = ordered.first, ordered.last
  (lowest + highest) / 2.0
end

#modes ⇒ Object

Return a hash of modes with their corresponding occurrences



116
117
118
119
120
# File 'lib/utilities/utilities.rb', line 116

# Return the modes of self: every element whose occurrence count equals the
# highest count found by frequences, mapped to that count.
# An empty collection yields an empty result.
def modes
  counts = frequences
  highest = counts.values.max
  counts.select { |_item, count| count == highest }
end

#quartiles(already_sorted = false) ⇒ Object

Return an array containing the first, the second and the last quartile of self



103
104
105
106
# File 'lib/utilities/utilities.rb', line 103

# Return [first quartile, median, last quartile] of self.
# The values are prepared once through sort_and_extend and each quartile is
# then asked for with already_sorted = true.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values extended with this module.
def quartiles( already_sorted = false )
  ordered = sort_and_extend( already_sorted )
  lower = ordered.first_quartile( true )
  middle = ordered.median( true )
  upper = ordered.last_quartile( true )
  [lower, middle, upper]
end

#ranks(already_sorted = false) ⇒ Object

Return a new array containing the rank of each value. Ex: [1, 2, 2, 8, 9] #=> [0.0, 1.5, 1.5, 3.0, 4.0]



41
42
43
44
# File 'lib/utilities/utilities.rb', line 41

# Return a new array holding the rank of each element of self, in the order
# the elements appear in self. A rank is the average of the first and last
# zero-based positions of the value in the sorted values, so tied values
# share the same fractional rank.
# Ex: [1, 2, 2, 8, 9] #=> [0.0, 1.5, 1.5, 3.0, 4.0]
def ranks( already_sorted = false )
  ordered = already_sorted ? self : sort
  map do |value|
    first_pos = ordered.index(value)
    last_pos = ordered.rindex(value)
    (first_pos + last_pos) / 2.0
  end
end

#sqrts ⇒ Object

Calculate square roots of each item



47
48
49
# File 'lib/utilities/utilities.rb', line 47

# Return a new array holding the result of calling sqrt on each item.
# Each item must respond to sqrt (not a core Ruby instance method on
# numbers; presumably added elsewhere in this library).
def sqrts
  map(&:sqrt)
end

#squares ⇒ Object

Calculate squares of each item



35
36
37
# File 'lib/utilities/utilities.rb', line 35

# Return a new array holding each item raised to the power of 2.
# Each item must respond to the ** operator.
def squares
  map do |value|
    value**2
  end
end

#standard_deviation(population = false) ⇒ Object Also known as: std_dev

Return the (sample|population) standard deviation of self. If population is set to true, the dataset is treated as the complete population. Otherwise it is treated as a sample, so the sample standard deviation (size - 1) is used.



72
73
74
# File 'lib/utilities/utilities.rb', line 72

# Return the standard deviation of self: the square root of variance,
# forwarding the population flag (true = population, false = sample).
# Returns 0.0 when self holds fewer than 2 elements.
def standard_deviation( population = false )
  return 0.0 unless size > 1
  Math.sqrt( variance( population ) )
end

#statistical_range(already_sorted = false) ⇒ Object

Return the statistical range of sorted self



130
131
132
133
134
# File 'lib/utilities/utilities.rb', line 130

# Return the statistical range of self: the last element of the sorted
# values minus the first one. Returns nil when self is empty.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values.
def statistical_range( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  highest = ordered.last
  lowest = ordered.first
  highest - lowest
end

#statistics(already_sorted = false) ⇒ Object Also known as: stats

Return all statistics from self in a simple hash



137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/utilities/utilities.rb', line 137

# Return every statistic of self gathered in one hash keyed by symbol.
# Figures that do not depend on ordering are computed from self; the others
# are computed from the result of sort_and_extend and called with
# already_sorted = true.
# sort_and_extend is defined outside this section; presumably it returns the
# (sorted unless already_sorted) values extended with this module.
def statistics( already_sorted = false )
  ordered = sort_and_extend( already_sorted )

  # Order-independent figures, taken straight from self.
  unordered_stats = {
    :first => first,
    :last => last,
    :size => size,
    :sum => sum,
    :squares => squares,
    :sqrts => sqrts,
    :min => min,
    :max => max,
    :mean => mean,
    :frequences => frequences,
    :variance => variance,
    :standard_deviation => standard_deviation,
    :population_variance => variance( true ),
    :population_standard_deviation => standard_deviation( true ),
    :modes => modes
  }

  # Figures that need the values to be sorted.
  ordered_stats = {
    :ranks => ordered.ranks( true ),
    :median => ordered.median( true ),
    :midrange => ordered.midrange( true ),
    :statistical_range => ordered.statistical_range( true ),
    :quartiles => ordered.quartiles( true ),
    :interquartile_range => ordered.interquartile_range( true )
  }

  unordered_stats.merge( ordered_stats )
end

#sum ⇒ Object

Add all objects of the array together to get the sum, as long as all objects respond to the + operator



30
31
32
# File 'lib/utilities/utilities.rb', line 30

# Return the sum of self, combining the items with the + operator.
# Nested arrays are flattened and nil entries dropped first.
# Returns nil when nothing is left to add (no seed value is used, so
# non-numeric items such as strings can be summed too).
def sum
  flatten.compact.reduce { |total, item| total + item }
end

#variance(population = false) ⇒ Object

Return the variance of self



64
65
66
67
# File 'lib/utilities/utilities.rb', line 64

# Return the variance of self: the sum of the squared deviations from the
# mean, divided by size (population = true) or by size - 1 (sample, the
# default).
# square and to_stats are defined outside this section; presumably square
# returns the number multiplied by itself and to_stats extends the array
# with this module.
def variance( population = false )
  center = mean.to_f
  squared_deviations = map { |value| (value - center).square }
  divisor = population ? size : size - 1
  squared_deviations.to_stats.sum / divisor
end