Module: Utilities::Statistics

Defined in:
lib/utilities/utilities.rb

Instance Method Summary collapse

Instance Method Details

#first_quartile(already_sorted = false) ⇒ Object Also known as: lower_quartile

Return the first quartile of self



89
90
91
92
93
# File 'lib/utilities/utilities.rb', line 89

# Return the first (lower) quartile of self, computed as the median of the
# lower half of the sorted values. With an odd size the middle element is
# not part of that lower half.
#
# @param already_sorted [Boolean] pass true when self is already sorted, to skip the sort
# @return [Object, nil] the median of the lower half, or nil when self has fewer than 4 elements
def first_quartile( already_sorted = false )
  return nil if size < 4
  ordered = already_sorted ? self : sort
  # The slice is a plain array, so it has to be extended before #median is available on it.
  lower_half = ordered[0, size / 2]
  lower_half.extend(Utilities::Statistics).median( true )
end

#frequences ⇒ Object

Calculate the number of occurrences of each element of the array



59
60
61
# File 'lib/utilities/utilities.rb', line 59

# Count how many times each element occurs in self.
#
# @return [Hash] element => number of occurrences, in order of first appearance;
#   the hash is built with a default of 0, so looking up an absent element yields 0
def frequences
  each_with_object( Hash.new(0) ) { |item, counts| counts[item] += 1 }
end

#interquartile_range(already_sorted = false) ⇒ Object

Calculate the interquartile range of self



111
112
113
114
115
# File 'lib/utilities/utilities.rb', line 111

# Calculate the interquartile range of self: last quartile minus first quartile.
#
# @param already_sorted [Boolean] pass true when self is already sorted, to skip the sort
# @return [Object, nil] the difference between the two quartiles, or nil when self
#   has fewer than 4 elements
def interquartile_range( already_sorted = false )
  return nil if size < 4
  # NOTE(review): sort_and_extend is defined outside this view; it is relied on here
  # to return the sorted values, as #quartiles does when it passes true the same way.
  a = sort_and_extend( already_sorted )
  # The data is sorted once above, so tell both quartile calls not to sort it again.
  a.last_quartile( true ) - a.first_quartile( true )
end

#last_quartile(already_sorted = false) ⇒ Object Also known as: upper_quartile

Return the last quartile of self



97
98
99
100
101
# File 'lib/utilities/utilities.rb', line 97

# Return the last (upper) quartile of self, computed as the median of the
# upper half of the sorted values. With an odd size the middle element is
# not part of that upper half, mirroring #first_quartile.
#
# @param already_sorted [Boolean] pass true when self is already sorted, to skip the sort
# @return [Object, nil] the median of the upper half, or nil when self has fewer than 4 elements
def last_quartile( already_sorted = false )
  return nil if size < 4
  a = already_sorted ? self : sort
  # (size + 1) / 2 is the first index of the upper half for both parities:
  # size 8 -> index 4 (4 elements), size 5 -> index 3 (2 elements, median skipped).
  # The former (size / 2) + 1 dropped one element too many for even sizes.
  upper_half = a[((size + 1) / 2)..-1]
  upper_half.extend(Utilities::Statistics).median( true )
end

#mean ⇒ Object Also known as: average

Calculate the mean of the array, as long as all objects respond to the / operator



52
53
54
55
# File 'lib/utilities/utilities.rb', line 52

# Calculate the arithmetic mean of self after flattening nested arrays and
# dropping nil entries.
#
# @return [Float, nil] the mean, or nil when nothing is left after flatten/compact
def mean
  # NOTE(review): to_stat is defined outside this view; presumably it makes the
  # statistics methods available on the flattened array - confirm.
  values = flatten.compact.to_stat
  return nil unless values.size > 0
  values.sum.to_f / values.size
end

#median(already_sorted = false) ⇒ Object Also known as: second_quartile

Return the median of sorted self



80
81
82
83
84
85
# File 'lib/utilities/utilities.rb', line 80

# Return the median of self.
#
# For an odd size this is the middle element of the sorted values; for an even
# size it is the Float average of the two middle elements.
#
# @param already_sorted [Boolean] forwarded to sort_and_extend; pass true when
#   self is already sorted
# @return [Object, Float, nil] nil when self is empty
def median( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  middle = size / 2
  return ordered[middle] if size.odd?
  (ordered[middle - 1] + ordered[middle]).to_f / 2
end

#midrange(already_sorted = false) ⇒ Object

Return the midrange of sorted self



125
126
127
128
129
# File 'lib/utilities/utilities.rb', line 125

# Return the midrange of self: the average of the first and last values
# of the sorted data.
#
# @param already_sorted [Boolean] forwarded to sort_and_extend; pass true when
#   self is already sorted
# @return [Float, nil] nil when self is empty
def midrange( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  lowest, highest = ordered.first, ordered.last
  (lowest + highest) / 2.0
end

#modes ⇒ Object

Return a hash of modes with their corresponding occurrences



118
119
120
121
122
# File 'lib/utilities/utilities.rb', line 118

# Return the modes of self together with their number of occurrences.
#
# @return [Hash] every element whose count equals the highest count found by
#   #frequences, mapped to that count; empty when self is empty
def modes
  counts = frequences
  highest = counts.values.max
  counts.select { |_element, count| count == highest }
end

#quartiles(already_sorted = false) ⇒ Object

Return an array containing the first, the second and the last quartile of self



105
106
107
108
# File 'lib/utilities/utilities.rb', line 105

# Return the three quartiles of self.
#
# @param already_sorted [Boolean] forwarded to sort_and_extend; pass true when
#   self is already sorted
# @return [Array] [first quartile, median, last quartile]
def quartiles( already_sorted = false )
  ordered = sort_and_extend( already_sorted )
  # Each call receives true so that the data prepared above is not sorted again.
  lower  = ordered.first_quartile( true )
  middle = ordered.median( true )
  upper  = ordered.last_quartile( true )
  [lower, middle, upper]
end

#ranks(already_sorted = false) ⇒ Object

Return a new array containing the rank of each value; tied values share the average of their positions. Ex: [1, 2, 2, 8, 9] #=> [0.0, 1.5, 1.5, 3.0, 4.0]



41
42
43
44
# File 'lib/utilities/utilities.rb', line 41

# Return a new array holding the rank of each value of self, in the order the
# values appear in self. A rank is the average of the first and last position
# of the value in the sorted data, so tied values share the same rank.
# Ex: [1, 2, 2, 8, 9] #=> [0.0, 1.5, 1.5, 3.0, 4.0]
#
# @param already_sorted [Boolean] pass true when self is already sorted, to skip the sort
# @return [Array<Float>]
def ranks( already_sorted = false )
  ordered = already_sorted ? self : sort
  map do |value|
    first_position = ordered.index( value )
    last_position = ordered.rindex( value )
    (first_position + last_position) / 2.0
  end
end

#sqrts ⇒ Object

Calculate square roots of each item



47
48
49
# File 'lib/utilities/utilities.rb', line 47

# Calculate the square root of each item by calling #sqrt on it.
#
# NOTE(review): #sqrt is not a core Numeric method; it is presumably added to
# numbers elsewhere in this library - confirm.
#
# @return [Array] a new array with one result per item
def sqrts
  map(&:sqrt)
end

#squares ⇒ Object

Calculate squares of each item



35
36
37
# File 'lib/utilities/utilities.rb', line 35

# Calculate the square of each item by raising it to the power of 2.
#
# @return [Array] a new array with one squared value per item
def squares
  map { |value| value**2 }
end

#standard_deviation(population = false) ⇒ Object Also known as: std_dev

Return the (sample|population) standard deviation of self. If population is set to true, the dataset is treated as the complete population; otherwise it is treated as a sample, so the sample standard deviation (size - 1) is used.



73
74
75
76
# File 'lib/utilities/utilities.rb', line 73

# Return the standard deviation of self: the square root of #variance.
#
# @param population [Boolean] forwarded to #variance; true treats self as the
#   complete population, false as a sample
# @return [Float, nil] nil when self is empty, 0.0 when self has a single element
def standard_deviation( population = false )
  return nil if empty?
  # A single value is answered directly instead of going through #variance.
  return 0.0 unless size > 1
  Math.sqrt( variance( population ) )
end

#statistical_range(already_sorted = false) ⇒ Object

Return the statistical range of sorted self



132
133
134
135
136
# File 'lib/utilities/utilities.rb', line 132

# Return the statistical range of self: the last value of the sorted data
# minus the first one.
#
# @param already_sorted [Boolean] forwarded to sort_and_extend; pass true when
#   self is already sorted
# @return [Object, nil] nil when self is empty
def statistical_range( already_sorted = false )
  return nil if empty?
  ordered = sort_and_extend( already_sorted )
  smallest, largest = ordered.first, ordered.last
  largest - smallest
end

#statistics(already_sorted = false) ⇒ Object Also known as: stats

Return all statistics from self in a simple hash



139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
# File 'lib/utilities/utilities.rb', line 139

# Return all statistics of self gathered in a single hash.
#
# The first group of entries is computed on self as it is; the second group is
# computed on the data returned by sort_and_extend, with true passed to each
# method so it does not sort again.
#
# @param already_sorted [Boolean] forwarded to sort_and_extend; pass true when
#   self is already sorted
# @return [Hash] symbol keys, in the order listed below
def statistics( already_sorted = false )
  ordered = sort_and_extend( already_sorted )

  # Entries that do not depend on the order of the values
  order_free = {
    :first => first,
    :last => last,
    :size => size,
    :sum => sum,
    :squares => squares,
    :sqrts => sqrts,
    :min => min,
    :max => max,
    :mean => mean,
    :frequences => frequences,
    :variance => variance,
    :standard_deviation => standard_deviation,
    :population_variance => variance( true ),
    :population_standard_deviation => standard_deviation( true ),
    :modes => modes
  }

  # Entries that need the sorted values
  order_bound = {
    :ranks => ordered.ranks( true ),
    :median => ordered.median( true ),
    :midrange => ordered.midrange( true ),
    :statistical_range => ordered.statistical_range( true ),
    :quartiles => ordered.quartiles( true ),
    :interquartile_range => ordered.interquartile_range( true )
  }

  order_free.merge( order_bound )
end

#sum ⇒ Object

Add all objects of the array together to get the sum, as long as all objects respond to the + operator



30
31
32
# File 'lib/utilities/utilities.rb', line 30

# Add all objects of self together, after flattening nested arrays and
# dropping nil entries. Works for any objects that respond to the + operator.
#
# @return [Object] the accumulated result of +, or 0 when nothing is left to add
def sum
  values = flatten.compact
  # Test emptiness after flatten/compact: an array such as [nil] or [[]] is not
  # empty itself but has nothing to add, and inject would then return nil.
  values.empty? ? 0 : values.inject( :+ )
end

#variance(population = false) ⇒ Object

Return the variance of self



64
65
66
67
68
# File 'lib/utilities/utilities.rb', line 64

# Return the variance of self: the sum of squared differences from the mean,
# divided by size (population) or size - 1 (sample).
#
# NOTE(review): #square and #to_stats are defined outside this view; presumably
# #square multiplies a number by itself - confirm.
#
# @param population [Boolean] true divides by size, false divides by size - 1,
#   so a single-element sample divides by 0
# @return [Object, nil] nil when self is empty
def variance( population = false )
  return nil if empty?
  average = mean.to_f
  divisor = population ? size : size - 1
  squared_gaps = map { |value| (value - average).square }
  squared_gaps.to_stats.sum / divisor
end