Class: Array

Inherits:
Object
  • Object
show all
Defined in:
lib/array_statistics.rb

Instance Method Summary collapse

Instance Method Details

#<<(*as_args) ⇒ Object



266
267
268
269
270
# File 'lib/array_statistics.rb', line 266

# Appends through the original +<<+ (kept under the name +as_old_app+) and
# then flags the array as possibly unsorted, so the next sort! does real work.
# Returns whatever the original +<<+ returned.
def <<(*as_args)
  as_old_app(*as_args).tap { dirty }
end

#[]=(*as_args) ⇒ Object



259
260
261
262
263
# File 'lib/array_statistics.rb', line 259

# Element/slice assignment that also flags the array as possibly unsorted.
# The actual assignment is delegated to the original +[]=+ (kept under the
# name +as_old_ass+); its return value is handed back untouched.
def []=(*as_args)
  assigned = as_old_ass(*as_args)
  dirty # an assignment can break any previously established sort order
  assigned
end

#as_old_appObject



265
# File 'lib/array_statistics.rb', line 265

# Keep the existing << reachable as as_old_app; the << wrapper defined on
# the following source line delegates to it before calling dirty.
alias as_old_app <<

#as_old_assObject

Side-note: there are a lot of array methods that don’t follow the destructive-methods-end-in-! naming pattern



258
# File 'lib/array_statistics.rb', line 258

# Keep the existing []= reachable as as_old_ass; the []= wrapper defined on
# the following source line delegates to it before calling dirty.
alias as_old_ass []=

#as_old_collectObject



279
# File 'lib/array_statistics.rb', line 279

# Keep the existing collect! reachable as as_old_collect; the collect!
# wrapper in this file delegates to it before calling dirty.
alias as_old_collect collect!

#as_old_fillObject



293
# File 'lib/array_statistics.rb', line 293

# Keep the existing fill reachable as as_old_fill; the fill wrapper in this
# file delegates to it before calling dirty.
alias as_old_fill fill

#as_old_flattenObject



300
# File 'lib/array_statistics.rb', line 300

# Keep the existing flatten! reachable as as_old_flatten; the flatten!
# wrapper in this file delegates to it before calling dirty.
alias as_old_flatten flatten!

#as_old_map!Object



286
# File 'lib/array_statistics.rb', line 286

# Keep the existing map! reachable as as_old_map!; the map! wrapper in this
# file delegates to it before calling dirty.
alias as_old_map! map!

#as_old_pushObject



272
# File 'lib/array_statistics.rb', line 272

# Keep the existing push reachable as as_old_push; the push wrapper in this
# file delegates to it before calling dirty.
alias as_old_push push

#as_old_replaceObject



307
# File 'lib/array_statistics.rb', line 307

# Keep the existing replace reachable as as_old_replace; the replace wrapper
# in this file delegates to it before calling dirty.
alias as_old_replace replace

#as_old_reverseObject



314
# File 'lib/array_statistics.rb', line 314

# Keep the existing reverse! reachable as as_old_reverse; the reverse!
# wrapper in this file delegates to it before calling dirty.
alias as_old_reverse reverse!

#as_old_unshiftObject



321
# File 'lib/array_statistics.rb', line 321

# Keep the existing unshift reachable as as_old_unshift; the unshift wrapper
# in this file delegates to it before calling dirty.
alias as_old_unshift unshift

#average(&value_block) ⇒ Object

Returns the average of the values in this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



198
199
200
201
202
# File 'lib/array_statistics.rb', line 198

# Arithmetic mean of the array's values.
#
# The optional block maps each element to its numeric value and is handed
# straight to sum. An integer total is converted to Float before dividing so
# the division by length is not truncated.
def average(&value_block) # :yields: element
  total = sum(&value_block)
  numerator = total.integer? ? total.to_f : total
  numerator / length
end

#cleanObject



245
246
247
# File 'lib/array_statistics.rb', line 245

# Clears the "possibly unsorted" flag. sort! calls this right after it has
# actually sorted the array. Returns false (the value stored).
def clean
  instance_variable_set(:@as_sort_dirty, false)
end

#collect!(*as_args, &block) ⇒ Object



280
281
282
283
284
# File 'lib/array_statistics.rb', line 280

# In-place collect that also flags the array as possibly unsorted.
# Delegates to the original +collect!+ (kept as +as_old_collect+) and returns
# its result.
def collect!(*as_args, &block)
  as_old_collect(*as_args, &block).tap { dirty }
end

#dirtyObject

:stopdoc:



236
237
238
# File 'lib/array_statistics.rb', line 236

# Flags the array as possibly unsorted; the mutator wrappers in this file
# call it after every change so that sort! knows it has to re-sort.
# Returns true (the value stored).
def dirty
  instance_variable_set(:@as_sort_dirty, true)
end

#dirty?Boolean

Returns:

  • (Boolean)


240
241
242
243
# File 'lib/array_statistics.rb', line 240

# Whether the array must be assumed unsorted.
#
# An array whose flag has never been set is treated as unsorted: the flag is
# initialised via dirty on first query. Afterwards the stored flag is returned.
def dirty?
  already_tracked = instance_variable_defined?(:@as_sort_dirty)
  dirty unless already_tracked
  @as_sort_dirty
end

#fill(*as_args, &block) ⇒ Object



294
295
296
297
298
# File 'lib/array_statistics.rb', line 294

# fill that also flags the array as possibly unsorted.
# Delegates to the original +fill+ (kept as +as_old_fill+) and returns its
# result.
def fill(*as_args, &block)
  as_old_fill(*as_args, &block).tap { dirty }
end

#flatten!(*as_args, &block) ⇒ Object



301
302
303
304
305
# File 'lib/array_statistics.rb', line 301

# In-place flatten that also flags the array as possibly unsorted.
# Delegates to the original +flatten!+ (kept as +as_old_flatten+) and returns
# its result unchanged; the flag is set regardless of what that result is.
def flatten!(*as_args, &block)
  as_old_flatten(*as_args, &block).tap { dirty }
end

#map!(*as_args, &block) ⇒ Object



287
288
289
290
291
# File 'lib/array_statistics.rb', line 287

# In-place map that also flags the array as possibly unsorted.
# Delegates to the original +map!+ (kept as +as_old_map!+) and returns its
# result.
def map!(*as_args, &block)
  as_old_map!(*as_args, &block).tap { dirty }
end

#median(sort_required = true, &value_block) ⇒ Object

Get the median value of this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



58
59
60
61
62
63
64
65
66
67
68
# File 'lib/array_statistics.rb', line 58

# Returns the median value of this array, or 0 when the array is empty.
#
# sort_required:: pass false when the array is already sorted by value; it is
#                 forwarded to median_indices, which then skips its sort.
# value_block::   optional block mapping an element to its numeric value;
#                 defaults to the element itself.
#
# For an even number of elements the result is the average of the two
# middle values.
def median(sort_required=true, &value_block) # :yields: element
  return 0 if empty? # to reduce instances of calling math methods on nil.
  value_block ||= proc { |element| element }

  # Honour sort_required instead of always letting median_indices sort.
  middle_values = median_indices(sort_required, &value_block).collect do |element_index|
    value_block.call(self[element_index])
  end

  middle_values.average
end

#median_indices(sort_required = true, &value_block) ⇒ Object

Returns either a single- or double-value array containing the median index or the two indices surrounding the median.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/array_statistics.rb', line 74

# Index (odd length) or pair of adjacent indices (even length) of the median
# element(s), returned as an array. Returns the integer 0 for an empty array.
#
# sort_required:: when truthy (the default) the array is first sorted in
#                 place by value; pass false if it is already sorted.
# value_block::   optional block mapping an element to its comparable value;
#                 defaults to the element itself.
def median_indices(sort_required=true, &value_block) # :yields: element
  return 0 if empty?
  value_block ||= proc { |element| element }
  sort! { |a, b| value_block.call(a) <=> value_block.call(b) } if sort_required

  upper_middle = length / 2
  length.even? ? [upper_middle - 1, upper_middle] : [upper_middle]
end

#old_sort!Object



220
# File 'lib/array_statistics.rb', line 220

# Keep the existing sort! reachable as old_sort!; the caching sort! defined
# in this file delegates to it whenever a real sort is needed.
alias old_sort! sort!

#outlier_threshold_indices(quartile_range_factor = 1.5, &value_block) ⇒ Object

Returns an array with two values. The first value is the index of the last low outlier in this sorted array (this array will be sorted as a side-effect of this method), or nil if there are no low-end outliers. The second value is the index of the first high outlier in this sorted array, or nil if there are no high-end outliers.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/array_statistics.rb', line 148

# Locates the outlier boundaries in this array.
#
# Returns a two-element array: the index of the last low outlier (nil when no
# value lies below the low threshold) and the index of the first high outlier
# (nil when no value lies above the high threshold). The thresholds come from
# outlier_thresholds, which sorts the array as a side effect.
#
# quartile_range_factor:: passed through to outlier_thresholds.
# value_block::           optional block mapping an element to its value;
#                         defaults to the element itself.
def outlier_threshold_indices(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block ||= proc { |element| element }
  low_limit, high_limit = outlier_thresholds(quartile_range_factor, &value_block) # this sorts self!

  # Walk up from the front while values are still below the low threshold.
  last_low = -1
  last_low += 1 while value_block.call(self[last_low + 1]) < low_limit

  # Walk down from the back while values are still above the high threshold.
  first_high = length
  first_high -= 1 while value_block.call(self[first_high - 1]) > high_limit

  [last_low == -1 ? nil : last_low, first_high == length ? nil : first_high]
end

#outlier_thresholds(quartile_range_factor = 1.5, &value_block) ⇒ Object

Returns an array with two values: the first value is the low outlier threshold for this data set, and the second value is the high outlier threshold for this data set.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/array_statistics.rb', line 171

# Computes the outlier fences for this data set.
#
# Returns [low_threshold, high_threshold], where
#   low  = Q1 - (Q3 - Q1) * quartile_range_factor
#   high = Q3 + (Q3 - Q1) * quartile_range_factor
# Q1 and Q3 are the averages of the values found at the index groups returned
# by quartile_indices.
#
# quartile_range_factor:: multiplier applied to the interquartile range.
# value_block::           optional block mapping an element to its value;
#                         defaults to the element itself.
def outlier_thresholds(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block ||= proc { |element| element }

  q1_positions, q3_positions = quartile_indices(&value_block)
  q1 = q1_positions.collect { |position| value_block.call(self[position]) }.average
  q3 = q3_positions.collect { |position| value_block.call(self[position]) }.average

  fence_distance = (q3 - q1) * quartile_range_factor
  [q1 - fence_distance, q3 + fence_distance]
end

#outliers(quartile_range_factor = 1.5, &value_block) ⇒ Object

returns an array containing all the outliers in this set

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



115
116
117
118
119
120
121
122
123
124
# File 'lib/array_statistics.rb', line 115

# Returns a new array containing all the outliers in this set: the low
# outliers first, then the high ones. The receiver keeps all its elements
# (it is sorted as a side effect of outlier_threshold_indices).
#
# quartile_range_factor:: passed through to outlier_threshold_indices.
# value_block::           optional block mapping an element to its value;
#                         defaults to the element itself.
def outliers(quartile_range_factor=1.5, &value_block) # :yields: element
  value_block ||= proc { |element| element }
  last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)

  low_outliers = last_low.nil? ? [] : self[0..last_low]
  high_outliers = first_high.nil? ? [] : self[first_high..-1]

  # Concatenate the two slices rather than nesting and flattening them:
  # flatten would also take apart elements that are arrays themselves.
  low_outliers + high_outliers
end

#percentile(percent_less_than, &value_block) ⇒ Object

Get the percent order statistic, based on “order statistic” from here: mathworld.wolfram.com/topics/RankStatistics.html. Given some percentage between 0 and 1 (inclusive), return the greatest value in the subarray of this array which contains the bottom percent_less_than values of this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



19
20
21
22
23
24
25
26
27
28
29
30
# File 'lib/array_statistics.rb', line 19

# Percent order statistic: sorts the array in place by value and returns the
# element sitting at position floor(percent_less_than * (length - 1)).
#
# percent_less_than:: fraction between 0 and 1; values above 1 are treated as
#                     1, and values of 0 or less yield nil.
# value_block::       optional block mapping an element to its comparable
#                     value; defaults to the element itself.
#
# Note that the element itself is returned, not its block value.
def percentile(percent_less_than, &value_block) #  :yields: element
  value_block ||= proc { |element| element }
  sort! { |a, b| value_block.call(a) <=> value_block.call(b) }

  return nil if percent_less_than <= 0
  fraction = percent_less_than > 1 ? 1 : percent_less_than
  self[(fraction * (length - 1)).floor]
end

#percentile_rank(value, &value_block) ⇒ Object

Get the percent rank, based on “statistical rank” from here: mathworld.wolfram.com/topics/RankStatistics.html. Given some value, find the percentage of its rank in this array. The number returned will be a number between 0 and 1 (inclusive) which represents the percentage of values in this array which are less than or equal to the value passed in.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/array_statistics.rb', line 39

# Percent rank of +value+ within this array: the fraction (0.0..1) of the
# elements whose value is less than or equal to +value+. The array is sorted
# in place by value as a side effect.
#
# value::       the value to rank; it is compared against the block values.
# value_block:: optional block mapping an element to its comparable value;
#               defaults to the element itself.
#
# Returns 0.0 for an empty array (there is nothing to rank against), 0.0 when
# +value+ is below every element, a Float fraction when some element exceeds
# +value+, and the Integer 1 when no element does.
def percentile_rank(value, &value_block) # :yields: element
  # Guard first: without it the comparison below would be made against nil.
  return 0.0 if empty?

  value_block ||= proc { |element| element }
  sort! { |x, y| value_block.call(x) <=> value_block.call(y) }
  return 0.0 if value < value_block.call(self[0])

  # In the sorted array, the position of the first larger element equals the
  # number of elements that are <= value.
  first_greater = index { |element| value_block.call(element) > value }
  first_greater ? first_greater.to_f / length : 1
end

#push(*as_args) ⇒ Object



273
274
275
276
277
# File 'lib/array_statistics.rb', line 273

# push that also flags the array as possibly unsorted.
# Delegates to the original +push+ (kept as +as_old_push+) and returns its
# result.
def push(*as_args)
  as_old_push(*as_args).tap { dirty }
end

#quartile_indices(&value_block) ⇒ Object

returns an array with 2 values. The values are the first and third quartile indices following the same rules as the results of the median_indices method

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'lib/array_statistics.rb', line 95

# Returns [q1_indices, q3_indices]: the first- and third-quartile index
# groups, each shaped like the result of median_indices (one index, or two
# adjacent ones).
#
# The array is sorted by the initial median_indices call. The lower half runs
# from the start up to and including the (first) median index, the upper half
# from the (last) median index to the end; each half's own median indices are
# then taken without re-sorting.
#
# value_block:: optional block mapping an element to its comparable value.
def quartile_indices(&value_block) # :yields: element
  median_positions = median_indices(&value_block)
  lower_half_end = median_positions.first
  upper_half_start = median_positions.last

  q1_indices = self[0..lower_half_end].median_indices(false, &value_block)
  # Indices found in the upper slice are relative to it; shift them back.
  q3_indices = self[upper_half_start..-1].median_indices(false, &value_block).map do |relative_index|
    relative_index + upper_half_start
  end

  [q1_indices, q3_indices]
end

#remove_outliers!(quartile_range_factor = 1.5, &value_block) ⇒ Object

removes all the outliers from this set and returns them.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



130
131
132
133
134
135
136
137
138
139
# File 'lib/array_statistics.rb', line 130

# Removes all the outliers from this array and returns them (low outliers
# first, then high ones). Returns an empty array when there are none.
#
# quartile_range_factor:: passed through to outlier_threshold_indices.
# value_block::           optional block mapping an element to its value.
def remove_outliers!(quartile_range_factor=1.5, &value_block) # :yields: element
  last_low, first_high = outlier_threshold_indices(quartile_range_factor, &value_block)

  removed = []
  shift_by = 0
  unless last_low.nil?
    removed += slice!(0..last_low)
    # Cutting off the front moves every remaining element down by this much,
    # so the high-outlier index has to be adjusted accordingly.
    shift_by = last_low + 1
  end
  removed += slice!((first_high - shift_by)..-1) unless first_high.nil?
  removed
end

#replace(*as_args, &block) ⇒ Object



308
309
310
311
312
# File 'lib/array_statistics.rb', line 308

# replace that also flags the array as possibly unsorted.
# Delegates to the original +replace+ (kept as +as_old_replace+) and returns
# its result.
def replace(*as_args, &block)
  as_old_replace(*as_args, &block).tap { dirty }
end

#reverse!(*as_args, &block) ⇒ Object



315
316
317
318
319
# File 'lib/array_statistics.rb', line 315

# In-place reverse that also flags the array as possibly unsorted.
# Delegates to the original +reverse!+ (kept as +as_old_reverse+) and returns
# its result.
def reverse!(*as_args, &block)
  as_old_reverse(*as_args, &block).tap { dirty }
end

#sort!(&comparison_block) ⇒ Object

Adds smarter sorting: A lot of methods above need the array sorted and they may call one another. This sort! method only sorts the array if it isn’t currently known to be sorted.

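Unlike most methods in this package, the optional block taken by this method does not define the “value” of the objects in the array: it is a comparison block that receives two elements and is passed on to the original sort!. This block can be safely skipped if the array contains numbers.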



226
227
228
229
230
231
232
233
# File 'lib/array_statistics.rb', line 226

# Sorts in place, but only when needed: the real sort (old_sort!) runs if the
# array is flagged dirty or if the comparison block differs from the one used
# for the previous sort. Always returns self.
#
# comparison_block:: optional two-argument comparison block, forwarded to
#                    old_sort! unchanged.
#
# NOTE(review): the dirty flag is only raised by the mutators wrapped in this
# file; other mutating Array methods presumably leave a stale "clean" flag —
# confirm. Likewise, blocks written at different call sites are presumably
# never equal, so the block comparison probably forces a re-sort most of the
# time — confirm.
def sort!(&comparison_block) # :yields: element
  return self unless dirty? || comparison_block != @as_last_comparison_block

  old_sort!(&comparison_block)
  @as_last_comparison_block = comparison_block
  clean
  self
end

#sum(&value_block) ⇒ Object

Returns the sum of all the values in this array.

Like most methods in this package, this method takes an optional block that defines the “value” of the objects in the array. This block can be safely skipped if the array contains numbers.



208
209
210
211
212
213
214
215
# File 'lib/array_statistics.rb', line 208

# Returns the sum of all the values in this array.
#
# init::        starting value of the accumulation (default 0). Accepting it
#               keeps this method call-compatible with the core Array#sum it
#               replaces, e.g. <tt>sum(0.0)</tt> for a Float result.
# value_block:: optional block mapping an element to its numeric value;
#               defaults to the element itself.
#
# An empty array yields +init+.
def sum(init=0, &value_block) # :yields: element
  value_block ||= proc { |element| element }
  inject(init) { |total, element| total + value_block.call(element) }
end

#unshift(*as_args, &block) ⇒ Object



322
323
324
325
326
# File 'lib/array_statistics.rb', line 322

# unshift that also flags the array as possibly unsorted.
# Delegates to the original +unshift+ (kept as +as_old_unshift+) and returns
# its result.
def unshift(*as_args, &block)
  as_old_unshift(*as_args, &block).tap { dirty }
end