Module: Enumerable

Defined in:
lib/viral_seq/enumerable.rb

Overview

Adds statistical and math helper methods to the Enumerable module.

Examples:

median number

array = [1,2,3,4,5,6,7,8,9,10]
array.median
=> 5.5

average number (mean)

array = [1,2,3,4,5,6,7,8,9,10]
array.mean
=> 5.5

sample variance

array = [1,2,3,4,5,6,7,8,9,10]
array.sample_variance
=> 9.166666666666666

standard deviation

array = [1,2,3,4,5,6,7,8,9,10]
array.stdev
=> 3.0276503540974917

upper quartile

array = [1,2,3,4,5,6,7,8,9,10]
array.upper_quartile
=> 7.5

lower quartile

array = [1,2,3,4,5,6,7,8,9,10]
array.lower_quartile
=> 3.5

count frequency of elements in an array

array = %w{cat dog monkey cat cat cat monkey}
array.count_freq
=> {"cat"=>4, "dog"=>1, "monkey"=>2}

count relative frequency (fraction of the total, rounded to 2 decimals by default) of elements in an array

array = %w{cat dog monkey cat cat cat monkey}
array.count_freq2
=> {"cat"=>0.57, "dog"=>0.14, "monkey"=>0.29}

Instance Method Summary collapse

Instance Method Details

#count_freq ⇒ Object

Tabulates the elements of an Enumerable and how often each one occurs. Returns a Hash of element => frequency count.



97
98
99
100
101
102
103
# File 'lib/viral_seq/enumerable.rb', line 97

# Tabulate how many times each element occurs in the receiver.
#
# @return [Hash] element => occurrence count, in first-seen order; the hash
#   has a default of 0, so looking up an element that never occurred yields 0
def count_freq
  each_with_object(Hash.new(0)) { |item, tally| tally[item] += 1 }
end

#count_freq2(decimal = 2) ⇒ Object

Tabulates the elements of an Enumerable and their relative frequencies (each count divided by the total number of elements, rounded to `decimal` places). Returns a Hash of element => relative frequency.

Parameters:

  • decimal (Integer) (defaults to: 2)

    decimals of frequency



109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/viral_seq/enumerable.rb', line 109

# Tabulate the relative frequency of each element in the receiver.
#
# Frequencies are fractions of the total element count (e.g. 0.57, not 57),
# and each one is rounded independently, so they may not add up to exactly 1.
#
# @param decimal [Integer] number of decimal places each frequency is rounded to
# @return [Hash] element => rounded relative frequency, in first-seen order;
#   the hash has a default of 0 for elements that never occurred
def count_freq2(decimal = 2)
  counts = each_with_object(Hash.new(0)) { |element, tally| tally[element] += 1 }
  # Take the total from the tally rather than calling #size: Enumerable itself
  # does not define #size, and an Enumerator may report nil for it.
  total = counts.values.sum.to_f
  counts.each_with_object(Hash.new(0)) do |(element, count), frequencies|
    frequencies[element] = (count / total).round(decimal)
  end
end

#lower_quartile ⇒ Numeric

generate lower quartile value

Returns:

  • (Numeric)

    lower quartile value



81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/viral_seq/enumerable.rb', line 81

# Compute the lower (first) quartile by linear interpolation between the two
# sorted elements surrounding the 1-based fractional rank 0.25 * n + 1.
#
# @return [Numeric, nil] lower quartile value, or nil when the receiver is empty
def lower_quartile
  sorted = sort
  return nil if sorted.empty?

  position = 0.25 * sorted.length + 1
  rank = position.truncate
  fraction = position - rank

  below = sorted[rank - 1]
  # With a single element there is no upper neighbour to interpolate towards;
  # fall back to the element itself so the result is that element.
  above = sorted.fetch(rank, below)
  below + (above - below) * fraction
end

#mean ⇒ Float

generate mean number

Returns:

  • (Float)

    mean value



46
47
48
# File 'lib/viral_seq/enumerable.rb', line 46

# Compute the arithmetic mean of the elements.
#
# Uses #count rather than #length so that it works on any Enumerable
# (for example a Range), not only on collections that define #length.
#
# @return [Float] mean value; for an empty collection of numbers this is
#   0 / 0.0, i.e. NaN
def mean
  sum / count.to_f
end

#median ⇒ Numeric

generate median number

Returns:

  • (Numeric)

    median number



38
39
40
41
42
# File 'lib/viral_seq/enumerable.rb', line 38

# Compute the median of the elements.
#
# For an odd number of elements the middle element of the sorted collection
# is returned unchanged; for an even number the two middle elements are
# averaged and the result is a Float.
#
# @return [Numeric, nil] median value, or nil when the receiver is empty
def median
  sorted = sort
  return nil if sorted.empty?

  middle = sorted.length / 2
  return sorted[middle] if sorted.length.odd?

  (sorted[middle - 1] + sorted[middle]).to_f / 2
end

#sample_variance ⇒ Float

generate sample variance

Returns:

  • (Float)

    sample variance



52
53
54
55
56
# File 'lib/viral_seq/enumerable.rb', line 52

# Compute the sample variance: the sum of squared deviations from the mean,
# divided by (number of elements - 1).
#
# @return [Float] sample variance; with a single numeric element the division
#   is 0.0 / 0.0, i.e. NaN
def sample_variance
  center = mean
  squared_deviations = map { |value| (value - center)**2 }
  squared_deviations.reduce(0, :+) / (length - 1).to_f
end

#stdev ⇒ Float

generate standard deviation

Returns:

  • (Float)

    standard deviation



60
61
62
# File 'lib/viral_seq/enumerable.rb', line 60

# Compute the standard deviation as the square root of #sample_variance.
#
# @return [Float] standard deviation
def stdev
  variance = sample_variance
  Math.sqrt(variance)
end

#upper_quartile ⇒ Numeric

generate upper quartile value

Returns:

  • (Numeric)

    upper quartile value



66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/viral_seq/enumerable.rb', line 66

# Compute the upper (third) quartile by linear interpolation between the two
# sorted elements surrounding the 1-based fractional rank 0.75 * n.
#
# @return [Numeric, nil] upper quartile value, or nil when the receiver is empty
def upper_quartile
  sorted = sort
  return nil if sorted.empty?

  position = 0.75 * sorted.length
  rank = position.truncate
  fraction = position - rank

  # rank is 0 only for a single element; use that element explicitly instead
  # of relying on index -1 wrapping around to the end of the array.
  below = rank.zero? ? sorted.first : sorted[rank - 1]
  above = sorted[rank]
  below + (above - below) * fraction
end