Class: RubyStatistics::StatisticalTest::WilcoxonRankSumTest

Inherits:

Object

Object
RubyStatistics::StatisticalTest::WilcoxonRankSumTest

show all

Defined in:: lib/ruby-statistics/statistical_test/wilcoxon_rank_sum_test.rb

Instance Method Summary collapse

#perform(alpha, tails, group_one, group_two) ⇒ Object

Steps to perform the calculation are based on www.mit.edu/~6.s085/notes/lecture5.pdf.
#rank(elements) ⇒ Object

Instance Method Details

#perform(alpha, tails, group_one, group_two) ⇒ `Object`

Steps to perform the calculation are based on www.mit.edu/~6.s085/notes/lecture5.pdf

# File 'lib/ruby-statistics/statistical_test/wilcoxon_rank_sum_test.rb', line 22

def perform(alpha, tails, group_one, group_two)
  # Size for each group
  n1, n2 = group_one.size, group_two.size

  # Rank all data
  total_ranks = rank(group_one + group_two)

  # sum rankings per group
  r1 = ranked_sum_for(total_ranks, group_one)
  r2 = ranked_sum_for(total_ranks, group_two)

  # calculate U statistic
  u1 = (n1 * (n1 + 1)/2.0) - r1
  u2 = (n2 * (n2 + 1)/2.0 ) - r2

  u_statistic = [u1.abs, u2.abs].min

  median_u = (n1 * n2)/2.0

  ties = total_ranks.values.select { |element| element[:counter] > 1 }

  std_u = if ties.size > 0
            corrected_sigma(ties, n1, n2)
          else
            Math.sqrt((n1 * n2 * (n1 + n2 + 1))/12.0)
          end

  z = (u_statistic - median_u)/std_u

  # Most literature are not very specific about the normal distribution to be used.
  # We ran multiple tests with a Normal(median_u, std_u) and Normal(0, 1) and we found
  # the latter to be more aligned with the results.
  probability = Distribution::StandardNormal.new.cumulative_function(z.abs)
  p_value = 1 - probability
  p_value *= 2 if tails == :two_tail

  { probability: probability,
    u: u_statistic,
    z: z,
    p_value: p_value,
    alpha: alpha,
    null: alpha < p_value,
    alternative: p_value <= alpha,
    confidence_level: 1 - alpha }
end

#rank(elements) ⇒ `Object`

# File 'lib/ruby-statistics/statistical_test/wilcoxon_rank_sum_test.rb', line 4

def rank(elements)
  ranked_elements = {}

  elements.sort.each_with_index do |element, index|
    if ranked_elements.fetch(element, false)
      # This allow us to solve the ties easily when performing the rank summation per group
      ranked_elements[element][:counter] += 1
      ranked_elements[element][:rank] += (index + 1)
    else
      ranked_elements[element] = { counter: 1, rank: (index + 1) }
    end
  end

  # ranked_elements = [{ x => { counter: 1, rank: y } ]
  ranked_elements
end

Class: RubyStatistics::StatisticalTest::WilcoxonRankSumTest

Instance Method Summary collapse

Instance Method Details

#perform(alpha, tails, group_one, group_two) ⇒ Object

#rank(elements) ⇒ Object

#perform(alpha, tails, group_one, group_two) ⇒ `Object`

#rank(elements) ⇒ `Object`