Top Level Namespace

Defined Under Namespace

Modules: Expcalc, Numo Classes: Array, Hash, NMatrix, Pvalue

Instance Method Summary collapse

#binom(n, k) ⇒ Object
#compute_hyper_prob(a, b, c, d, n) ⇒ Object
#cummin(array) ⇒ Object
#get_benjaminiHochberg_pvalues(arr_pvalues) ⇒ Object

to cmpute adjusted pvalues rosettacode.org/wiki/P-value_correction#Ruby.
#get_fisher_exact_test(listA, listB, all_elements_count, tail = 'two_sided', weigths = nil, partial_weigths = true) ⇒ Object

TODO: Make a pull request to rubygems.org/gems/ruby-statistics, with all the statistic code implemented here.
#get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ Object
#get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ Object
#order(array, decreasing = false) ⇒ Object
#pmin(array) ⇒ Object

Instance Method Details

#binom(n, k) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 99

def binom(n,k)
	if k > 0 && k < n
		res = (1+n-k..n).inject(:*)/(1..k).inject(:*)
	else
		res = 1
	end
end

#compute_hyper_prob(a, b, c, d, n) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 91

def compute_hyper_prob(a, b, c, d, n)
	# https://en.wikipedia.org/wiki/Fisher%27s_exact_test
	binomA = binom(a + b, a)
	binomC = binom(c + d, c)
	divisor = binom(n, a + c)
	return (binomA * binomC).fdiv(divisor)
end

#cummin(array) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 130

def cummin(array)
	cumulative_min = array.first
	arr_cummin = []
	array.each do |p|
		cumulative_min = [p, cumulative_min].min
		arr_cummin << cumulative_min
	end
	return arr_cummin
end

#get_benjaminiHochberg_pvalues(arr_pvalues) ⇒ `Object`

to cmpute adjusted pvalues rosettacode.org/wiki/P-value_correction#Ruby

# File 'lib/expcalc/pvalue_expansion.rb', line 109

def get_benjaminiHochberg_pvalues(arr_pvalues)
	n = arr_pvalues.length
	arr_o = order(arr_pvalues, true)
	arr_cummin_input = []
	(0..(n - 1)).each do |i|
		arr_cummin_input[i] = (n / (n - i).to_f) * arr_pvalues[arr_o[i]]
	end
	arr_ro = order(arr_o)
	arr_cummin = cummin(arr_cummin_input)
	arr_pmin = pmin(arr_cummin)
	return arr_pmin.values_at(*arr_ro)
end

#get_fisher_exact_test(listA, listB, all_elements_count, tail = 'two_sided', weigths = nil, partial_weigths = true) ⇒ `Object`

TODO: Make a pull request to rubygems.org/gems/ruby-statistics, with all the statistic code implemented here. to cmpute fisher exact test Fisher => www.biostathandbook.com/fishers.html TODO:NetAnalyzer when performs hyper simple metric use Rubystats::FishersExactTest, compare results and mody implementation.

# File 'lib/expcalc/pvalue_expansion.rb', line 5

def get_fisher_exact_test(listA, listB, all_elements_count, tail ='two_sided', weigths=nil, partial_weigths=true)
	#puts '-', listA.inspect, listB.inspect, '-'
	listA_listB = listA & listB
	listA_nolistB = listA - listB
	nolistA_listB = listB - listA
	if weigths.nil?
		listA_listB_count = listA_listB.length
		listA_nolistB_count = listA_nolistB.length
		nolistA_listB_count = nolistA_listB.length
		nolistA_nolistB_count = all_elements_count - (listA | listB).length
	else
		# Fisher exact test weigthed as proposed in Improved scoring of functional groups from gene expression data by decorrelating GO graph structure
		# https://academic.oup.com/bioinformatics/article/22/13/1600/193669
		listA_listB_count = listA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
		listA_nolistB_count = listA_nolistB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
		nolistA_listB_count = nolistA_listB.map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil

		if partial_weigths
			nolistA_nolistB_count = all_elements_count - (listA | listB).length
			all_elements_count = nolistA_nolistB_count + listA_listB_count + listA_nolistB_count + nolistA_listB_count
		else
			nolistA_nolistB_count = (weigths.keys - (listA | listB)).map{|i| weigths[i]}.inject(0){|sum, n| sum + n}.ceil
			all_elements_count = weigths.values.inject(0){|sum, n| sum + n}.ceil
		end
	end
	#puts [listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count].inspect
	if tail == 'two_sided'
		accumulated_prob = get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
	elsif tail == 'less' 
		accumulated_prob = get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
	end
	return accumulated_prob
end

#get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 77

def get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
	accumulated_prob = 0
	[listA_listB_count, nolistA_nolistB_count].min.times do |n|
		accumulated_prob += compute_hyper_prob(
			listA_listB_count - n, 
			listA_nolistB_count + n, 
			nolistA_listB_count + n, 
			nolistA_nolistB_count - n, 
			all_elements_count
		)
	end
	return accumulated_prob
end

#get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 39

def get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count)
	#https://www.sheffield.ac.uk/polopoly_fs/1.43998!/file/tutorial-9-fishers.pdf
	accumulated_prob = 0
	ref_prob = compute_hyper_prob(
		listA_listB_count, 
		listA_nolistB_count, 
		nolistA_listB_count, 
		nolistA_nolistB_count, 
		all_elements_count
	)
	accumulated_prob += ref_prob
	[listA_listB_count, nolistA_nolistB_count].min.times do |n| #less
		n += 1
		prob = compute_hyper_prob(
			listA_listB_count - n, 
			listA_nolistB_count + n, 
			nolistA_listB_count + n, 
			nolistA_nolistB_count - n, 
			all_elements_count
		)
		prob <= ref_prob ? accumulated_prob += prob : break
	end

	[listA_nolistB_count, nolistA_listB_count].min.times do |n| #greater
		n += 1
		prob = compute_hyper_prob(
			listA_listB_count + n, 
			listA_nolistB_count - n, 
			nolistA_listB_count - n, 
			nolistA_nolistB_count + n, 
			all_elements_count
		)
		accumulated_prob += prob if prob <= ref_prob
	end

	return accumulated_prob
end

#order(array, decreasing = false) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 122

def order(array, decreasing = false)
	if decreasing == false
		array.sort.map { |n| array.index(n) }
	else
		array.sort.map { |n| array.index(n) }.reverse
	end
end

#pmin(array) ⇒ `Object`

# File 'lib/expcalc/pvalue_expansion.rb', line 140

def pmin(array)
	x = 1
	pmin_array = []
	array.each_index do |i|
		pmin_array[i] = [array[i], x].min
		abort if pmin_array[i] > 1
	end
	return pmin_array
end

Top Level Namespace

Defined Under Namespace

Instance Method Summary collapse

Instance Method Details

#binom(n, k) ⇒ Object

#compute_hyper_prob(a, b, c, d, n) ⇒ Object

#cummin(array) ⇒ Object

#get_benjaminiHochberg_pvalues(arr_pvalues) ⇒ Object

#get_fisher_exact_test(listA, listB, all_elements_count, tail = 'two_sided', weigths = nil, partial_weigths = true) ⇒ Object

#get_less_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ Object

#get_two_tail(listA_listB_count, listA_nolistB_count, nolistA_listB_count, nolistA_nolistB_count, all_elements_count) ⇒ Object

#order(array, decreasing = false) ⇒ Object

#pmin(array) ⇒ Object