Class: Bioinform::PWM
- Inherits:
-
Object
- Object
- Bioinform::PWM
- Defined in:
- lib/macroape/counting.rb
Instance Method Summary collapse
- #count_by_threshold(threshold) ⇒ Object
- #count_distribution ⇒ Object
- #count_distribution_after_threshold(threshold) ⇒ Object
- #count_distribution_under_pvalue(max_pvalue) ⇒ Object
- #counts_by_thresholds(*thresholds) ⇒ Object
- #pvalue_by_threshold(threshold) ⇒ Object
- #recalc_score_hash(scores, column, least_sufficient) ⇒ Object
- #threshold(pvalue) ⇒ Object
- #threshold_and_real_pvalue(pvalue) ⇒ Object
- #thresholds(*pvalues) ⇒ Object
-
#thresholds_by_pvalues(*pvalues) ⇒ Object
Return value: a hash mapping each pvalue to [thresholds, counts], where thresholds = left_threshold .. right_threshold and counts = left_count .. right_count.
- #weak_threshold(pvalue) ⇒ Object
- #weak_threshold_and_real_pvalue(pvalue) ⇒ Object
-
#weak_thresholds(*pvalues) ⇒ Object
“Weak” means that the threshold's real pvalue is not less than the given pvalue, whereas the usual threshold's real pvalue is not greater.
Instance Method Details
#count_by_threshold(threshold) ⇒ Object
113 114 115 |
# File 'lib/macroape/counting.rb', line 113
# Summed count over all scores that are not below +threshold+.
#
# Asks #counts_by_thresholds for a single threshold and picks that
# threshold's entry out of the hash it returns.
#
# @param threshold [Numeric] score cutoff
# @return [Float] the value stored under +threshold+ by #counts_by_thresholds
def count_by_threshold(threshold)
  counts = counts_by_thresholds(threshold)
  counts[threshold]
end
#count_distribution ⇒ Object
88 89 90 |
# File 'lib/macroape/counting.rb', line 88
# Score distribution computed with +worst_score+ as the cutoff.
#
# The result is memoized in @count_distribution, so the computation runs at
# most once per object (as long as the stored value is truthy).
#
# @return [Hash] whatever #count_distribution_after_threshold(worst_score) returns
def count_distribution
  return @count_distribution if @count_distribution

  @count_distribution = count_distribution_after_threshold(worst_score)
end
#count_distribution_after_threshold(threshold) ⇒ Object
78 79 80 81 82 83 84 85 86 |
# File 'lib/macroape/counting.rb', line 78
# Distribution of counts by score, restricted to scores >= +threshold+.
#
# When the memoized @count_distribution is available it is simply filtered.
# Otherwise the distribution is built column by column, starting from
# { 0 => 1 } and extending it with #recalc_score_hash for every column.
#
# @param threshold [Numeric] lowest score of interest
# @return [Hash] score => count (the memoized branch returns whatever
#   Hash#select returns on the running Ruby version)
# @raise [RuntimeError] when max_hash_size is set and the intermediate hash
#   grows beyond it
def count_distribution_after_threshold(threshold)
  if @count_distribution
    return @count_distribution.select { |score, _count| score >= threshold }
  end

  distribution = { 0 => 1 }
  length.times do |column_index|
    # Partial scores below this cutoff are dropped by recalc_score_hash;
    # presumably best_suffix(i) is the best score the remaining columns can
    # still contribute -- confirm against its definition.
    cutoff = threshold - best_suffix(column_index + 1)
    distribution.replace recalc_score_hash(distribution, @matrix[column_index], cutoff)

    too_large = max_hash_size && distribution.size > max_hash_size
    raise 'Hash overflow in PWM::ThresholdByPvalue#count_distribution_after_threshold' if too_large
  end
  distribution
end
#count_distribution_under_pvalue(max_pvalue) ⇒ Object
39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/macroape/counting.rb', line 39
# Score distribution whose total count reaches max_pvalue * vocabulary_volume.
#
# A threshold is estimated with threshold_gauss_estimation; if the
# distribution above that threshold does not hold enough counts, the pvalue
# fed to the estimator is doubled and the attempt is repeated. When the
# estimator raises, worst_score is used as the threshold instead.
#
# The loop also stops as soon as the threshold is at or below worst_score:
# lowering it further cannot add anything (count_distribution itself treats
# worst_score as the cutoff of the full distribution), so retrying would spin
# forever when the requested count is unreachable, e.g. max_pvalue > 1 or a
# floating-point shortfall at max_pvalue == 1.
#
# @param max_pvalue [Numeric] largest pvalue the distribution must cover
# @return [Hash] score => count; empty when max_pvalue * vocabulary_volume <= 0
def count_distribution_under_pvalue(max_pvalue)
  cnt_distribution = {}
  look_for_count = max_pvalue * vocabulary_volume
  until cnt_distribution.inject(0.0) { |sum, (_score, count)| sum + count } >= look_for_count
    approximate_threshold =
      begin
        threshold_gauss_estimation(max_pvalue)
      rescue StandardError
        # Estimation failed: fall back to the lowest threshold.
        worst_score
      end
    cnt_distribution = count_distribution_after_threshold(approximate_threshold)
    # Already the widest possible distribution: nothing more to gain.
    break if approximate_threshold <= worst_score

    # Estimation counted too few words: lower the threshold estimate by doubling the pvalue.
    max_pvalue *= 2
  end
  cnt_distribution
end
#counts_by_thresholds(*thresholds) ⇒ Object
105 106 107 108 109 110 111 |
# File 'lib/macroape/counting.rb', line 105
# For every given threshold, sums the counts of all scores >= that threshold.
#
# The distribution is computed once, using the smallest threshold as cutoff,
# and then scanned separately for each threshold.
#
# @param thresholds [Array<Numeric>] score cutoffs
# @return [Hash] threshold => summed count (Float accumulator starting at 0.0)
def counts_by_thresholds(*thresholds)
  distribution = count_distribution_after_threshold(thresholds.min)
  thresholds.each_with_object({}) do |threshold, totals|
    total = 0.0
    # Plain left-to-right accumulation, in the distribution's iteration order.
    distribution.each do |score, count|
      total += count if score >= threshold
    end
    totals[threshold] = total
  end
end
#pvalue_by_threshold(threshold) ⇒ Object
117 118 119 |
# File 'lib/macroape/counting.rb', line 117
# Count of scores not below +threshold+, divided by vocabulary_volume.
#
# @param threshold [Numeric] score cutoff
# @return [Numeric] count_by_threshold(threshold) / vocabulary_volume
def pvalue_by_threshold(threshold)
  passing_count = count_by_threshold(threshold)
  passing_count / vocabulary_volume
end
#recalc_score_hash(scores, column, least_sufficient) ⇒ Object
92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/macroape/counting.rb', line 92
# Extends a score => count hash by one matrix column.
#
# Each existing score is combined with each of the four entries of +column+;
# a new score is kept only if it is >= +least_sufficient+, and its count grows
# by the old count weighted with background[letter].
#
# @param scores [Hash] score => count accumulated so far
# @param column [Array<Numeric>] indexed 0..3 (presumably one weight per
#   nucleotide -- confirm against the matrix layout)
# @param least_sufficient [Numeric] lowest new score worth keeping
# @return [Hash] new score => count hash with a default value of 0
def recalc_score_hash(scores, column, least_sufficient)
  scores.each_with_object(Hash.new(0)) do |(score, count), extended|
    (0...4).each do |letter|
      candidate = score + column[letter]
      next unless candidate >= least_sufficient

      extended[candidate] += count * background[letter]
    end
  end
end
#threshold(pvalue) ⇒ Object
8 9 10 |
# File 'lib/macroape/counting.rb', line 8
# Threshold for a single pvalue.
#
# Runs #thresholds for +pvalue+ and returns the threshold passed to the block
# on its first invocation. If the block is never invoked, the return value of
# #thresholds itself is returned.
#
# @param pvalue [Numeric] requested pvalue
# @return [Numeric] the threshold yielded by #thresholds
def threshold(pvalue)
  thresholds(pvalue) do |_requested, value, _real_pvalue|
    return value
  end
end
#threshold_and_real_pvalue(pvalue) ⇒ Object
11 12 13 |
# File 'lib/macroape/counting.rb', line 11
# Threshold for a single pvalue together with the real pvalue it achieves.
#
# Runs #thresholds for +pvalue+ and returns the pair passed to the block on
# its first invocation. If the block is never invoked, the return value of
# #thresholds itself is returned.
#
# @param pvalue [Numeric] requested pvalue
# @return [Array] [threshold, real_pvalue] as yielded by #thresholds
def threshold_and_real_pvalue(pvalue)
  thresholds(pvalue) do |_requested, value, real_pvalue|
    return value, real_pvalue
  end
end
#thresholds(*pvalues) ⇒ Object
21 22 23 24 25 26 27 |
# File 'lib/macroape/counting.rb', line 21
# Yields a threshold and its real pvalue for every requested pvalue.
#
# For each entry of #thresholds_by_pvalues the threshold is placed 10% of the
# way from the beginning to the end of the score range, and the real pvalue is
# the end of the count range divided by vocabulary_volume.
#
# Without a block an Enumerator over the same triples is returned instead of
# failing with LocalJumpError on the first yield.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @yieldparam pvalue [Numeric] the requested pvalue
# @yieldparam threshold [Numeric] chosen score threshold
# @yieldparam real_pvalue [Float] pvalue actually achieved
# @return [Enumerator] when no block is given
# @return [Object] otherwise, the result of iterating #thresholds_by_pvalues
def thresholds(*pvalues)
  return enum_for(:thresholds, *pvalues) unless block_given?

  # Block-local names deliberately differ from the method name.
  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    threshold = score_range.begin + 0.1 * (score_range.end - score_range.begin)
    real_pvalue = count_range.end.to_f / vocabulary_volume
    yield pvalue, threshold, real_pvalue
  end
end
#thresholds_by_pvalues(*pvalues) ⇒ Object
Return value: a hash mapping each pvalue to [thresholds, counts], where thresholds = left_threshold .. right_threshold (left_threshold < right_threshold) and counts = left_count .. right_count (left_count > right_count).
59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 |
# File 'lib/macroape/counting.rb', line 59
# Brackets the threshold for every requested pvalue.
#
# The distribution covering the largest pvalue is sorted by descending score
# and turned into running totals (via the project's Array#partial_sums). For
# each pvalue the first position whose running total reaches
# pvalue * vocabulary_volume gives the lower score; the position before it
# (or best_score + 1.0 with a count of 0.0 when there is none) gives the upper.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @return [Hash] pvalue => [lower_score..upper_score, lower_count..upper_count],
#   where the count range runs from the larger to the smaller running total
def thresholds_by_pvalues(*pvalues)
  descending = count_distribution_under_pvalue(pvalues.max).sort.reverse
  scores = descending.map { |score, _count| score }
  partial_sums = descending.map { |_score, count| count }.partial_sums
  pvalue_counts = pvalues.sort.collect_hash { |pvalue| [pvalue, pvalue * vocabulary_volume] }

  results = {}
  pvalue_counts.each do |pvalue, look_for_count|
    ind = partial_sums.index { |sum| sum >= look_for_count }
    lower_score = scores[ind]
    lower_count = partial_sums[ind]
    if ind > 0
      upper_score = scores[ind - 1]
      upper_count = partial_sums[ind - 1]
    else
      # Nothing scores higher: use a bound just past the best score.
      upper_score = best_score + 1.0
      upper_count = 0.0
    end
    results[pvalue] = [(lower_score..upper_score), (lower_count..upper_count)]
  end
  results
end
#weak_threshold(pvalue) ⇒ Object
14 15 16 |
# File 'lib/macroape/counting.rb', line 14
# Weak threshold for a single pvalue.
#
# Runs #weak_thresholds for +pvalue+ and returns the threshold passed to the
# block on its first invocation. If the block is never invoked, the return
# value of #weak_thresholds itself is returned.
#
# @param pvalue [Numeric] requested pvalue
# @return [Float] the threshold yielded by #weak_thresholds
def weak_threshold(pvalue)
  weak_thresholds(pvalue) do |_requested, value, _real_pvalue|
    return value
  end
end
#weak_threshold_and_real_pvalue(pvalue) ⇒ Object
17 18 19 |
# File 'lib/macroape/counting.rb', line 17
# Weak threshold for a single pvalue together with the real pvalue it achieves.
#
# Runs #weak_thresholds for +pvalue+ and returns the pair passed to the block
# on its first invocation. If the block is never invoked, the return value of
# #weak_thresholds itself is returned.
#
# @param pvalue [Numeric] requested pvalue
# @return [Array] [threshold, real_pvalue] as yielded by #weak_thresholds
def weak_threshold_and_real_pvalue(pvalue)
  weak_thresholds(pvalue) do |_requested, value, real_pvalue|
    return value, real_pvalue
  end
end
#weak_thresholds(*pvalues) ⇒ Object
“Weak” means that the threshold's real pvalue is not less than the given pvalue, whereas the usual threshold's real pvalue is not greater.
30 31 32 33 34 35 36 |
# File 'lib/macroape/counting.rb', line 30
# Yields a "weak" threshold and its real pvalue for every requested pvalue.
#
# For each entry of #thresholds_by_pvalues the threshold is the beginning of
# the score range (as a Float) and the real pvalue is the beginning of the
# count range divided by vocabulary_volume.
#
# Without a block an Enumerator over the same triples is returned instead of
# failing with LocalJumpError on the first yield.
#
# @param pvalues [Array<Numeric>] requested pvalues
# @yieldparam pvalue [Numeric] the requested pvalue
# @yieldparam threshold [Float] chosen score threshold
# @yieldparam real_pvalue [Float] pvalue actually achieved
# @return [Enumerator] when no block is given
# @return [Object] otherwise, the result of iterating #thresholds_by_pvalues
def weak_thresholds(*pvalues)
  return enum_for(:weak_thresholds, *pvalues) unless block_given?

  thresholds_by_pvalues(*pvalues).each do |pvalue, (score_range, count_range)|
    threshold = score_range.begin.to_f
    real_pvalue = count_range.begin.to_f / vocabulary_volume
    yield pvalue, threshold, real_pvalue
  end
end