Class: Macroape::PWMCompareAligned
- Inherits:
-
Object
- Object
- Macroape::PWMCompareAligned
- Defined in:
- lib/macroape/pwm_compare_aligned.rb,
lib/macroape/aligned_pair_intersection.rb
Instance Attribute Summary collapse
-
#first ⇒ Object
readonly
Returns the value of attribute first.
-
#first_length ⇒ Object
readonly
Returns the value of attribute first_length.
-
#length ⇒ Object
readonly
Returns the value of attribute length.
-
#orientation ⇒ Object
readonly
Returns the value of attribute orientation.
-
#second ⇒ Object
readonly
Returns the value of attribute second.
-
#second_length ⇒ Object
readonly
Returns the value of attribute second_length.
-
#shift ⇒ Object
readonly
Returns the value of attribute shift.
Class Method Summary collapse
Instance Method Summary collapse
- #alignment_infos ⇒ Object
-
#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object
unoptimized version of this and related methods.
- #direct? ⇒ Boolean
-
#first_overlaps?(pos) ⇒ Boolean
Whether the first matrix overlaps the specified position of the alignment.
- #first_pwm_alignment ⇒ Object
-
#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object
The block has the form: {|score, letter| contribution to the count by `letter` with `score` }.
-
#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned
constructor
A new instance of PWMCompareAligned.
-
#jaccard(first_threshold, second_threshold) ⇒ Object
def discrete(rate) PWMCompareAligned.new(first.discrete(rate), second.discrete(rate)) end.
- #jaccard_by_pvalue(pvalue) ⇒ Object
- #max_hash_size(*args) ⇒ Object
-
#max_hash_size!(new_max_hash_size) ⇒ Object
Sets the limit on the total size of the calculation hash.
- #overlap ⇒ Object
-
#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object
wouldn’t work without count_contribution_block.
- #revcomp? ⇒ Boolean
- #second_overlaps?(pos) ⇒ Boolean
- #second_pwm_alignment ⇒ Object
Constructor Details
#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned
Returns a new instance of PWMCompareAligned.
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 6
# Builds an aligned pair out of two unaligned motifs.
#
# The second motif is replaced by its #reverse_complement when #revcomp?
# is true. Then #left_augment is called on the second motif (positive shift)
# or on the first motif (zero or negative shift), and #right_augment is
# called on both with the difference to the common alignment length.
# NOTE(review): the augment methods presumably pad with neutral columns —
# confirm against the motif class.
#
# @param first_unaligned first motif (needs #length, #left_augment, #right_augment)
# @param second_unaligned second motif (additionally needs #reverse_complement)
# @param shift offset of the second motif relative to the first (may be negative)
# @param orientation compared against :revcomp / :direct by the predicates
def initialize(first_unaligned, second_unaligned, shift, orientation)
  @shift = shift
  @orientation = orientation
  @first_length = first_unaligned.length
  @second_length = second_unaligned.length
  @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)

  first_padded = first_unaligned
  second_padded = revcomp? ? second_unaligned.reverse_complement : second_unaligned

  if shift > 0
    second_padded = second_padded.left_augment(shift)
  else
    first_padded = first_padded.left_augment(-shift)
  end

  @first = first_padded.right_augment(@length - first_padded.length)
  @second = second_padded.right_augment(@length - second_padded.length)
end
Instance Attribute Details
#first ⇒ Object (readonly)
Returns the value of attribute first.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +first+ attribute.
#
# @return the value stored in @first
def first
  @first
end
#first_length ⇒ Object (readonly)
Returns the value of attribute first_length.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +first_length+ attribute.
#
# @return the value stored in @first_length
def first_length
  @first_length
end
#length ⇒ Object (readonly)
Returns the value of attribute length.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +length+ attribute.
#
# @return the value stored in @length
def length
  @length
end
#orientation ⇒ Object (readonly)
Returns the value of attribute orientation.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +orientation+ attribute.
#
# @return the value stored in @orientation
def orientation
  @orientation
end
#second ⇒ Object (readonly)
Returns the value of attribute second.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +second+ attribute.
#
# @return the value stored in @second
def second
  @second
end
#second_length ⇒ Object (readonly)
Returns the value of attribute second_length.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +second_length+ attribute.
#
# @return the value stored in @second_length
def second_length
  @second_length
end
#shift ⇒ Object (readonly)
Returns the value of attribute shift.
5 6 7 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 5
# Reader for the +shift+ attribute.
#
# @return the value stored in @shift
def shift
  @shift
end
Class Method Details
.calculate_alignment_length(first_len, second_len, shift) ⇒ Object
127 128 129 130 131 132 133 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 127
# Length of the alignment that covers both motifs.
#
# A positive shift moves the start of the second motif right by +shift+;
# a zero or negative shift moves the start of the first motif right by
# +-shift+. The result is the furthest end position of the two.
#
# @param first_len length of the first motif
# @param second_len length of the second motif
# @param shift offset of the second motif relative to the first
# @return the larger of the two end positions
def self.calculate_alignment_length(first_len, second_len, shift)
  first_start = [-shift, 0].max
  second_start = [shift, 0].max
  [first_start + first_len, second_start + second_len].max
end
Instance Method Details
#alignment_infos ⇒ Object
56 57 58 59 60 61 62 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 56
# Summary of the alignment as a hash.
#
# @return [Hash] with keys :shift, :orientation, :text (the two alignment
#   strings separated by a newline), :overlap and :alignment_length
def alignment_infos
  {
    shift: shift,
    orientation: orientation,
    text: [first_pwm_alignment, second_pwm_alignment].join("\n"),
    overlap: overlap,
    alignment_length: length
  }
end
#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object
unoptimized version of this and related methods
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 4
# Unoptimized version of this and related methods.
#
# Runs #get_counts with a contribution block weighted by the background of
# each motif and returns the pair of results.
#
# * Backgrounds differ: #get_counts is run twice, once per background.
# * Backgrounds are equal: #get_counts is run once and the result is returned
#   twice. When the background equals [1, 1, 1, 1] the raw count is used as
#   the contribution, skipping the multiplication.
#
# @param threshold_first threshold passed through to #get_counts
# @param threshold_second threshold passed through to #get_counts
# @return [Array] two-element array [count_for_first_background, count_for_second_background]
def counts_for_two_matrices(threshold_first, threshold_second)
  # Read each background exactly once; every check below reuses these locals.
  first_background = first.background
  second_background = second.background

  unless first_background == second_background
    first_result = get_counts(threshold_first, threshold_second) do |score, letter|
      first_background[letter] * score
    end
    second_result = get_counts(threshold_first, threshold_second) do |score, letter|
      second_background[letter] * score
    end
    return [first_result, second_result]
  end

  result =
    if first_background == [1, 1, 1, 1]
      get_counts(threshold_first, threshold_second) { |score, _letter| score }
    else
      get_counts(threshold_first, threshold_second) { |score, letter| first_background[letter] * score }
    end
  [result, result]
end
#direct? ⇒ Boolean
25 26 27 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 25
# Whether the alignment orientation is :direct.
#
# @return [Boolean]
def direct?
  orientation == :direct
end
#first_overlaps?(pos) ⇒ Boolean
Whether the first matrix overlaps the specified position of the alignment
65 66 67 68 69 70 71 72 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 65
# Whether the first matrix overlaps the given position of the alignment.
#
# Positions outside 0...length never overlap. With a positive shift the
# first matrix starts at position 0, otherwise at position -shift; in both
# cases it spans first_length positions.
#
# @param pos position within the alignment
# @return [Boolean]
def first_overlaps?(pos)
  return false if pos < 0 || pos >= length

  first_start = shift > 0 ? 0 : -shift
  pos >= first_start && pos < first_start + first_length
end
#first_pwm_alignment ⇒ Object
36 37 38 39 40 41 42 43 44 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 36
# Text picture of the first matrix within the alignment: '>' for every
# position the first matrix overlaps, '.' for every other position.
#
# @return [String] a string of +length+ characters
def first_pwm_alignment
  (0...length).map { |pos| first_overlaps?(pos) ? '>' : '.' }.join
end
#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object
The block has the form: {|score, letter| contribution to the count by `letter` with `score` }
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 24
# Walks the alignment column by column, keeping a nested hash
# { score_on_first => { score_on_second => count } }, and returns the sum
# of all counts left after the last column.
#
# The block has the form {|score, letter| contribution to the count by
# `letter` with `score` } and is forwarded to #recalc_score_hash.
#
# After every column, when #max_hash_size is set, the total number of
# (score_on_first, score_on_second) entries is compared to it; exceeding the
# limit aborts the calculation.
#
# @param threshold_first threshold for the first motif
# @param threshold_second threshold for the second motif
# @return [Float] sum of all counts in the final score hash
# @raise [RuntimeError] when the number of entries exceeds #max_hash_size
def get_counts(threshold_first, threshold_second, &count_contribution_block)
  # scores_on_first_pwm, scores_on_second_pwm --> count
  scores = { 0 => { 0 => 1 } }
  length.times do |column|
    # NOTE(review): best_suffix(column + 1) is presumably the best score still
    # reachable over the remaining columns — confirm against the motif class.
    scores = recalc_score_hash(scores,
                               first.matrix[column], second.matrix[column],
                               threshold_first - first.best_suffix(column + 1),
                               threshold_second - second.best_suffix(column + 1),
                               &count_contribution_block)
    next unless max_hash_size

    # Iterating a Hash yields [key, value] pairs, so the pair must be
    # destructured to reach the inner hash and count its entries.
    total_entries = scores.inject(0) do |total, (_score_first, second_scores)|
      total + second_scores.size
    end
    if total_entries > max_hash_size
      raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
    end
  end

  scores.inject(0.0) do |total, (_score_first, second_scores)|
    total + second_scores.inject(0.0) { |subtotal, (_score_second, count)| subtotal + count }
  end
end
#jaccard(first_threshold, second_threshold) ⇒ Object
def discrete(rate)
PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
end
def sort_pair_of_matrices_by(&block)
mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
PWMCompareAligned.new(SinglePWM(mat[0],first.probabilities), SinglePWM(mat[1], second.probabilities))
end
def sort_decreasing_max
PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| -col_pair[0].max} )
end
def sort_increasing_min
PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| col_pair[0].min} )
end
def permute_columns(permutation_index)
PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
end
103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 103
# Jaccard similarity of the two aligned motifs at the given thresholds.
#
# The per-motif counts come from #counts_by_thresholds. The intersection is
# the geometric mean of the two values returned by #counts_for_two_matrices,
# and the union is the sum of the per-motif counts minus the intersection.
# When either motif has a zero count, similarity and tanimoto are both -1.
#
# @param first_threshold threshold for the first motif
# @param second_threshold threshold for the second motif
# @return [Hash] with keys :similarity, :tanimoto (1.0 - similarity),
#   :recognized_by_both, :recognized_by_first and :recognized_by_second
def jaccard(first_threshold, second_threshold)
  recognized_by_first = first.counts_by_thresholds(first_threshold).first
  recognized_by_second = second.counts_by_thresholds(second_threshold).first

  if recognized_by_first == 0 || recognized_by_second == 0
    return {
      similarity: -1,
      tanimoto: -1,
      recognized_by_both: 0,
      recognized_by_first: recognized_by_first,
      recognized_by_second: recognized_by_second
    }
  end

  first_count, second_count = counts_for_two_matrices(first_threshold, second_threshold)
  recognized_by_both = Math.sqrt(first_count * second_count)
  union = recognized_by_first + recognized_by_second - recognized_by_both
  similarity = recognized_by_both.to_f / union

  {
    similarity: similarity,
    tanimoto: 1.0 - similarity,
    recognized_by_both: recognized_by_both,
    recognized_by_first: recognized_by_first,
    recognized_by_second: recognized_by_second
  }
end
#jaccard_by_pvalue(pvalue) ⇒ Object
121 122 123 124 125 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 121
# Jaccard similarity with the thresholds derived from a P-value: each
# motif's #threshold is called with +pvalue+ and the two results are passed
# to #jaccard.
#
# @param pvalue value handed to #threshold of both motifs
# @return whatever #jaccard returns for those thresholds
def jaccard_by_pvalue(pvalue)
  jaccard(first.threshold(pvalue), second.threshold(pvalue))
end
#max_hash_size(*args) ⇒ Object
141 142 143 144 145 146 147 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 141
# Combined getter/setter for the hash size limit.
#
# With no arguments returns the current limit; with one argument delegates
# to #max_hash_size! and returns its result.
#
# @param args zero or one argument
# @raise [ArgumentError] when more than one argument is given
def max_hash_size(*args)
  return @max_hash_size if args.empty?
  return max_hash_size!(args.first) if args.size == 1

  raise ArgumentError, '#max_hash_size method can get 0 or 1 argument'
end
#max_hash_size!(new_max_hash_size) ⇒ Object
Sets the limit on the total size of the calculation hash. It is a safeguard against excessive CPU usage on inappropriate data
136 137 138 139 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 136
# Stores a new hash size limit and returns the receiver, so calls can be
# chained.
#
# @param new_max_hash_size the new limit, stored in @max_hash_size
# @return [self]
def max_hash_size!(new_max_hash_size)
  tap { @max_hash_size = new_max_hash_size }
end
#overlap ⇒ Object
32 33 34 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 32
# Number of alignment positions covered by both matrices.
#
# @return [Integer]
def overlap
  (0...length).count { |pos| first_overlaps?(pos) && second_overlaps?(pos) }
end
#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object
wouldn’t work without count_contribution_block
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/macroape/aligned_pair_intersection.rb', line 41 def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) new_scores = Hash.new{|h,k| h[k] = Hash.new(0)} scores.each do |score_first, second_scores| second_scores.each do |score_second, count| 4.times do |letter| new_score_first = score_first + first_column[letter] if new_score_first >= least_sufficient_first new_score_second = score_second + second_column[letter] if new_score_second >= least_sufficient_second new_scores[new_score_first][new_score_second] += yield(count, letter) end end end end end new_scores end |
#revcomp? ⇒ Boolean
28 29 30 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 28
# Whether the alignment orientation is :revcomp.
#
# @return [Boolean]
def revcomp?
  orientation == :revcomp
end
#second_overlaps?(pos) ⇒ Boolean
74 75 76 77 78 79 80 81 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 74
# Whether the second matrix overlaps the given position of the alignment.
#
# Positions outside 0...length never overlap. With a positive shift the
# second matrix starts at position +shift+, otherwise at position 0; in both
# cases it spans second_length positions.
#
# @param pos position within the alignment
# @return [Boolean]
def second_overlaps?(pos)
  return false if pos < 0 || pos >= length

  second_start = shift > 0 ? shift : 0
  pos >= second_start && pos < second_start + second_length
end
#second_pwm_alignment ⇒ Object
46 47 48 49 50 51 52 53 54 |
# File 'lib/macroape/pwm_compare_aligned.rb', line 46
# Text picture of the second matrix within the alignment: '.' for positions
# the second matrix does not overlap, otherwise '>' when the orientation is
# direct and '<' when it is not.
#
# @return [String] a string of +length+ characters
def second_pwm_alignment
  (0...length).map do |pos|
    next '.' unless second_overlaps?(pos)

    direct? ? '>' : '<'
  end.join
end