Class: Macroape::PWMCompareAligned

Inherits:
Object
  • Object
show all
Defined in:
lib/macroape/pwm_compare_aligned.rb,
lib/macroape/aligned_pair_intersection.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(first_unaligned, second_unaligned, shift, orientation) ⇒ PWMCompareAligned

Returns a new instance of PWMCompareAligned.



6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/macroape/pwm_compare_aligned.rb', line 6

# Builds an aligned pair out of two unaligned matrices.
#
# Stores the shift, the orientation and both original lengths, asks the class
# for the total alignment length, and then brings both matrices to that length
# via their `left_augment` / `right_augment` methods.
#
# @param first_unaligned  first matrix (must respond to length, left_augment, right_augment)
# @param second_unaligned second matrix (additionally reverse_complement when orientation is :revcomp)
# @param shift [Integer] offset of the second matrix relative to the first one
# @param orientation [Symbol] :direct or :revcomp
def initialize(first_unaligned, second_unaligned, shift, orientation)
  @shift = shift
  @orientation = orientation

  @first_length = first_unaligned.length
  @second_length = second_unaligned.length
  @length = self.class.calculate_alignment_length(@first_length, @second_length, @shift)

  shifted_first = first_unaligned
  shifted_second = revcomp? ? second_unaligned.reverse_complement : second_unaligned

  # A positive shift moves the second matrix to the right; otherwise the first one is moved.
  if shift > 0
    shifted_second = shifted_second.left_augment(shift)
  else
    shifted_first = shifted_first.left_augment(-shift)
  end

  # Both matrices are extended on the right up to the common alignment length.
  @first = shifted_first.right_augment(@length - shifted_first.length)
  @second = shifted_second.right_augment(@length - shifted_second.length)
end

Instance Attribute Details

#firstObject (readonly)

Returns the value of attribute first.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @first: the value the constructor stores after calling
# `left_augment` (only when shift is not positive) and `right_augment`
# on the first input matrix.
def first
  @first
end

#first_lengthObject (readonly)

Returns the value of attribute first_length.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @first_length: the `length` of the first matrix as it was passed
# to the constructor, i.e. before any augmentation.
def first_length
  @first_length
end

#lengthObject (readonly)

Returns the value of attribute length.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @length: the alignment length computed in the constructor by
# `calculate_alignment_length` from both original lengths and the shift.
def length
  @length
end

#orientationObject (readonly)

Returns the value of attribute orientation.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @orientation as given to the constructor; the predicates
# `direct?` and `revcomp?` compare it with :direct and :revcomp.
def orientation
  @orientation
end

#secondObject (readonly)

Returns the value of attribute second.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @second: the value the constructor stores after calling
# `reverse_complement` (only for :revcomp orientation), `left_augment`
# (only when shift is positive) and `right_augment` on the second input matrix.
def second
  @second
end

#second_lengthObject (readonly)

Returns the value of attribute second_length.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @second_length: the `length` of the second matrix as it was passed
# to the constructor, i.e. before any augmentation.
def second_length
  @second_length
end

#shiftObject (readonly)

Returns the value of attribute shift.



5
6
7
# File 'lib/macroape/pwm_compare_aligned.rb', line 5

# Reader for @shift as given to the constructor. A positive value makes the
# constructor left-augment the second matrix; otherwise the first one is
# left-augmented by -shift.
def shift
  @shift
end

Class Method Details

.calculate_alignment_length(first_len, second_len, shift) ⇒ Object



127
128
129
130
131
132
133
# File 'lib/macroape/pwm_compare_aligned.rb', line 127

# Total number of positions covered by two matrices of the given lengths when
# the second one is shifted by `shift` relative to the first one.
#
# @param first_len [Integer] length of the first matrix
# @param second_len [Integer] length of the second matrix
# @param shift [Integer] positive: second matrix starts later; otherwise the first one starts later
# @return [Integer] the rightmost end among both shifted matrices
def self.calculate_alignment_length(first_len, second_len, shift)
  # Exactly one of the matrices is moved to the right, by |shift| positions.
  first_offset, second_offset = shift > 0 ? [0, shift] : [-shift, 0]
  [first_len + first_offset, second_len + second_offset].max
end

Instance Method Details

#alignment_infosObject



56
57
58
59
60
61
62
# File 'lib/macroape/pwm_compare_aligned.rb', line 56

# Summary of the alignment as a Hash with the keys :shift, :orientation,
# :text (both alignment rows separated by a newline), :overlap and
# :alignment_length.
def alignment_infos
  infos = {}
  infos[:shift] = shift
  infos[:orientation] = orientation

  first_row = first_pwm_alignment
  second_row = second_pwm_alignment
  infos[:text] = "#{first_row}\n#{second_row}"

  infos[:overlap] = overlap
  infos[:alignment_length] = length
  infos
end

#counts_for_two_matrices(threshold_first, threshold_second) ⇒ Object

unoptimized version of this and related methods



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
# File 'lib/macroape/aligned_pair_intersection.rb', line 4

# Runs `get_counts` once per distinct background and returns a two-element
# array: the result weighted by the first background and the result weighted
# by the second one.
#
# When both backgrounds are equal the computation is done only once and the
# same result object is returned twice; for the background [1,1,1,1] the
# weighting multiplication is skipped altogether.
#
# @param threshold_first  threshold for the first matrix
# @param threshold_second threshold for the second matrix
# @return [Array] [result_for_first_background, result_for_second_background]
def counts_for_two_matrices(threshold_first, threshold_second)
  # fetched once so the blocks below do not call the readers on every yield
  background_first = first.background
  background_second = second.background

  weighted_counts = lambda do |background|
    get_counts(threshold_first, threshold_second) { |score, letter| background[letter] * score }
  end

  if background_first != background_second
    [weighted_counts.call(background_first), weighted_counts.call(background_second)]
  else
    shared = if background_first == [1,1,1,1]
               get_counts(threshold_first, threshold_second) { |score, _letter| score }
             else
               weighted_counts.call(background_first)
             end
    [shared, shared]
  end
end

#direct?Boolean

Returns:

  • (Boolean)


25
26
27
# File 'lib/macroape/pwm_compare_aligned.rb', line 25

# True when the stored orientation equals :direct.
# @return [Boolean]
def direct?
  orientation == :direct
end

#first_overlaps?(pos) ⇒ Boolean

Whether the first matrix overlaps the specified position of the alignment.

Returns:

  • (Boolean)


65
66
67
68
69
70
71
72
# File 'lib/macroape/pwm_compare_aligned.rb', line 65

# Tells whether the first matrix covers alignment position `pos`.
#
# Positions outside of 0...length are never covered. With a positive shift
# the first matrix starts at position 0, otherwise it starts at -shift.
#
# @param pos [Integer] zero-based position within the alignment
# @return [Boolean]
def first_overlaps?(pos)
  return false if pos < 0 || pos >= length

  start = shift > 0 ? 0 : -shift
  pos >= start && pos < start + first_length
end

#first_pwm_alignmentObject



36
37
38
39
40
41
42
43
44
# File 'lib/macroape/pwm_compare_aligned.rb', line 36

# Text row for the first matrix: one character per alignment position,
# '>' where the first matrix covers the position and '.' elsewhere.
# @return [String]
def first_pwm_alignment
  Array.new(length) { |pos| first_overlaps?(pos) ? '>' : '.' }.join
end

#get_counts(threshold_first, threshold_second, &count_contribution_block) ⇒ Object

The block has the form: {|score,letter| contribution to the count made by `letter` with `score` }



24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/macroape/aligned_pair_intersection.rb', line 24

# Walks over all alignment columns, maintaining a nested hash
# { score_on_first => { score_on_second => count } }, and returns the sum of
# all counts that are left after the last column.
#
# The per-column update is delegated to `recalc_score_hash`; the thresholds
# passed to it are lowered by `best_suffix(column + 1)` of the respective matrix.
#
# @param threshold_first  threshold for the first matrix
# @param threshold_second threshold for the second matrix
# @yield [score, letter] contribution to the count made by `letter` with `score`
# @return [Float] total of all counts in the final score hash
# @raise [RuntimeError] when `max_hash_size` is set and the total number of
#   stored (score_first, score_second) entries exceeds it
def get_counts(threshold_first, threshold_second, &count_contribution_block)
  # scores_on_first_pwm, scores_on_second_pwm --> count
  scores = { 0 => { 0 => 1 } }
  length.times do |column|
    new_scores = recalc_score_hash(scores,
                                   first.matrix[column], second.matrix[column],
                                   threshold_first - first.best_suffix(column + 1),
                                   threshold_second - second.best_suffix(column + 1),
                                   &count_contribution_block)
    scores.replace(new_scores)
    next unless max_hash_size

    # Iterating the outer hash directly yields [key, inner_hash] pairs whose size
    # is always 2, so the entry count has to be taken from the inner hashes.
    total_entries = scores.each_value.inject(0) { |total, second_scores| total + second_scores.size }
    if total_entries > max_hash_size
      raise 'Hash overflow in Macroape::AlignedPairIntersection#counts_for_two_matrices_with_different_probabilities'
    end
  end

  scores.each_value.inject(0.0) do |total, second_scores|
    total + second_scores.each_value.inject(0.0) { |subtotal, count| subtotal + count }
  end
end

#jaccard(first_threshold, second_threshold) ⇒ Object

def discrete(rate)

  PWMCompareAligned.new(first.discrete(rate), second.discrete(rate))
end

def sort_pair_of_matrices_by(&block)
  mat = first.pwm.zip(second.pwm).sort_by(&block).transpose
  PWMCompareAligned.new(SinglePWM(mat[0],first.probabilities), SinglePWM(mat[1], second.probabilities))
end
def sort_decreasing_max
  PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| -col_pair[0].max} )
end
def sort_increasing_min
  PWMCompareAligned.new(*sort_pair_of_matrices_by{|col_pair| col_pair[0].min} )
end
def permute_columns(permutation_index)
  PWMCompareAligned.new(first.permute(permutation_index), second.permute(permutation_index))
end


103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
# File 'lib/macroape/pwm_compare_aligned.rb', line 103

# Jaccard-style similarity of the two aligned matrices at the given thresholds.
#
# The individual counts come from `counts_by_thresholds(...).first` of each
# matrix, and the common count is the geometric mean of the two values
# returned by `counts_for_two_matrices`.
#
# @param first_threshold  threshold for the first matrix
# @param second_threshold threshold for the second matrix
# @return [Hash] with keys :similarity, :tanimoto, :recognized_by_both,
#   :recognized_by_first and :recognized_by_second. When either individual
#   count is zero, :similarity and :tanimoto are -1 and :recognized_by_both is 0.
def jaccard(first_threshold, second_threshold)
  by_first = first.counts_by_thresholds(first_threshold).first
  by_second = second.counts_by_thresholds(second_threshold).first

  # Nothing to compare when one of the matrices recognizes nothing.
  if by_first == 0 || by_second == 0
    return {
      similarity: -1,
      tanimoto: -1,
      recognized_by_both: 0,
      recognized_by_first: by_first,
      recognized_by_second: by_second
    }
  end

  pair_counts = counts_for_two_matrices(first_threshold, second_threshold)
  by_both = Math.sqrt(pair_counts[0] * pair_counts[1])
  ratio = by_both.to_f / (by_first + by_second - by_both)

  {
    similarity: ratio,
    tanimoto: 1.0 - ratio,
    recognized_by_both: by_both,
    recognized_by_first: by_first,
    recognized_by_second: by_second
  }
end

#jaccard_by_pvalue(pvalue) ⇒ Object



121
122
123
124
125
# File 'lib/macroape/pwm_compare_aligned.rb', line 121

# Same as `jaccard`, but the thresholds are obtained from each matrix via
# `threshold(pvalue)` instead of being passed in.
#
# @param pvalue P-value handed to `threshold` of both matrices
# @return whatever `jaccard` returns for the resulting thresholds
def jaccard_by_pvalue(pvalue)
  jaccard(first.threshold(pvalue), second.threshold(pvalue))
end

#max_hash_size(*args) ⇒ Object



141
142
143
144
145
146
147
# File 'lib/macroape/pwm_compare_aligned.rb', line 141

# Combined getter/setter for the hash size limit.
#
# Without arguments returns the current limit; with one argument delegates to
# `max_hash_size!` and returns its result.
#
# @raise [ArgumentError] when called with more than one argument
def max_hash_size(*args)
  return @max_hash_size if args.empty?
  return max_hash_size!(args.first) if args.size == 1

  raise ArgumentError, '#max_hash_size method can get 0 or 1 argument'
end

#max_hash_size!(new_max_hash_size) ⇒ Object

Sets or gets the limit on the total size of the calculation hash. It is a defence against excessive CPU usage on inappropriate data.



136
137
138
139
# File 'lib/macroape/pwm_compare_aligned.rb', line 136

# Stores a new hash size limit and returns the receiver, so calls can be chained.
#
# @param new_max_hash_size new limit (nil is stored as-is)
# @return [self]
def max_hash_size!(new_max_hash_size)
  tap { @max_hash_size = new_max_hash_size }
end

#overlapObject



32
33
34
# File 'lib/macroape/pwm_compare_aligned.rb', line 32

# Number of alignment positions covered by both matrices at once.
# @return [Integer]
def overlap
  (0...length).count do |pos|
    first_overlaps?(pos) && second_overlaps?(pos)
  end
end

#recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second) ⇒ Object

wouldn’t work without count_contribution_block



41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
# File 'lib/macroape/aligned_pair_intersection.rb', line 41

# Extends every (score_first, score_second) entry of `scores` by one column.
#
# For each of the four letters the column values are added to both scores;
# the new pair is kept only if both new scores reach their least sufficient
# values. Contributions of entries that end up on the same score pair are
# summed. The contribution itself is produced by the given block, which is
# required.
#
# @param scores [Hash] { score_first => { score_second => count } }
# @param first_column  indexable by 0..3, column of the first matrix
# @param second_column indexable by 0..3, column of the second matrix
# @param least_sufficient_first  lower bound for the new first score
# @param least_sufficient_second lower bound for the new second score
# @yield [count, letter] contribution added for this letter
# @return [Hash] new nested hash of the same shape as `scores`
def recalc_score_hash(scores, first_column, second_column, least_sufficient_first, least_sufficient_second)
  updated = Hash.new { |hash, key| hash[key] = Hash.new(0) }

  scores.each_pair do |prev_first, prev_seconds|
    prev_seconds.each_pair do |prev_second, count|
      (0...4).each do |letter|
        candidate_first = prev_first + first_column[letter]
        next unless candidate_first >= least_sufficient_first

        # the second score is only looked at once the first one has passed
        candidate_second = prev_second + second_column[letter]
        next unless candidate_second >= least_sufficient_second

        updated[candidate_first][candidate_second] += yield(count, letter)
      end
    end
  end

  updated
end

#revcomp?Boolean

Returns:

  • (Boolean)


28
29
30
# File 'lib/macroape/pwm_compare_aligned.rb', line 28

# True when the stored orientation equals :revcomp; the constructor uses it
# to decide whether to call `reverse_complement` on the second matrix.
# @return [Boolean]
def revcomp?
  orientation == :revcomp
end

#second_overlaps?(pos) ⇒ Boolean

Returns:

  • (Boolean)


74
75
76
77
78
79
80
81
# File 'lib/macroape/pwm_compare_aligned.rb', line 74

# Tells whether the second matrix covers alignment position `pos`.
#
# Positions outside of 0...length are never covered. With a positive shift
# the second matrix starts at position `shift`, otherwise it starts at 0.
#
# @param pos [Integer] zero-based position within the alignment
# @return [Boolean]
def second_overlaps?(pos)
  return false if pos < 0 || pos >= length

  start = shift > 0 ? shift : 0
  pos >= start && pos < start + second_length
end

#second_pwm_alignmentObject



46
47
48
49
50
51
52
53
54
# File 'lib/macroape/pwm_compare_aligned.rb', line 46

# Text row for the second matrix: one character per alignment position.
# Covered positions are drawn as '>' for direct orientation and '<' otherwise;
# positions the second matrix does not cover are drawn as '.'.
# @return [String]
def second_pwm_alignment
  row = (0...length).map do |pos|
    next '.' unless second_overlaps?(pos)

    direct? ? '>' : '<'
  end
  row.join
end