Class: Align::SmithWaterman

Inherits:
PairwiseAlgorithm show all
Defined in:
lib/align/smith_waterman.rb

Overview

Aligns two sequences with the Smith-Waterman algorithm; see [SmithWaterman.align] for the one-call entry point. References:

www.avatar.se/molbioinfo2001/dynprog/dynamic.html

Constant Summary collapse

SCORING_DEFAULT =

Default scoring, used when no :scoring option is passed to the constructor

BasicScoring.new(2,-1,-3)

Instance Attribute Summary collapse

Attributes inherited from PairwiseAlgorithm

#scoring, #seq1, #seq2

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from PairwiseAlgorithm

#max2, #max3, #max4

Constructor Details

#initialize(seq1, seq2, opts = {}) ⇒ SmithWaterman

Returns a new instance of SmithWaterman.

Parameters:

  • seq1 (#[], #size)

    The first sequence

  • seq2 (#[], #size)

    The second sequence

  • opts (Hash) (defaults to: {})

    Options

Options Hash (opts):

  • :scoring (#score_align, #score_delete, #score_insert) — default: SCORING_DEFAULT

    An instance of a scoring object.

  • :skip_obj (Object) — default: nil

    An object to shove into the gaps of the aligned sequences



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/align/smith_waterman.rb', line 20

# Sets up the score matrix for the two sequences and fills it right away.
#
# @param seq1 [#[], #size] the first sequence
# @param seq2 [#[], #size] the second sequence
# @param opts [Hash] options
# @option opts [Object] :scoring scoring object; SCORING_DEFAULT is used
#   when the option is missing or falsy
# @option opts [Object] :skip_obj (nil) placeholder that #align puts into gaps
def initialize(seq1, seq2, opts = {})
  chosen_scoring = opts[:scoring] || SCORING_DEFAULT
  super(seq1, seq2, chosen_scoring)

  # Best cell seen so far; assigned while the matrix is filled.
  @max_score = @max_row = @max_col = nil

  # The matrix has one more row/column than the sequences have elements.
  @rows = @seq1.size + 1
  @cols = @seq2.size + 1

  # A falsy :skip_obj (including false) is stored as nil.
  @skip_obj = opts[:skip_obj] || nil

  @matrix = Array.new(@rows) { Array.new(@cols) }

  fill
end

Instance Attribute Details

#cols ⇒ Object (readonly)

Returns the value of attribute cols.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @cols (set to seq2.size + 1 by the constructor)
def cols; @cols; end

#matrix ⇒ Object (readonly)

Returns the value of attribute matrix.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @matrix (the score matrix, an array of row arrays)
def matrix; @matrix; end

#max_col ⇒ Object (readonly)

Returns the value of attribute max_col.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @max_col (column of the best-scoring cell, nil until one is recorded)
def max_col; @max_col; end

#max_row ⇒ Object (readonly)

Returns the value of attribute max_row.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @max_row (row of the best-scoring cell, nil until one is recorded)
def max_row; @max_row; end

#max_score ⇒ Object (readonly)

Returns the value of attribute max_score.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @max_score (best cell score, nil until one is recorded)
def max_score; @max_score; end

#rows ⇒ Object (readonly)

Returns the value of attribute rows.



9
10
11
# File 'lib/align/smith_waterman.rb', line 9

# @return [Object] the value of @rows (set to seq1.size + 1 by the constructor)
def rows; @rows; end

Class Method Details

.align(seq1, seq2, opts = {}) ⇒ Object

Aligns two sequences in one call: builds a new instance from the arguments and returns the result of its #align.

Parameters:



152
153
154
# File 'lib/align/smith_waterman.rb', line 152

# One-call entry point: constructs an aligner for the two sequences and
# returns whatever its instance-level #align returns.
#
# @param seq1 [Object] first sequence, passed through to the constructor
# @param seq2 [Object] second sequence, passed through to the constructor
# @param opts [Hash] options, passed through to the constructor
# @return [Object] the result of calling #align on the new instance
def self.align(seq1, seq2, opts = {})
  aligner = new(seq1, seq2, opts)
  aligner.align
end

Instance Method Details

#align(r = @max_row, c = @max_col) ⇒ Object

Returns the sequences in aligned arrays. Gaps are filled with :skip_obj

Parameters:

  • r (Integer) (defaults to: @max_row)

    The row to traceback from

  • c (Integer) (defaults to: @max_col)

    The column to traceback from

Returns:

  • (Array) A pair of arrays, one per sequence, holding the aligned elements in order, with :skip_obj in the gap positions.



128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/align/smith_waterman.rb', line 128

# Builds the two aligned sequences by following the traceback from (r, c).
#
# Each traceback step contributes one element to each output: the sequence
# element where the step consumes one, otherwise @skip_obj.
#
# @param r [Integer] the row to trace back from (defaults to @max_row)
# @param c [Integer] the column to trace back from (defaults to @max_col)
# @return [Array(Array, Array)] the aligned forms of seq1 and seq2
def align(r = @max_row, c = @max_col)
  first_side = []
  second_side = []

  traceback(r, c) do |i, j, step|
    # :align consumes both sequences, :delete only seq1, :insert only seq2.
    first_side << (%i[align delete].include?(step) ? @seq1[i] : @skip_obj)
    second_side << (%i[align insert].include?(step) ? @seq2[j] : @skip_obj)
  end

  # Steps arrive end-to-start, so flip both sides into sequence order.
  [first_side.reverse, second_side.reverse]
end

#fill ⇒ Object

Fills the matrix with the alignment map.



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/align/smith_waterman.rb', line 40

# Computes every cell of the score matrix and records the best cell.
#
# Row 0 and column 0 are set to 0. Every other cell is the largest of: the
# diagonal neighbour plus the align score, the cell above plus the delete
# score, the cell to the left plus the insert score, and 0. While filling,
# @max_score / @max_row / @max_col track the highest-scoring cell; on ties
# the cell visited last wins.
def fill
  # Zero border along the top row and the left column.
  (0...@cols).each { |col| @matrix[0][col] = 0 }
  (1...@rows).each { |row| @matrix[row][0] = 0 }

  last_row = @rows - 1
  last_col = @cols - 1

  1.upto(last_row) do |row|
    above = @matrix[row - 1]
    current = @matrix[row]

    1.upto(last_col) do |col|
      item1 = @seq1[row - 1]
      item2 = @seq2[col - 1]

      # Candidate scores for reaching this cell from each neighbour.
      via_diagonal = above[col - 1] + @scoring.score_align(item1, item2)
      via_above = above[col] + @scoring.score_delete(item1)
      via_left = current[col - 1] + @scoring.score_insert(item2)
      best = max4(via_diagonal, via_above, via_left, 0)

      # >= (not >) so that a later cell with an equal score replaces the earlier one.
      new_maximum = @max_score.nil? || best >= @max_score
      @max_score, @max_row, @max_col = best, row, col if new_maximum

      current[col] = best
    end
  end
end

#traceback(i = @max_row, j = @max_col) {|i, j, step| ... } ⇒ Object

Traces backward, finding the alignment.

Parameters:

  • i (Integer) (defaults to: @max_row)

    The row to traceback from

  • j (Integer) (defaults to: @max_col)

    The column to traceback from

Yields:

  • (i, j, step)

Yield Parameters:

  • i (Integer)

    The location in sequence one

  • j (Integer)

    The location in sequence two

  • step (Symbol)

    The move that was taken: :align, :delete or :insert



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/align/smith_waterman.rb', line 80

# Walks backward through the filled matrix from cell (i, j), yielding once
# per move, until row 0 or column 0 is reached or the current cell's score
# is not positive.
#
# The indices passed to the block are the values *after* the move, so they
# can be used directly as 0-based indices into the sequences for the element
# the move consumed.
#
# @param i [Integer, nil] the row to trace back from (defaults to @max_row)
# @param j [Integer, nil] the column to trace back from (defaults to @max_col)
# @yieldparam i [Integer] the location in sequence one
# @yieldparam j [Integer] the location in sequence two
# @yieldparam step [Symbol] the move taken: :align, :delete or :insert
# @return [nil]
def traceback(i = @max_row, j = @max_col)
  # @max_row / @max_col stay nil when no cell was scored (an empty sequence);
  # there is nothing to trace then, so yield nothing instead of raising.
  return if i.nil? || j.nil?

  while i > 0 && j > 0 && @matrix[i][j] > 0
    score = @matrix[i][j]

    seq1_obj = @seq1[i - 1]
    seq2_obj = @seq2[j - 1]

    # Find which neighbour produced this cell's score: diagonal first, then
    # the cell above.
    if score == @matrix[i - 1][j - 1] + @scoring.score_align(seq1_obj, seq2_obj)
      step = :align
      i -= 1
      j -= 1
    elsif score == @matrix[i - 1][j] + @scoring.score_delete(seq1_obj)
      step = :delete
      i -= 1
    else
      # Neither of the above matched, so the move is taken to be from the left.
      step = :insert
      j -= 1
    end

    yield(i, j, step)
  end
end

#traceback_array(r = @max_row, c = @max_col) ⇒ Object

Like traceback, but returns an array of the traceback instead of yielding blocks.

Parameters:

  • r (Integer) (defaults to: @max_row)

    The row to traceback from

  • c (Integer) (defaults to: @max_col)

    The column to traceback from



116
117
118
119
120
121
122
# File 'lib/align/smith_waterman.rb', line 116

# Runs #traceback from (r, c) and gathers the yielded steps into an array
# rather than handing them to a block.
#
# @param r [Integer] the row to trace back from (defaults to @max_row)
# @param c [Integer] the column to trace back from (defaults to @max_col)
# @return [Array<Array>] one [i, j, flags] triple per step, in the order yielded
def traceback_array(r = @max_row, c = @max_col)
  [].tap do |steps|
    traceback(r, c) { |i, j, flags| steps << [i, j, flags] }
  end
end