Module: SSW
- Defined in:
- lib/libssw.rb,
lib/ssw/dna.rb,
lib/ssw/aaseq.rb,
lib/ssw/align.rb,
lib/ssw/libssw.rb,
lib/ssw/profile.rb,
lib/ssw/version.rb,
lib/ssw/BLOSUM50.rb,
lib/ssw/BLOSUM62.rb
Defined Under Namespace
Modules: AASeq, DNA, LibSSW Classes: Align, Error, Profile
Constant Summary collapse
- VERSION =
'0.0.5'
- BLOSUM50 =
[ # A R N D C Q E G H I L K M F P S T W Y V B Z X * 5, -2, -1, -2, -1, -1, -1, 0, -2, -1, -2, -1, -1, -3, -1, 1, 0, -3, -2, 0, -2, -1, -1, -5, # A -2, 7, -1, -2, -4, 1, 0, -3, 0, -4, -3, 3, -2, -3, -3, -1, -1, -3, -1, -3, -1, 0, -1, -5, # R -1, -1, 7, 2, -2, 0, 0, 0, 1, -3, -4, 0, -2, -4, -2, 1, 0, -4, -2, -3, 5, 0, -1, -5, # N -2, -2, 2, 8, -4, 0, 2, -1, -1, -4, -4, -1, -4, -5, -1, 0, -1, -5, -3, -4, 6, 1, -1, -5, # D -1, -4, -2, -4, 13, -3, -3, -3, -3, -2, -2, -3, -2, -2, -4, -1, -1, -5, -3, -1, -3, -3, -1, -5, # C -1, 1, 0, 0, -3, 7, 2, -2, 1, -3, -2, 2, 0, -4, -1, 0, -1, -1, -1, -3, 0, 4, -1, -5, # Q -1, 0, 0, 2, -3, 2, 6, -3, 0, -4, -3, 1, -2, -3, -1, -1, -1, -3, -2, -3, 1, 5, -1, -5, # E 0, -3, 0, -1, -3, -2, -3, 8, -2, -4, -4, -2, -3, -4, -2, 0, -2, -3, -3, -4, -1, -2, -1, -5, # G -2, 0, 1, -1, -3, 1, 0, -2, 10, -4, -3, 0, -1, -1, -2, -1, -2, -3, 2, -4, 0, 0, -1, -5, # H -1, -4, -3, -4, -2, -3, -4, -4, -4, 5, 2, -3, 2, 0, -3, -3, -1, -3, -1, 4, -4, -3, -1, -5, # I -2, -3, -4, -4, -2, -2, -3, -4, -3, 2, 5, -3, 3, 1, -4, -3, -1, -2, -1, 1, -4, -3, -1, -5, # L -1, 3, 0, -1, -3, 2, 1, -2, 0, -3, -3, 6, -2, -4, -1, 0, -1, -3, -2, -3, 0, 1, -1, -5, # K -1, -2, -2, -4, -2, 0, -2, -3, -1, 2, 3, -2, 7, 0, -3, -2, -1, -1, 0, 1, -3, -1, -1, -5, # M -3, -3, -4, -5, -2, -4, -3, -4, -1, 0, 1, -4, 0, 8, -4, -3, -2, 1, 4, -1, -4, -4, -1, -5, # F -1, -3, -2, -1, -4, -1, -1, -2, -2, -3, -4, -1, -3, -4, 10, -1, -1, -4, -3, -3, -2, -1, -1, -5, # P 1, -1, 1, 0, -1, 0, -1, 0, -1, -3, -3, 0, -2, -3, -1, 5, 2, -4, -2, -2, 0, 0, -1, -5, # S 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 2, 5, -3, -2, 0, 0, -1, -1, -5, # T -3, -3, -4, -5, -5, -1, -3, -3, -3, -3, -2, -3, -1, 1, -4, -4, -3, 15, 2, -3, -5, -2, -1, -5, # W -2, -1, -2, -3, -3, -1, -2, -3, 2, -1, -1, -2, 0, 4, -3, -2, -2, 2, 8, -1, -3, -2, -1, -5, # Y 0, -3, -3, -4, -1, -3, -3, -4, -4, 4, 1, -3, 1, -1, -3, -2, 0, -3, -1, 5, -3, -3, -1, -5, # V -2, -1, 5, 6, -3, 0, 1, -1, 0, -4, -4, 0, -3, -4, -2, 0, 0, -5, -3, 
-3, 6, 1, -1, -5, # B -1, 0, 0, 1, -3, 4, 5, -2, 0, -3, -3, 1, -1, -4, -1, 0, -1, -2, -2, -3, 1, 5, -1, -5, # Z -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -5, # X -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, -5, 1 # * ]
- BLOSUM62 =
[ # A R N D C Q E G H I L K M F P S T W Y V B Z X * 4, -1, -2, -2, 0, -1, -1, 0, -2, -1, -1, -1, -1, -2, -1, 1, 0, -3, -2, 0, -2, -1, 0, -4, # A -1, 5, 0, -2, -3, 1, 0, -2, 0, -3, -2, 2, -1, -3, -2, -1, -1, -3, -2, -3, -1, 0, -1, -4, # R -2, 0, 6, 1, -3, 0, 0, 0, 1, -3, -3, 0, -2, -3, -2, 1, 0, -4, -2, -3, 3, 0, -1, -4, # N -2, -2, 1, 6, -3, 0, 2, -1, -1, -3, -4, -1, -3, -3, -1, 0, -1, -4, -3, -3, 4, 1, -1, -4, # D 0, -3, -3, -3, 9, -3, -4, -3, -3, -1, -1, -3, -1, -2, -3, -1, -1, -2, -2, -1, -3, -3, -2, -4, # C -1, 1, 0, 0, -3, 5, 2, -2, 0, -3, -2, 1, 0, -3, -1, 0, -1, -2, -1, -2, 0, 3, -1, -4, # Q -1, 0, 0, 2, -4, 2, 5, -2, 0, -3, -3, 1, -2, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # E 0, -2, 0, -1, -3, -2, -2, 6, -2, -4, -4, -2, -3, -3, -2, 0, -2, -2, -3, -3, -1, -2, -1, -4, # G -2, 0, 1, -1, -3, 0, 0, -2, 8, -3, -3, -1, -2, -1, -2, -1, -2, -2, 2, -3, 0, 0, -1, -4, # H -1, -3, -3, -3, -1, -3, -3, -4, -3, 4, 2, -3, 1, 0, -3, -2, -1, -3, -1, 3, -3, -3, -1, -4, # I -1, -2, -3, -4, -1, -2, -3, -4, -3, 2, 4, -2, 2, 0, -3, -2, -1, -2, -1, 1, -4, -3, -1, -4, # L -1, 2, 0, -1, -3, 1, 1, -2, -1, -3, -2, 5, -1, -3, -1, 0, -1, -3, -2, -2, 0, 1, -1, -4, # K -1, -1, -2, -3, -1, 0, -2, -3, -2, 1, 2, -1, 5, 0, -2, -1, -1, -1, -1, 1, -3, -1, -1, -4, # M -2, -3, -3, -3, -2, -3, -3, -3, -1, 0, 0, -3, 0, 6, -4, -2, -2, 1, 3, -1, -3, -3, -1, -4, # F -1, -2, -2, -1, -3, -1, -1, -2, -2, -3, -3, -1, -2, -4, 7, -1, -1, -4, -3, -2, -2, -1, -2, -4, # P 1, -1, 1, 0, -1, 0, 0, 0, -1, -2, -2, 0, -1, -2, -1, 4, 1, -3, -2, -2, 0, 0, 0, -4, # S 0, -1, 0, -1, -1, -1, -1, -2, -2, -1, -1, -1, -1, -2, -1, 1, 5, -2, -2, 0, -1, -1, 0, -4, # T -3, -3, -4, -4, -2, -2, -3, -2, -2, -3, -2, -3, -1, 1, -4, -3, -2, 11, 2, -3, -4, -3, -2, -4, # W -2, -2, -2, -3, -2, -1, -2, -3, 2, -1, -1, -2, -1, 3, -3, -2, -2, 2, 7, -1, -3, -2, -1, -4, # Y 0, -3, -3, -3, -1, -2, -2, -3, -3, 3, 1, -2, 1, -1, -2, -2, 0, -3, -1, 4, -3, -2, -1, -4, # V -2, -1, 3, 4, -3, 0, 1, -1, 0, -3, -4, 0, -3, -3, -2, 0, -1, -4, -3, -3, 4, 
1, -1, -4, # B -1, 0, 0, 1, -3, 3, 4, -2, 0, -3, -3, 1, -1, -3, -1, 0, -1, -3, -2, -2, 1, 4, -1, -4, # Z 0, -1, -1, -1, -2, -1, -1, -1, -1, -1, -1, -1, -1, -1, -2, 0, 0, -2, -1, -1, -1, -1, -1, -4, # X -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, -4, 1, # * ]
Class Attribute Summary collapse
-
.ffi_lib ⇒ Object
Returns the value of attribute ffi_lib.
Class Method Summary collapse
-
.align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil) ⇒ Align
Do Striped Smith-Waterman alignment.
-
.align_destroy(align) ⇒ Object
Release the memory allocated by function ssw_align.
-
.build_path(q_seq, r_seq, align) ⇒ Array
TODO: fix variable names.
-
.create_scoring_matrix(elements, match_score, mismatch_score) ⇒ Object
Create the scoring matrix for the Smith-Waterman algorithm.
-
.init(read, mat, n = nil, score_size: 2) ⇒ Object
Create the query profile using the query sequence.
-
.init_destroy(profile) ⇒ Object
Release the memory allocated by function ssw_init.
-
.mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) ⇒ Integer
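Calculate the number of mismatches and modify the cigar string to differentiate matches (=), mismatches (X), and softclip (S).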
Class Attribute Details
.ffi_lib ⇒ Object
Returns the value of attribute ffi_lib.
13 14 15 |
# File 'lib/libssw.rb', line 13 def ffi_lib @ffi_lib end |
Class Method Details
.align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil) ⇒ Align
Do Striped Smith-Waterman alignment.
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 |
# File 'lib/libssw.rb', line 139

# Do Striped Smith-Waterman alignment.
#
# Validates the arguments, packs the reference into a signed 8-bit byte
# string, calls LibSSW.ssw_align and wraps the returned pointer in an Align.
#
# @param prof [Profile, Fiddle::Pointer, #to_ptr] query profile
# @param ref [Array<Integer>] reference sequence as numbers (packed with 'c*')
# @param weight_gap0 [Integer] passed through to LibSSW.ssw_align
#   (presumably the gap-open penalty — confirm against the C header)
# @param weight_gapE [Integer] passed through to LibSSW.ssw_align
#   (presumably the gap-extension penalty — confirm against the C header)
# @param flag [Integer] passed through unchanged to LibSSW.ssw_align
# @param filters [Integer] passed through unchanged to LibSSW.ssw_align
# @param filterd [Integer] passed through unchanged to LibSSW.ssw_align
# @param mask_len [Integer, nil] when nil, defaults to
#   the larger of (ref.size / 2) and 15
# @return [Align]
# @raise [ArgumentError] if prof is not a Profile / pointer-like object,
#   or if ref is not an Array
def align(prof, ref, weight_gap0, weight_gapE, flag, filters, filterd, mask_len = nil)
  unless prof.is_a?(Fiddle::Pointer) || prof.is_a?(Profile) || prof.respond_to?(:to_ptr)
    # The message names the argument actually being checked (it used to say
    # "filename", which does not exist in this method).
    raise ArgumentError, 'Expect class of prof to be Profile or Pointer'
  end
  raise ArgumentError, 'Expect class of ref to be Array' unless ref.is_a?(Array)

  ref_str = ref.pack('c*')
  ref_len = ref.size
  mask_len ||= [ref_len / 2, 15].max
  ptr = LibSSW.ssw_align(
    prof, ref_str, ref_len, weight_gap0, weight_gapE, flag, filters, filterd, mask_len
  )
  # Garbage collection workaround:
  # Keep ref_str alive while the C code might still need it.
  # However, since Align.new immediately reads all values and calls align_destroy,
  # the C memory is freed immediately, so ref_str only needs to live until then.
  # We store it on ptr just to be safe during the Align.new call.
  ptr.instance_variable_set(:@ref_str, ref_str)
  SSW::Align.new(ptr)
end
.align_destroy(align) ⇒ Object
Release the memory allocated by function ssw_align.
163 164 165 166 167 168 169 170 171 172 |
# File 'lib/libssw.rb', line 163

# Release the memory allocated by function ssw_align.
#
# An Align instance is not forwarded to the C library: a warning is printed
# and nil is returned. A Fiddle::Pointer, or any object responding to
# #to_ptr, is handed to LibSSW.align_destroy.
#
# @param align [Align, Fiddle::Pointer, #to_ptr]
# @return [Object, nil] nil for an Align, otherwise the result of
#   LibSSW.align_destroy
# @raise [ArgumentError] for any other kind of argument
def align_destroy(align)
  if align.is_a?(Align)
    warn "You don't need to call this method for Ruby's Align class."
    return nil
  end

  pointer_like = align.is_a?(Fiddle::Pointer) || align.respond_to?(:to_ptr)
  raise ArgumentError, 'Expect class of align to be Pointer' unless pointer_like

  LibSSW.align_destroy(align)
end
.build_path(q_seq, r_seq, align) ⇒ Array
TODO: fix variable names
224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 |
# File 'lib/libssw.rb', line 224

# Render an alignment as three equally long text lines (query, match
# markers, reference).
#
# Each element of align.cigar is an Integer whose low 4 bits select an
# operation from 'MIDNSHP=X' (codes above 8 are treated as 'M') and whose
# remaining upper bits give the run length.
#
# * 'M', '=' and 'X' consume both sequences; the marker line gets '|' where
#   the two characters are equal and '*' where they differ.
# * 'I' consumes the query only; marker and reference lines are padded
#   with spaces.
# * 'D' consumes the reference only; query and marker lines are padded
#   with spaces.
# * Any other operation is ignored (no output, no offset change).
#
# @param q_seq [String] query sequence
# @param r_seq [String] reference sequence
# @param align [#read_begin1, #ref_begin1, #cigar, #cigar_string]
# @return [Array] [cigar_string, query_line, marker_line, reference_line]
def build_path(q_seq, r_seq, align)
  query_line = +''
  marker_line = +''
  ref_line = +''
  q_off = align.read_begin1
  r_off = align.ref_begin1

  align.cigar.each do |packed|
    len = packed >> 4
    code = packed & 15
    op = code > 8 ? 'M' : 'MIDNSHP=X'[code]

    case op
    when 'M', '=', 'X'
      # '=' and 'X' are the refined forms of 'M'; they advance both
      # sequences in exactly the same way.
      query_line << q_seq[q_off...(q_off + len)]
      len.times do |j|
        marker_line << (q_seq[q_off + j] == r_seq[r_off + j] ? '|' : '*')
      end
      ref_line << r_seq[r_off...(r_off + len)]
      q_off += len
      r_off += len
    when 'I'
      query_line << q_seq[q_off...(q_off + len)]
      marker_line << (' ' * len)
      ref_line << (' ' * len)
      q_off += len
    when 'D'
      query_line << (' ' * len)
      marker_line << (' ' * len)
      ref_line << r_seq[r_off...(r_off + len)]
      r_off += len
    end
  end

  [align.cigar_string, query_line, marker_line, ref_line]
end
.create_scoring_matrix(elements, match_score, mismatch_score) ⇒ Object
Create the scoring matrix for the Smith-Waterman algorithm.
207 208 209 210 211 212 213 214 215 216 217 |
# File 'lib/libssw.rb', line 207

# Create the scoring matrix for the Smith-Waterman algorithm.
#
# The result is a flat, row-major Array of size * size Integers, where
# size = elements.size. A cell holds match_score when the row and column
# elements are equal, and mismatch_score otherwise. The last row and the
# last column are always left at 0.
# NOTE(review): presumably so that the final element acts as a neutral
# "ambiguous" symbol — confirm.
#
# @param elements [Array] alphabet; the final entry gets an all-zero row/column
# @param match_score [Integer] score for identical elements
# @param mismatch_score [Integer] score for differing elements (used as given)
# @return [Array<Integer>] flattened size x size matrix
def create_scoring_matrix(elements, match_score, mismatch_score)
  size = elements.size
  last = size - 1
  Array.new(size * size) do |cell|
    row, col = cell.divmod(size)
    if row == last || col == last
      0
    elsif elements[row] == elements[col]
      match_score
    else
      mismatch_score
    end
  end
end
.init(read, mat, n = nil, score_size: 2) ⇒ Object
Create the query profile using the query sequence.
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/libssw.rb', line 44

# Create the query profile using the query sequence.
#
# @param read [#to_a] query sequence as numbers (packed with 'c*')
# @param mat [#to_a] scoring matrix; nested arrays are flattened, and the
#   flattened values are packed with 'c*'
# @param n [Integer, nil] side length of the square matrix; when nil it is
#   derived as the integer square root of the flattened matrix size
# @param score_size [Integer] passed through unchanged to LibSSW.ssw_init
# @return [Profile]
# @raise [ArgumentError] if read or mat cannot be converted with #to_a
# @raise [RuntimeError] if the flattened matrix does not hold n * n values
def init(read, mat, n = nil, score_size: 2)
  # Validate before converting: once #to_a has been called the value is
  # always an Array, so a check placed afterwards could never fail.
  raise ArgumentError, 'Expect class of read to be Array' unless read.respond_to?(:to_a)
  raise ArgumentError, 'Expect class of mat to be Array' unless mat.respond_to?(:to_a)

  read = read.to_a
  mat = mat.to_a.flatten

  read_str = read.pack('c*')
  read_len = read.size
  # Integer.sqrt keeps n an Integer (it is handed to the C function) and
  # makes the squareness check exact instead of relying on Float rounding.
  n = Integer.sqrt(mat.size) if n.nil?
  raise "Not a square matrix. size: #{mat.size}, n: #{n}" if mat.size != n * n

  mat_str = mat.pack('c*')
  ptr = LibSSW.ssw_init(
    read_str,
    read_len,
    mat_str,
    n,
    score_size
  )
  # Garbage collection workaround:
  # The C library stores pointers to read and mat without copying the data.
  # We must keep the Ruby strings (read_str, mat_str) alive for the lifetime
  # of the profile structure to prevent segmentation faults.
  #
  # We cannot use Fiddle's automatic memory management (ptr.free) here because:
  # - Calling init_destroy from Ruby's GC causes segmentation violations
  # - The user should explicitly call SSW.init_destroy when done, or let
  #   Ruby's GC clean up the profile structure itself (though the contained
  #   profile_byte/profile_word will leak unless init_destroy is called)
  ptr.instance_variable_set(:@read_str, read_str)
  ptr.instance_variable_set(:@mat_str, mat_str)
  ptr.instance_variable_set(:@read_len, read_len)
  ptr.instance_variable_set(:@n, n)
  ptr.instance_variable_set(:@score_size, score_size)
  SSW::Profile.new(ptr)
end
.init_destroy(profile) ⇒ Object
Ruby has garbage collection, so there is not much reason to call this method.
Release the memory allocated by function ssw_init.
87 88 89 90 91 92 93 |
# File 'lib/libssw.rb', line 87

# Release the memory allocated by function ssw_init.
#
# The argument is forwarded to LibSSW.init_destroy after a type check.
#
# @param profile [Profile, Fiddle::Pointer, #to_ptr]
# @return [Object] whatever LibSSW.init_destroy returns
# @raise [ArgumentError] if profile is none of the accepted kinds
def init_destroy(profile)
  acceptable = profile.is_a?(Fiddle::Pointer) ||
               profile.is_a?(Profile) ||
               profile.respond_to?(:to_ptr)
  raise ArgumentError, 'Expect class of profile to be Profile or Pointer' unless acceptable

  LibSSW.init_destroy(profile)
end
.mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len) ⇒ Integer
This method takes a Fiddle::Pointer as an argument. Please read the source code and understand it well before using this method. (Needs to be improved)
-
Calculate the number of mismatches.
-
Modify the cigar string:
differentiate matches (=), mismatches(X), and softclip(S).
196 197 198 199 200 201 |
# File 'lib/libssw.rb', line 196

# Thin wrapper around LibSSW.mark_mismatch.
#
# Always prints a warning first (the handling of the cigar argument is a
# known open issue), then packs ref and read as signed 8-bit bytes ('c*')
# and cigar_len with 'l*' before forwarding everything to the C binding.
#
# @param ref_begin1 [Integer] forwarded unchanged
# @param read_begin1 [Integer] forwarded unchanged
# @param read_end1 [Integer] forwarded unchanged
# @param ref [Array<Integer>] packed with 'c*'
# @param read [Array<Integer>] packed with 'c*'
# @param read_len [Integer] forwarded unchanged
# @param cigar [Object] forwarded unchanged
#   (the method documentation says a Fiddle::Pointer is expected)
# @param cigar_len [Array<Integer>] packed with 'l*'
# @return [Integer] result of LibSSW.mark_mismatch
def mark_mismatch(ref_begin1, read_begin1, read_end1, ref, read, read_len, cigar, cigar_len)
  warn 'implementation: fiexme: **cigar' # FIXME
  ref_bytes = ref.pack('c*')
  read_bytes = read.pack('c*')
  cigar_len_bytes = cigar_len.pack('l*')
  LibSSW.mark_mismatch(
    ref_begin1, read_begin1, read_end1,
    ref_bytes, read_bytes, read_len,
    cigar, cigar_len_bytes
  )
end