Module: SmithWaterman

Defined in:
lib/rbbt/util/misc/ssw.rb

Overview

From: github.com/mengyao/Complete-Striped-Smith-Waterman-Library. Citation: SSW Library: An SIMD Smith-Waterman C/C++ Library for Use in Genomic Applications.

Mengyao Zhao, Wan-Ping Lee, Gabor T. Marth 
http://arxiv.org/abs/1208.6350

Class Method Summary collapse

Class Method Details

.align(query_sequence, target_sequence) ⇒ Object



241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
# File 'lib/rbbt/util/misc/ssw.rb', line 241

# Aligns +query_sequence+ against +target_sequence+ by calling
# SmithWaterman.ssw_aa and parsing the text it produces on the pipe.
#
# The output is scanned for a "Target:" and a "Query:" line, each holding a
# start position, the aligned string and an end position. Each aligned string
# is left-padded with one "_" per position that precedes its start, so that
# character offsets in the result refer to positions in the full sequence.
#
# Every failure (nil input, pipe error, unparseable output) is logged as a
# warning and reported to the caller as the placeholder pair ["-", "-"];
# no exception raised inside the alignment escapes this method.
#
# @param query_sequence [String] sequence to align
# @param target_sequence [String] sequence to align against
# @return [Array<String>] [padded_query_alignment, padded_target_alignment],
#   or ["-", "-"] when the alignment could not be produced
def self.align(query_sequence, target_sequence)
  Log.low { "Aligning #{ Misc.fingerprint query_sequence } to #{ Misc.fingerprint target_sequence }" }

  begin
    raise "No query sequence" if query_sequence.nil?
    raise "No target sequence" if target_sequence.nil?

    s_out = Misc.open_pipe do |s_in|
      SmithWaterman.ssw_aa(query_sequence, target_sequence, query_sequence.length, target_sequence.length, s_in.fileno)
    end

    # Close the read end even when reading fails so the stream is not leaked.
    txt = begin
      s_out.read
    ensure
      s_out.close
    end
    s_out.join

    target_match = txt.match(/Target:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/)
    query_match = txt.match(/Query:\s+(\d+)\s+([A-Z\-?*]+)\s+(\d+)/)

    # Fail with a descriptive message instead of a NoMethodError on nil.
    raise "No target alignment found in SSW output" if target_match.nil?
    raise "No query alignment found in SSW output" if query_match.nil?

    # Only the start position and the aligned string are needed; the end
    # position (capture 3) is ignored.
    target_start, target = target_match.values_at 1, 2
    query_start, query = query_match.values_at 1, 2

    # Empty the raw output buffer once the captures have been extracted
    # (presumably to release memory early -- TODO confirm).
    txt.replace ""

    [("_" * (query_start.to_i - 1)) + query, ("_" * (target_start.to_i - 1)) + target]
  rescue
    Log.warn("Error in alignment: #{$!.message}")
    ["-", "-"]
  end
end

.alignment_map(source, target) ⇒ Object



269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/rbbt/util/misc/ssw.rb', line 269

# Builds a position map between two sequences from their alignment.
#
# The pair returned by SmithWaterman.align is split into a leading run of "_"
# characters (whose length is used as a position offset) and the aligned
# body. The bodies are walked column by column; each column where both
# characters are equal and not a gap ("-") records
# source position => target position, both 1-based and with the gaps seen so
# far (including the current column) subtracted.
#
# @param source [String] sequence passed to SmithWaterman.align as the query
# @param target [String] sequence passed to SmithWaterman.align as the target
# @return [Hash{Integer => Integer}] matched positions, or an empty hash when
#   mismatching columns plus source gaps exceed half the aligned source length
def self.alignment_map(source, target)
  aligned_source, aligned_target = SmithWaterman.align(source, target)

  lead_source, body_source = aligned_source.match(/^(_*)(.*)/).values_at(1, 2)
  lead_target, body_target = aligned_target.match(/^(_*)(.*)/).values_at(1, 2)

  positions = {}
  source_gap_count = 0
  target_gap_count = 0
  mismatch_count = 0

  target_chars = body_target.chars

  body_source.each_char.with_index do |source_char, column|
    # nil when the target body is shorter than the source body
    target_char = target_chars[column]

    source_gap_count += 1 if source_char == '-'
    target_gap_count += 1 if target_char == '-'

    if source_char == target_char && source_char != "-"
      source_position = column + 1 + lead_source.length - source_gap_count
      target_position = column + 1 + lead_target.length - target_gap_count
      positions[source_position] = target_position
    else
      mismatch_count += 1
    end
  end

  # Discard the whole map when the alignment is too poor to be trusted.
  too_divergent = mismatch_count + source_gap_count > body_source.length.to_f / 2
  too_divergent ? {} : positions
end