Class: Bio::MAF::Sequence
- Inherits:
-
Object
- Object
- Bio::MAF::Sequence
- Defined in:
- lib/bio/maf/maf.rb
Overview
A sequence within an alignment block.
Direct Known Subclasses
Constant Summary
- I_STATUS =
{ 'C' => :contiguous, 'I' => :intervening, 'N' => :first, 'n' => :first_bridged, 'M' => :missing_data, 'T' => :tandem }
Instance Attribute Summary
-
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
-
#quality ⇒ String
Quality string from 'q' line.
-
#size ⇒ Integer
readonly
Size of aligning region in source sequence.
-
#source ⇒ String
readonly
Source sequence name.
-
#src_size ⇒ Integer
(also: #source_size)
readonly
Size of the entire source sequence, not just the aligning region.
-
#start ⇒ Integer
readonly
Zero-based start position.
-
#strand ⇒ Symbol
readonly
:+ or :-, indicating which strand the alignment is to.
-
#text ⇒ String
readonly
Sequence data for the alignment, including insertions.
Instance Method Summary
- #decode_status_char(c) ⇒ Object
- #delete_text(offset, len) ⇒ Object
-
#empty? ⇒ Boolean
Whether this sequence is empty.
- #end ⇒ Object
- #fasta_desc ⇒ Object
- #gapped? ⇒ Boolean
-
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
constructor
A new instance of Sequence.
- #interval ⇒ Object
- #join(o) ⇒ Object
- #joinable_with?(o) ⇒ Boolean
- #left_count ⇒ Object
- #left_status ⇒ Object
- #left_status_char ⇒ Object
- #right_count ⇒ Object
- #right_status ⇒ Object
- #right_status_char ⇒ Object
- #slice(range) ⇒ Object
- #species ⇒ Object
-
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
- #to_bio_alignment ⇒ Object
- #upcase! ⇒ Object
Constructor Details
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
Returns a new instance of Sequence.
277 278 279 280 281 282 283 284 |
# File 'lib/bio/maf/maf.rb', line 277 def initialize(source, start, size, strand, src_size, text) @source = source @start = start @size = size @strand = strand @src_size = src_size @text = text end |
Instance Attribute Details
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
271 272 273 |
# File 'lib/bio/maf/maf.rb', line 271 def i_data @i_data end |
#quality ⇒ String
Quality string from 'q' line.
274 275 276 |
# File 'lib/bio/maf/maf.rb', line 274 def quality @quality end |
#size ⇒ Integer (readonly)
Returns the size of the aligning region in the source sequence.
258 259 260 |
# File 'lib/bio/maf/maf.rb', line 258 def size @size end |
#source ⇒ String (readonly)
Returns the source sequence name.
254 255 256 |
# File 'lib/bio/maf/maf.rb', line 254 def source @source end |
#src_size ⇒ Integer (readonly) Also known as: source_size
Size of the entire source sequence, not just the aligning region.
265 266 267 |
# File 'lib/bio/maf/maf.rb', line 265 def src_size @src_size end |
#start ⇒ Integer (readonly)
Returns the zero-based start position.
256 257 258 |
# File 'lib/bio/maf/maf.rb', line 256 def start @start end |
#strand ⇒ Symbol (readonly)
:+ or :-, indicating which strand the alignment is to.
261 262 263 |
# File 'lib/bio/maf/maf.rb', line 261 def strand @strand end |
#text ⇒ String (readonly)
Sequence data for the alignment, including insertions.
268 269 270 |
# File 'lib/bio/maf/maf.rb', line 268 def text @text end |
Instance Method Details
#decode_status_char(c) ⇒ Object
332 333 334 |
# File 'lib/bio/maf/maf.rb', line 332
#
# Translates a one-character synteny status code into its symbolic
# name by looking it up in I_STATUS.
#
# @param c [String] status character, e.g. 'C' or 'I'
# @return [Symbol] the matching I_STATUS value
# @raise [RuntimeError] if the character is not a key of I_STATUS
def decode_status_char(c)
  I_STATUS.fetch(c) { raise "Unsupported status character #{c}!" }
end
#delete_text(offset, len) ⇒ Object
365 366 367 368 369 370 371 372 |
# File 'lib/bio/maf/maf.rb', line 365
#
# Removes +len+ characters starting at +offset+ from #text in place,
# and from #quality as well when a quality string is present.
# Does nothing when the sequence reports itself as empty.
#
# @param offset [Integer] string offset of the first character to remove
# @param len [Integer] number of characters to remove
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end
#empty? ⇒ Boolean
Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.
315 316 317 |
# File 'lib/bio/maf/maf.rb', line 315 def empty? false end |
#end ⇒ Object
286 287 288 |
# File 'lib/bio/maf/maf.rb', line 286 def end start + size end |
#fasta_desc ⇒ Object
382 383 384 |
# File 'lib/bio/maf/maf.rb', line 382
#
# Builds a description string of the form "source:start-end", where
# the end value is start + size.
#
# @return [String]
def fasta_desc
  format('%s:%s-%s', source, start, start + size)
end
#gapped? ⇒ Boolean
319 320 321 |
# File 'lib/bio/maf/maf.rb', line 319
#
# Whether the alignment text length differs from #size, i.e. the
# text holds more (or fewer) characters than the aligning region.
#
# @return [Boolean]
def gapped?
  text.size != size
end
#interval ⇒ Object
290 291 292 |
# File 'lib/bio/maf/maf.rb', line 290 def interval GenomicInterval.zero_based(self.source, self.start, self.end) end |
#join(o) ⇒ Object
392 393 394 395 396 397 398 399 400 401 402 403 |
# File 'lib/bio/maf/maf.rb', line 392
#
# Builds a new Sequence covering this sequence followed by +o+: same
# source, start, strand and src_size as the receiver, with the sizes
# summed and the texts concatenated. Quality strings are concatenated
# only when both sequences have one; otherwise the result has none.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] the combined sequence
def join(o)
  Sequence.new(source, start, size + o.size, strand, src_size, text + o.text).tap do |joined|
    joined.quality = quality + o.quality if quality && o.quality
  end
end
#joinable_with?(o) ⇒ Boolean
386 387 388 389 390 |
# File 'lib/bio/maf/maf.rb', line 386
#
# Whether +o+ can be appended to this sequence: it must start where
# this one ends, be on the same strand, and agree on emptiness.
#
# @param o [Sequence] candidate following sequence
# @return [Boolean]
def joinable_with?(o)
  return false unless self.end == o.start
  return false unless strand == o.strand

  empty? == o.empty?
end
#left_count ⇒ Object
344 345 346 |
# File 'lib/bio/maf/maf.rb', line 344
#
# Second element of #i_data converted with to_i, or the falsy
# #i_data value itself when no synteny data is present.
#
# @return [Integer, nil]
def left_count
  synteny = i_data
  synteny && synteny[1].to_i
end
#left_status ⇒ Object
340 341 342 |
# File 'lib/bio/maf/maf.rb', line 340
#
# Decoded form of #left_status_char, or the falsy #i_data value
# itself when no synteny data is present.
#
# @return [Symbol, nil]
def left_status
  synteny = i_data
  synteny && decode_status_char(left_status_char)
end
#left_status_char ⇒ Object
336 337 338 |
# File 'lib/bio/maf/maf.rb', line 336
#
# First element of #i_data, or the falsy #i_data value itself when
# no synteny data is present.
#
# @return [String, nil]
def left_status_char
  synteny = i_data
  synteny && synteny[0]
end
#right_count ⇒ Object
356 357 358 |
# File 'lib/bio/maf/maf.rb', line 356
#
# Fourth element of #i_data converted with to_i, or the falsy
# #i_data value itself when no synteny data is present.
#
# @return [Integer, nil]
def right_count
  synteny = i_data
  synteny && synteny[3].to_i
end
#right_status ⇒ Object
352 353 354 |
# File 'lib/bio/maf/maf.rb', line 352
#
# Decoded form of #right_status_char, or the falsy #i_data value
# itself when no synteny data is present.
#
# @return [Symbol, nil]
def right_status
  synteny = i_data
  synteny && decode_status_char(right_status_char)
end
#right_status_char ⇒ Object
348 349 350 |
# File 'lib/bio/maf/maf.rb', line 348
#
# Third element of #i_data, or the falsy #i_data value itself when
# no synteny data is present.
#
# @return [String, nil]
def right_status_char
  synteny = i_data
  synteny && synteny[2]
end
#slice(range) ⇒ Object
294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 |
# File 'lib/bio/maf/maf.rb', line 294
#
# Extracts the given range of text offsets as a new Sequence.
# The new start is advanced by the number of non-gap characters that
# precede the range, and the new size is the number of non-gap
# characters inside it. The quality string, when present, is sliced
# with the same range. Synteny data is not carried over.
#
# @param range [Range] offsets into #text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] if the range cannot be extracted from #text
def slice(range)
  leading_bases = text.slice(0...(range.begin)).delete("-").size
  sliced = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless sliced

  piece = Sequence.new(source,
                       start + leading_bases,
                       sliced.delete("-").size,
                       strand,
                       src_size,
                       sliced)
  piece.quality = quality.slice(range) if quality
  # TODO: what to do with synteny data?
  piece
end
#species ⇒ Object
360 361 362 363 |
# File 'lib/bio/maf/maf.rb', line 360
#
# The portion of #source before its first '.', or nil when the
# source name contains no '.'.
#
# @return [String, nil]
def species
  prefix, remainder = source.split('.', 2)
  remainder ? prefix : nil
end
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449 450 451 452 453 454 455 |
# File 'lib/bio/maf/maf.rb', line 410
#
# Maps the given zero-based genomic range onto a range of string
# offsets, suitable for extracting the text for the given range
# from #text.
#
# An empty range is accepted anywhere within the sequence bounds,
# including exactly on the trailing edge of a run of sequence text;
# it maps to an empty offset range at that position, matching what
# the ungapped path returns.
#
# @param range [Range] zero-based genomic range; both inclusive and
#   end-exclusive forms are accepted
# @return [Range] end-exclusive range of offsets into #text
# @raise [RuntimeError] if the range lies outside the sequence bounds
#   or cannot be located within the text
def text_range(range)
  r_end = range.exclude_end? ? range.end : range.end + 1
  r_size = r_end - range.begin

  # special case, entire text
  return 0...text.size if range.begin == start && r_size == size

  if range.begin < start || r_end > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end

  # no gaps, can map indexes directly
  return (range.begin - start)...(r_end - start) unless gapped?

  # An empty range has no base of its own, so it may anchor on the
  # position just past the last base of a run.
  empty_range = r_end == range.begin

  g_start = start # genomic position where the current run begins
  t_start = 0     # text offset where the current run begins
  m_begin = nil   # text offset of the start of the match
  match = nil
  text.scan(/\w+|-+/) do |part|
    unless part.start_with?('-')
      # sequence text: advances both genomic and text positions
      g_end = g_start + part.size
      begins_here = g_start <= range.begin &&
                    (range.begin < g_end || (empty_range && range.begin == g_end))
      m_begin = range.begin - g_start + t_start if begins_here
      if g_start <= r_end && r_end <= g_end
        raise "reached end before start!" unless m_begin
        match = m_begin...(r_end - g_start + t_start)
        break
      end
      g_start = g_end
    end
    # gaps advance only the text position
    t_start += part.size
  end
  raise "no match found!" unless match
  match
end
#to_bio_alignment ⇒ Object
378 379 380 |
# File 'lib/bio/maf/maf.rb', line 378 def to_bio_alignment Bio::BioAlignment::Sequence.new(source, text) end |
#upcase! ⇒ Object
374 375 376 |
# File 'lib/bio/maf/maf.rb', line 374 def upcase! text.upcase! end |