Class: Bio::MAF::Sequence
- Inherits:
-
Object
- Object
- Bio::MAF::Sequence
- Defined in:
- lib/bio/maf/maf.rb
Overview
A sequence within an alignment block.
Direct Known Subclasses
Constant Summary collapse
- I_STATUS =
{ 'C' => :contiguous, 'I' => :intervening, 'N' => :first, 'n' => :first_bridged, 'M' => :missing_data, 'T' => :tandem }
Instance Attribute Summary collapse
-
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
-
#quality ⇒ String
Quality string from 'q' line.
-
#size ⇒ Integer
readonly
Size of aligning region in source sequence.
-
#source ⇒ String
readonly
Source sequence name.
-
#src_size ⇒ Integer
(also: #source_size)
readonly
Size of the entire source sequence, not just the aligning region.
-
#start ⇒ Integer
readonly
Zero-based start position.
-
#strand ⇒ Symbol
readonly
:+ or :-, indicating which strand the alignment is to.
-
#text ⇒ String
readonly
Sequence data for the alignment, including insertions.
Instance Method Summary collapse
- #decode_status_char(c) ⇒ Object
- #delete_text(offset, len) ⇒ Object
-
#empty? ⇒ Boolean
Whether this sequence is empty.
- #end ⇒ Object
- #fasta_desc ⇒ Object
- #gapped? ⇒ Boolean
-
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
constructor
A new instance of Sequence.
- #interval ⇒ Object
- #join(o) ⇒ Object
- #joinable_with?(o) ⇒ Boolean
- #left_count ⇒ Object
- #left_status ⇒ Object
- #left_status_char ⇒ Object
- #right_count ⇒ Object
- #right_status ⇒ Object
- #right_status_char ⇒ Object
- #slice(range) ⇒ Object
- #species ⇒ Object
-
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
- #to_bio_alignment ⇒ Object
Constructor Details
#initialize(source, start, size, strand, src_size, text) ⇒ Sequence
Returns a new instance of Sequence.
260 261 262 263 264 265 266 267 |
# File 'lib/bio/maf/maf.rb', line 260

# Stores the six constructor arguments in the instance variables of the
# same names. Parameter descriptions follow the attribute summary on this
# page.
#
# @param source [String] source sequence name
# @param start [Integer] zero-based start position
# @param size [Integer] size of the aligning region in the source sequence
# @param strand [Symbol] :+ or :-
# @param src_size [Integer] size of the entire source sequence
# @param text [String] sequence data for the alignment, including insertions
def initialize(source, start, size, strand, src_size, text)
  @source, @start, @size = source, start, size
  @strand, @src_size = strand, src_size
  @text = text
end
Instance Attribute Details
#i_data ⇒ Array<String>
Array of raw synteny information from 'i' line.
254 255 256 |
# File 'lib/bio/maf/maf.rb', line 254 def i_data @i_data end |
#quality ⇒ String
Quality string from 'q' line.
257 258 259 |
# File 'lib/bio/maf/maf.rb', line 257 def quality @quality end |
#size ⇒ Integer (readonly)
Returns the size of the aligning region in the source sequence.
241 242 243 |
# File 'lib/bio/maf/maf.rb', line 241 def size @size end |
#source ⇒ String (readonly)
Returns the source sequence name.
237 238 239 |
# File 'lib/bio/maf/maf.rb', line 237 def source @source end |
#src_size ⇒ Integer (readonly) Also known as: source_size
Size of the entire source sequence, not just the aligning region.
248 249 250 |
# File 'lib/bio/maf/maf.rb', line 248 def src_size @src_size end |
#start ⇒ Integer (readonly)
Returns the zero-based start position.
239 240 241 |
# File 'lib/bio/maf/maf.rb', line 239 def start @start end |
#strand ⇒ Symbol (readonly)
:+ or :-, indicating which strand the alignment is to.
244 245 246 |
# File 'lib/bio/maf/maf.rb', line 244 def strand @strand end |
#text ⇒ String (readonly)
Sequence data for the alignment, including insertions.
251 252 253 |
# File 'lib/bio/maf/maf.rb', line 251 def text @text end |
Instance Method Details
#decode_status_char(c) ⇒ Object
315 316 317 |
# File 'lib/bio/maf/maf.rb', line 315

# Looks up a status character in I_STATUS and returns the mapped value.
#
# @param c [String] status character to translate
# @return [Symbol] the I_STATUS entry for c
# @raise [RuntimeError] when I_STATUS yields nil or false for c
def decode_status_char(c)
  status = I_STATUS[c]
  raise("Unsupported status character #{c}!") unless status

  status
end
#delete_text(offset, len) ⇒ Object
348 349 350 351 352 353 354 355 |
# File 'lib/bio/maf/maf.rb', line 348

# Removes len characters starting at offset from text, in place, and from
# quality as well when a quality string is present. Does nothing when
# empty? is true.
#
# @param offset [Integer] string offset of the first character to remove
# @param len [Integer] number of characters to remove
# @return [String, nil] the slice removed from quality, or nil when there
#   is no quality string or the sequence is empty
def delete_text(offset, len)
  return if empty?

  text.slice!(offset, len)
  quality.slice!(offset, len) if quality
end
#empty? ⇒ Boolean
Whether this sequence is empty. Only true for EmptySequence instances from 'e' lines.
298 299 300 |
# File 'lib/bio/maf/maf.rb', line 298 def empty? false end |
#end ⇒ Object
269 270 271 |
# File 'lib/bio/maf/maf.rb', line 269 def end start + size end |
#fasta_desc ⇒ Object
361 362 363 |
# File 'lib/bio/maf/maf.rb', line 361

# Builds a description string of the form "source:start-stop", where stop
# is start + size.
#
# @return [String] the formatted description
def fasta_desc
  stop = start + size
  "#{source}:#{start}-#{stop}"
end
#gapped? ⇒ Boolean
302 303 304 |
# File 'lib/bio/maf/maf.rb', line 302 def gapped? size != text.size end |
#interval ⇒ Object
273 274 275 |
# File 'lib/bio/maf/maf.rb', line 273

# Builds a GenomicInterval for this sequence by passing source, start and
# end to GenomicInterval.zero_based.
#
# @return [Object] whatever GenomicInterval.zero_based returns
def interval
  # `end` is a keyword, so the explicit receiver is required on that call.
  GenomicInterval.zero_based(source, start, self.end)
end
#join(o) ⇒ Object
371 372 373 374 375 376 377 378 379 380 381 382 |
# File 'lib/bio/maf/maf.rb', line 371

# Creates a new Sequence that concatenates this one with o. The result
# keeps this sequence's source, start, strand and src_size; its size and
# text are the sums of both. Quality strings are concatenated only when
# both sequences have one; otherwise the result has no quality set here.
#
# @param o [Sequence] the sequence to append
# @return [Sequence] the combined sequence
def join(o)
  Sequence.new(source, start, size + o.size, strand, src_size, text + o.text).tap do |joined|
    joined.quality = quality + o.quality if quality && o.quality
  end
end
#joinable_with?(o) ⇒ Boolean
365 366 367 368 369 |
# File 'lib/bio/maf/maf.rb', line 365

# Whether o can be joined onto this sequence: this sequence's end must
# equal o's start, both must be on the same strand, and both must agree on
# empty?.
#
# @param o [Sequence] candidate sequence to follow this one
# @return [Boolean] true when all three conditions hold
def joinable_with?(o)
  adjacent = self.end == o.start
  adjacent && strand == o.strand && empty? == o.empty?
end
#left_count ⇒ Object
327 328 329 |
# File 'lib/bio/maf/maf.rb', line 327

# Integer value of the second i_data entry (index 1), or the falsy i_data
# value itself when no synteny data is present.
#
# @return [Integer, nil] i_data[1].to_i, or nil when i_data is nil
def left_count
  synteny = i_data
  synteny && synteny[1].to_i
end
#left_status ⇒ Object
323 324 325 |
# File 'lib/bio/maf/maf.rb', line 323

# Decoded form of the left status character, obtained by passing
# left_status_char to decode_status_char. Returns the falsy i_data value
# itself when no synteny data is present.
#
# @return [Object, nil] result of decode_status_char, or nil when i_data is nil
def left_status
  synteny = i_data
  synteny && decode_status_char(left_status_char)
end
#left_status_char ⇒ Object
319 320 321 |
# File 'lib/bio/maf/maf.rb', line 319

# First i_data entry (index 0), or the falsy i_data value itself when no
# synteny data is present.
#
# @return [String, nil] i_data[0], or nil when i_data is nil
def left_status_char
  synteny = i_data
  synteny && synteny[0]
end
#right_count ⇒ Object
339 340 341 |
# File 'lib/bio/maf/maf.rb', line 339

# Integer value of the fourth i_data entry (index 3), or the falsy i_data
# value itself when no synteny data is present.
#
# @return [Integer, nil] i_data[3].to_i, or nil when i_data is nil
def right_count
  synteny = i_data
  synteny && synteny[3].to_i
end
#right_status ⇒ Object
335 336 337 |
# File 'lib/bio/maf/maf.rb', line 335

# Decoded form of the right status character, obtained by passing
# right_status_char to decode_status_char. Returns the falsy i_data value
# itself when no synteny data is present.
#
# @return [Object, nil] result of decode_status_char, or nil when i_data is nil
def right_status
  synteny = i_data
  synteny && decode_status_char(right_status_char)
end
#right_status_char ⇒ Object
331 332 333 |
# File 'lib/bio/maf/maf.rb', line 331

# Third i_data entry (index 2), or the falsy i_data value itself when no
# synteny data is present.
#
# @return [String, nil] i_data[2], or nil when i_data is nil
def right_status_char
  synteny = i_data
  synteny && synteny[2]
end
#slice(range) ⇒ Object
277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/bio/maf/maf.rb', line 277

# Extracts the given range of text columns as a new Sequence. The new
# start is advanced by the number of non-gap characters that precede the
# range, and the new size is the number of non-gap characters inside it.
# The quality string, when present, is sliced with the same range.
#
# @param range [Range] range of string offsets into text
# @return [Sequence] the sliced sequence
# @raise [RuntimeError] when text.slice(range) yields nothing
def slice(range)
  leading = text.slice(0...(range.begin))
  bases_before = leading.delete("-").size
  piece = text.slice(range)
  raise "could not extract slice #{range} from #{self.inspect}!" unless piece

  bases_in_piece = piece.delete("-").size
  Sequence.new(source, start + bases_before, bases_in_piece, strand, src_size, piece).tap do |part|
    part.quality = quality.slice(range) if quality
    # TODO: what to do with synteny data?
  end
end
#species ⇒ Object
343 344 345 346 |
# File 'lib/bio/maf/maf.rb', line 343

# The portion of source before its first '.', or nil when source contains
# no '.' at all.
#
# @return [String, nil] the prefix before the first dot, if any
def species
  prefix, remainder = source.split('.', 2)
  remainder ? prefix : nil
end
#text_range(range) ⇒ Object
Maps the given zero-based genomic range onto a range of string offsets, suitable for extracting the text for the given range from #text.
389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 |
# File 'lib/bio/maf/maf.rb', line 389

# Maps the given zero-based genomic range onto a range of string offsets,
# suitable for extracting the text for the given range from #text.
#
# @param range [Range] genomic range; inclusive and exclusive ends are
#   both accepted
# @return [Range] end-exclusive range of offsets into text
# @raise [RuntimeError] when the range lies outside start...end, or when no
#   matching offsets can be located in the text
def text_range(range)
  # Normalise to an exclusive end so both range styles are handled alike.
  stop = range.exclude_end? ? range.end : range.end + 1
  span = stop - range.begin

  # Request for the entire sequence: the whole text, gaps included.
  return 0...text.size if range.begin == start && span == size

  if range.begin < start || stop > self.end
    raise "Range #{range} outside sequence bounds; start #{start}, size #{size}"
  end

  # Without gaps, genomic positions and text offsets differ only by start.
  return (range.begin - start)...(stop - start) unless gapped?

  genomic_pos = start  # genomic position where the current run begins
  text_pos = 0         # text offset where the current run begins
  first_offset = nil   # text offset of range.begin, once located
  found = nil
  text.scan(/\w+|-+/) do |run|
    if run[0] != '-'
      # Run of sequence characters: advances the genomic position.
      run_end = genomic_pos + run.size
      if genomic_pos <= range.begin && range.begin < run_end
        first_offset = range.begin - genomic_pos + text_pos
      end
      if genomic_pos <= stop && stop <= run_end
        raise "reached end before start!" unless first_offset
        found = first_offset...(stop - genomic_pos + text_pos)
        break
      end
      genomic_pos = run_end
    end
    # Gap runs and sequence runs alike advance the text offset.
    text_pos += run.size
  end
  raise "no match found!" unless found
  found
end
#to_bio_alignment ⇒ Object
357 358 359 |
# File 'lib/bio/maf/maf.rb', line 357 def to_bio_alignment Bio::BioAlignment::Sequence.new(source, text) end |