Class: Bio::MAF::Tiler
- Inherits:
- Object
- Inheritance hierarchy:
- Object → Bio::MAF::Tiler
- Defined in:
- lib/bio/maf/tiler.rb
Overview
Tiles a given genomic interval. Inspired by: lib/bx/align/tools/tile.py in bx-python
Instance Attribute Summary collapse
-
#index ⇒ Object
Returns the value of attribute index.
-
#interval ⇒ Object
GenomicInterval.
-
#parser ⇒ Object
Returns the value of attribute parser.
-
#reference ⇒ Object
Returns the value of attribute reference.
-
#species ⇒ Object
Returns the value of attribute species.
-
#species_map ⇒ Object
Returns the value of attribute species_map.
Instance Method Summary collapse
-
#initialize ⇒ Tiler
constructor
A new instance of Tiler.
- #ref_data(range) ⇒ Object
- #runs(mask) {|cur_start...mask.size, cur| ... } ⇒ Object
- #species_to_use ⇒ Object
- #tile ⇒ Object
- #write_fasta(f) ⇒ Object
Constructor Details
#initialize ⇒ Tiler
Returns a new instance of Tiler.
19 20 21 |
# File 'lib/bio/maf/tiler.rb', line 19 def initialize @species_map = {} end |
Instance Attribute Details
#index ⇒ Object
Returns the value of attribute index.
11 12 13 |
# File 'lib/bio/maf/tiler.rb', line 11 def index @index end |
#interval ⇒ Object
GenomicInterval
15 16 17 |
# File 'lib/bio/maf/tiler.rb', line 15 def interval @interval end |
#parser ⇒ Object
Returns the value of attribute parser.
12 13 14 |
# File 'lib/bio/maf/tiler.rb', line 12 def parser @parser end |
#reference ⇒ Object
Returns the value of attribute reference.
13 14 15 |
# File 'lib/bio/maf/tiler.rb', line 13 def reference @reference end |
#species ⇒ Object
Returns the value of attribute species.
16 17 18 |
# File 'lib/bio/maf/tiler.rb', line 16 def species @species end |
#species_map ⇒ Object
Returns the value of attribute species_map.
17 18 19 |
# File 'lib/bio/maf/tiler.rb', line 17 def species_map @species_map end |
Instance Method Details
#ref_data(range) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/bio/maf/tiler.rb', line 42
# Fetches the reference sequence text covering +range+.
#
# Two kinds of reference source are supported:
# * any object responding to +read_interval+, which is called with the
#   range's begin and end;
# * a String, which is sliced directly with +range+.
#
# @param range [Range] the positions to fetch
# @return [Object, nil] whatever the reference source returns for the range,
#   or nil when no reference is configured
# @raise [RuntimeError] when the reference is neither of the supported kinds
def ref_data(range)
  # Without a reference there is nothing to read.
  return nil unless reference

  if reference.respond_to?(:read_interval)
    reference.read_interval(range.begin, range.end)
  elsif reference.is_a?(String)
    reference.slice(range)
  else
    raise "Unhandled reference data source: #{reference}"
  end
end
#runs(mask) {|cur_start...mask.size, cur| ... } ⇒ Object
120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/bio/maf/tiler.rb', line 120
# Splits +mask+ into maximal runs of consecutive identical entries and yields
# each run to the block, in order.
#
# Entries are compared by object identity (+equal?+), not by +==+, so two
# distinct objects that merely compare equal start separate runs.
#
# An empty mask yields nothing. Every run is yielded exactly once, including
# runs whose entry is nil or false.
#
# @param mask [Array] the entries to group into runs
# @yield [range, obj] the half-open index range of the run and the entry
#   that fills it
# @return [Object, nil] the value of the final yield, or nil for an empty mask
def runs(mask)
  # An empty mask has no runs; without this guard the trailing yield would
  # be called with a meaningless nil...0 range.
  return if mask.empty?

  # Seed the state from the first entry so that "no run started yet" is never
  # confused with a run whose entry happens to be nil or false.
  cur = mask.first
  cur_start = 0
  mask.each_with_index do |obj, i|
    next if cur.equal?(obj)

    # The entry changed: close the run that ended just before index i.
    yield(cur_start...i, cur)
    cur = obj
    cur_start = i
  end
  # Close the final run, which extends to the end of the mask.
  yield(cur_start...mask.size, cur)
end
#species_to_use ⇒ Object
56 57 58 |
# File 'lib/bio/maf/tiler.rb', line 56
# Determines which species are tiled.
#
# @return [Object] the +species+ attribute when it is set (truthy);
#   otherwise the keys of +species_map+
def species_to_use
  explicit = species
  return explicit if explicit

  species_map.keys
end
#tile ⇒ Object
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 |
# File 'lib/bio/maf/tiler.rb', line 60
# Tiles the configured interval and returns one text string per entry of
# +species_to_use+, in the same order.
#
# Steps:
# 1. Restrict the parser to the species being tiled and fetch the blocks
#    overlapping the interval from the index, sorted by ascending score.
# 2. Build a per-position mask, initially :ref everywhere, and fill each
#    block's overlap with the interval into it. Blocks are applied in
#    ascending score order, so a higher-scoring block overwrites a
#    lower-scoring one where they overlap.
# 3. Walk the runs of the mask. For an uncovered run (:ref) the first row
#    receives reference text (or 'N's when there is no reference data) and
#    all other rows receive '*'. For a covered run each row receives the
#    matching sequence's alignment text, or '*' when the block has no
#    sequence for that species.
#
# NOTE(review): the first row always receives the reference text, so the
# first entry of species_to_use is presumably the reference species - confirm.
#
# @return [Array<String>] the tiled text rows
def tile
  parser.sequence_filter[:only_species] = species_to_use
  # TODO: remove gaps
  by_score = index.find([interval], parser).sort_by { |blk| blk.vars[:score] }
  coverage = Array.new(interval.length, :ref)
  i_start = interval.zero_start
  i_end = interval.zero_end
  ref_region = reference ? ref_data(i_start...i_end) : nil

  # Paint each block onto the coverage mask, clipped to the interval.
  by_score.each do |blk|
    ref_seq = blk.ref_seq
    clip_from = [i_start, ref_seq.start].max
    clip_to = [i_end, ref_seq.end].min
    coverage.fill(blk, (clip_from - i_start)...(clip_to - i_start))
  end

  # One independent, mutable string per species.
  rows = species_to_use.map { '' }
  other_rows = rows[1...rows.size]

  runs(coverage) do |span, owner|
    if owner == :ref
      # not covered by an alignment block
      # use the reference sequence if given, otherwise 'N'
      span_len = span.end - span.begin
      rows[0] << (ref_region ? ref_region.slice(span) : 'N' * span_len)
      filler = '*' * span_len
      other_rows.each { |row| row << filler }
    else
      # covered by an alignment block
      # Translate the mask-relative span back to genomic coordinates.
      g_range = (span.begin + i_start)...(span.end + i_start)
      t_range = owner.ref_seq.text_range(g_range)
      species_to_use.each_with_index do |wanted, row_idx|
        row = rows[row_idx]
        seq = owner.sequences.find { |s| s.source == wanted || s.species == wanted }
        # got alignment text, or no alignment for this one here: use '*'
        row << (seq ? seq.text.slice(t_range) : '*' * (t_range.end - t_range.begin))
      end
    end
  end
  rows
end
#write_fasta(f) ⇒ Object
112 113 114 115 116 117 118 |
# File 'lib/bio/maf/tiler.rb', line 112
# Writes the tiled text to +f+ in FASTA layout: for each species, a ">" header
# line followed by that species' tiled text.
#
# The header uses the species_map entry for the species when one exists,
# and the species identifier itself otherwise.
#
# @param f [#puts] destination for the output
# @return [nil]
def write_fasta(f)
  names = species_to_use
  rows = tile
  # zip with a block pairs each species with its row and returns nil.
  names.zip(rows) do |name, row|
    sp_out = species_map[name] || name
    f.puts ">#{sp_out}"
    f.puts row
  end
end