Class: Bio::MAF::Tiler

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/maf/tiler.rb

Overview

Tiles a given genomic interval. Inspired by: lib/bx/align/tools/tile.py in bx-python

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Tiler

Returns a new instance of Tiler.



19
20
21
# File 'lib/bio/maf/tiler.rb', line 19

# Creates a Tiler whose species map starts out as an empty Hash.
# No other instance variable is assigned here.
def initialize
  @species_map = {}
end

Instance Attribute Details

#index ⇒ Object

Returns the value of attribute index.



11
12
13
# File 'lib/bio/maf/tiler.rb', line 11

# Reader for the +index+ attribute.
#
# @return [Object] the current value of @index; #tile calls
#   +find([interval], parser)+ on it to obtain alignment blocks
def index
  @index
end

#interval ⇒ Object

GenomicInterval



15
16
17
# File 'lib/bio/maf/tiler.rb', line 15

# Reader for the +interval+ attribute (documented as a GenomicInterval).
#
# @return [Object] the current value of @interval; #tile reads its
#   +length+, +zero_start+ and +zero_end+
def interval
  @interval
end

#parser ⇒ Object

Returns the value of attribute parser.



12
13
14
# File 'lib/bio/maf/tiler.rb', line 12

# Reader for the +parser+ attribute.
#
# @return [Object] the current value of @parser; #tile stores the species
#   list in its +sequence_filter[:only_species]+ and passes it to +index.find+
def parser
  @parser
end

#reference ⇒ Object

Returns the value of attribute reference.



13
14
15
# File 'lib/bio/maf/tiler.rb', line 13

# Reader for the +reference+ attribute.
#
# @return [Object, nil] the current value of @reference; #ref_data accepts
#   either an object responding to +read_interval+ or a String
def reference
  @reference
end

#species ⇒ Object

Returns the value of attribute species.



16
17
18
# File 'lib/bio/maf/tiler.rb', line 16

# Reader for the +species+ attribute.
#
# @return [Object, nil] the current value of @species; when it is nil or
#   false, #species_to_use falls back to the keys of +species_map+
def species
  @species
end

#species_map ⇒ Object

Returns the value of attribute species_map.



17
18
19
# File 'lib/bio/maf/tiler.rb', line 17

# Reader for the +species_map+ attribute.
#
# @return [Hash] the current value of @species_map (an empty Hash after
#   #initialize); #write_fasta looks up each species here to pick the
#   name written in the FASTA header
def species_map
  @species_map
end

Instance Method Details

#ref_data(range) ⇒ Object



42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/bio/maf/tiler.rb', line 42

# Fetches reference sequence data for +range+ from the configured
# +reference+ source.
#
# @param range [Range] positions to fetch; its +begin+ and +end+ are handed
#   to +read_interval+, or the range itself to +String#slice+
# @return [Object, nil] whatever the source returns, or nil when no
#   reference is configured
# @raise [RuntimeError] if the reference neither responds to
#   +read_interval+ nor is a String
def ref_data(range)
  source = reference
  return nil unless source

  # Objects exposing read_interval take precedence over plain strings.
  return source.read_interval(range.begin, range.end) if source.respond_to?(:read_interval)
  return source.slice(range) if source.is_a?(String)

  raise "Unhandled reference data source: #{source}"
end

#runs(mask) {|cur_start...mask.size, cur| ... } ⇒ Object

Yields:

  • (cur_start...mask.size, cur)


120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/bio/maf/tiler.rb', line 120

# Splits +mask+ into maximal runs of consecutive identical objects and
# yields each run in order.
#
# Elements are compared by identity (+equal?+), not by +==+, so two distinct
# objects with equal contents start separate runs.
#
# An empty mask yields nothing. Runs whose object is +nil+ or +false+ are
# yielded like any other run.
#
# @param mask [Array] the sequence to scan
# @yieldparam range [Range] exclusive index range (start...stop) of the run
# @yieldparam obj [Object] the object occupying every position of the run
# @return [Object, nil] the value of the last block call, or nil for an
#   empty mask
def runs(mask)
  # Nothing to report; avoids yielding a range with a nil start.
  return if mask.empty?

  # Seed the current run from the first element so nil is never used as a
  # "no run yet" sentinel.
  run_obj = mask.first
  run_start = 0
  mask.each_with_index do |obj, i|
    next if run_obj.equal?(obj)

    yield(run_start...i, run_obj)
    run_obj = obj
    run_start = i
  end
  # Flush the final run, which extends to the end of the mask.
  yield(run_start...mask.size, run_obj)
end

#species_to_use ⇒ Object



56
57
58
# File 'lib/bio/maf/tiler.rb', line 56

# Chooses the list of species to tile.
#
# @return [Object] +species+ when it is set (truthy), otherwise the keys
#   of +species_map+
def species_to_use
  chosen = species
  return chosen if chosen

  species_map.keys
end

#tile ⇒ Object



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/bio/maf/tiler.rb', line 60

# Builds one text string per species covering +interval+.
#
# Blocks returned by +index.find+ are painted onto a per-position mask in
# ascending +vars[:score]+ order, so a block processed later overwrites
# earlier ones where they overlap. Each run of the mask then contributes:
# * uncovered positions: reference data (or 'N' when no reference data is
#   available) to the first string and '*' to every other string;
# * covered positions: the matching sequence's alignment text, or '*' for
#   species with no sequence in that block.
#
# Side effect: sets +parser.sequence_filter[:only_species]+.
#
# @return [Array<String>] one string per entry of +species_to_use+, in the
#   same order; the first entry receives the reference/'N' fill
def tile
  parser.sequence_filter[:only_species] = species_to_use
  # TODO: remove gaps
  scored_blocks = index.find([interval], parser).sort_by { |blk| blk.vars[:score] }
  coverage = Array.new(interval.length, :ref)
  first_pos = interval.zero_start
  last_pos = interval.zero_end
  ref_region = ref_data(first_pos...last_pos) if reference

  # Paint each block over the part of the interval it overlaps, expressed
  # as offsets relative to the interval start.
  scored_blocks.each do |blk|
    block_ref = blk.ref_seq
    from = [first_pos, block_ref.start].max - first_pos
    to = [last_pos, block_ref.end].min - first_pos
    coverage.fill(blk, from...to)
  end

  rows = species_to_use.map { '' }
  other_rows = rows[1...rows.size]

  runs(coverage) do |span, owner|
    # Same run, shifted back from mask offsets by the interval start.
    genomic = (span.begin + first_pos)...(span.end + first_pos)
    if owner == :ref
      # not covered by an alignment block
      # use the reference sequence if given, otherwise 'N'
      width = span.end - span.begin
      rows[0] << (ref_region ? ref_region.slice(span) : 'N' * width)
      filler = '*' * width
      other_rows.each { |row| row << filler }
    else
      # covered by an alignment block
      aln_cols = owner.ref_seq.text_range(genomic)
      species_to_use.each_with_index do |name, idx|
        row = rows[idx]
        hit = owner.sequences.find { |sq| sq.source == name || sq.species == name }
        # alignment text when present, otherwise '*' padding of equal width
        piece = hit ? hit.text.slice(aln_cols) : '*' * (aln_cols.end - aln_cols.begin)
        row << piece
      end
    end
  end

  rows
end

#write_fasta(f) ⇒ Object



112
113
114
115
116
117
118
# File 'lib/bio/maf/tiler.rb', line 112

# Writes the tiled text for every species to +f+ in FASTA form: a
# ">name" header line followed by the species' text.
#
# The header name is the +species_map+ entry for the species when one
# exists (and is truthy), otherwise the species identifier itself.
#
# @param f [#puts] destination for the output
# @return [nil]
def write_fasta(f)
  names = species_to_use
  names.zip(tile).each do |name, seq_text|
    label = species_map[name] || name
    f.puts ">#{label}"
    f.puts seq_text
  end
  nil
end