Class: Bio::MAF::Block
- Inherits: Object
  - Object
  - Bio::MAF::Block
- Defined in:
- lib/bio/maf/maf.rb
Overview
A MAF alignment block.
Constant Summary collapse
- GAP = /-+/
Instance Attribute Summary collapse
-
#offset ⇒ Integer
Offset of the alignment block within the MAF file, in bytes.
-
#orig_text ⇒ String
Original text of the MAF block.
-
#sequences ⇒ Array<Sequence>
readonly
Sequences, one per 's' or 'e' line.
-
#size ⇒ Integer
readonly
Size of the alignment block within the MAF file, in bytes.
-
#vars ⇒ Object
readonly
Parameters from the 'a' line starting the alignment block.
Instance Method Summary collapse
- #_slice(interval) ⇒ Object
- #_slice_text_range(interval) ⇒ Object
- #each_raw_seq ⇒ Object
-
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
-
#find_gaps ⇒ Object
Find gaps present in all sequences.
-
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
constructor
A new instance of Block.
- #join(other) ⇒ Object
- #joinable_with?(other) ⇒ Boolean
- #raw_seq(i) ⇒ Object
- #ref_seq ⇒ Object
-
#remove_gaps! ⇒ Object
Remove gaps present in all sequences.
- #seq_from(src, pos_guess) ⇒ Object
-
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
-
#text_size ⇒ Object
Text size of the alignment block.
- #to_bio_alignment ⇒ Object
- #to_s ⇒ Object
- #upcase! ⇒ Object
Constructor Details
#initialize(vars, sequences, offset, size, filtered) ⇒ Block
Returns a new instance of Block.
70 71 72 73 74 75 76 |
# File 'lib/bio/maf/maf.rb', line 70
#
# Creates a block from already-parsed parts. Every argument is stored as-is
# in the instance variable of the same name.
#
# @param vars parameters from the 'a' line starting the alignment block
# @param sequences [Array<Sequence>] sequences, one per 's' or 'e' line
# @param offset [Integer] offset of the block within the MAF file, in bytes
# @param size [Integer] size of the block within the MAF file, in bytes
# @param filtered whether the block has been modified by a parser filter
def initialize(vars, sequences, offset, size, filtered)
  @offset = offset
  @size = size
  @vars = vars
  @sequences = sequences
  @filtered = filtered
end
Instance Attribute Details
#offset ⇒ Integer
Offset of the alignment block within the MAF file, in bytes.
61 62 63 |
# File 'lib/bio/maf/maf.rb', line 61
#
# @return [Integer] offset of the alignment block within the MAF file,
#   in bytes
def offset
  @offset
end
#orig_text ⇒ String
Original text of the MAF block. Only available if the :retain_text parser option is set.
68 69 70 |
# File 'lib/bio/maf/maf.rb', line 68
#
# @return [String] original text of the MAF block; only available if the
#   :retain_text parser option is set
def orig_text
  @orig_text
end
#sequences ⇒ Array<Sequence> (readonly)
Sequences, one per 's' or 'e' line.
58 59 60 |
# File 'lib/bio/maf/maf.rb', line 58
#
# @return [Array<Sequence>] sequences, one per 's' or 'e' line
def sequences
  @sequences
end
#size ⇒ Integer (readonly)
Size of the alignment block within the MAF file, in bytes.
64 65 66 |
# File 'lib/bio/maf/maf.rb', line 64
#
# @return [Integer] size of the alignment block within the MAF file,
#   in bytes
def size
  @size
end
#vars ⇒ Object (readonly)
Parameters from the 'a' line starting the alignment block.
55 56 57 |
# File 'lib/bio/maf/maf.rb', line 55
#
# @return [Object] parameters from the 'a' line starting the alignment block
def vars
  @vars
end
Instance Method Details
#_slice(interval) ⇒ Object
177 178 179 180 181 182 183 184 |
# File 'lib/bio/maf/maf.rb', line 177
#
# Builds a new Block in which every sequence is sliced to the text range that
# _slice_text_range computes for +interval+. The new block gets a copy of
# +vars+ and reuses this block's offset, size and filtered flag.
#
# @param interval the interval handed to _slice_text_range
# @return [Block] the sliced block
def _slice(interval)
  columns = _slice_text_range(interval)
  sliced = sequences.map { |seq| seq.slice(columns) }
  # v2[:score] = '0.0'
  # TODO: should the filtered param be #modified? instead?
  Block.new(vars.dup, sliced, offset, size, @filtered)
end
#_slice_text_range(interval) ⇒ Object
186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 |
# File 'lib/bio/maf/maf.rb', line 186
#
# Translates +interval+ (read through its zero_start / zero_end) into a range
# of text columns of the reference sequence. Sequence positions are counted
# from ref_seq.start and advance only on non-gap characters, so '-' columns
# never consume a position.
#
# @param interval object responding to zero_start and zero_end
# @return [Range] end-exclusive range of text columns
# @raise [RuntimeError] if the start or the end position is never reached
def _slice_text_range(interval)
  wanted_start = interval.zero_start
  wanted_end = interval.zero_end
  ref_pos = ref_seq.start
  first_col = nil
  last_col = nil
  ref_seq.text.each_char.with_index do |char, col|
    # gap columns do not advance the sequence position
    next if char == '-'

    first_col = col if ref_pos == wanted_start
    ref_pos += 1
    if first_col && ref_pos == wanted_end
      last_col = col + 1
      break
    end
  end
  unless first_col && last_col
    raise "did not find start and end for #{interval} in #{ref_seq.inspect}!"
  end
  first_col...last_col
end
#each_raw_seq ⇒ Object
86 87 88 |
# File 'lib/bio/maf/maf.rb', line 86
#
# Yields each entry of +sequences+ in order.
#
# Without a block this returns an Enumerator over the same entries instead of
# failing with LocalJumpError on the first yield.
#
# @yieldparam s one entry of +sequences+
# @return [Array, Enumerator] +sequences+ when a block is given, otherwise
#   an Enumerator
def each_raw_seq
  return enum_for(:each_raw_seq) unless block_given?

  sequences.each { |s| yield s }
end
#filtered? ⇒ Boolean
Whether this block has been modified by a parser filter.
103 104 105 |
# File 'lib/bio/maf/maf.rb', line 103
#
# @return [Boolean] whether this block has been modified by a parser filter
#   (the value passed as +filtered+ to the constructor)
def filtered?
  @filtered
end
#find_gaps ⇒ Object
Find gaps present in all sequences. These would generally occur when some sequences have been filtered out.
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/bio/maf/maf.rb', line 126
#
# Find gaps present in all sequences. These would generally occur when some
# sequences have been filtered out.
#
# Each run of '-' in the first sequence is looked up at the same text offset
# in every other non-empty sequence. If all of them have a gap starting
# there, the shortest of the runs is recorded.
#
# A block whose first sequence is the only non-empty one is handled too: the
# gaps of that sequence are returned. Previously this raised ArgumentError
# because nil was compared against an Integer in +min+.
#
# NOTE(review): the other sequences are only tested at the start of each
# reference gap run, so a shared gap that begins part-way through a
# reference run is not reported.
#
# @return [Array<Array(Integer, Integer)>] [text offset, length] pairs in
#   ascending offset order; empty if there are no sequences
def find_gaps
  ref = sequences.first
  return [] unless ref

  ref_s = StringScanner.new(ref.text)
  others = sequences.drop(1)
                    .reject { |s| s.empty? }
                    .map { |s| StringScanner.new(s.text) }
  gaps = []
  while ref_s.scan_until(GAP)
    # named gap_start so the #offset attribute is not shadowed
    gap_start = ref_s.pos - ref_s.matched_size
    others.each { |s| s.pos = gap_start }
    next unless others.all? { |s| s.scan(GAP) }

    # splat keeps the list free of nil when there are no other sequences
    gap_size = [ref_s.matched_size, *others.map { |s| s.matched_size }].min
    gaps << [gap_start, gap_size]
  end
  gaps
end
#join(other) ⇒ Object
229 230 231 232 233 234 235 236 237 |
# File 'lib/bio/maf/maf.rb', line 229
#
# Joins this block with +other+. Each sequence is joined to the sequence of
# +other+ that has the same source, looked up through other.seq_from with
# this sequence's index as the position guess. The result carries a copy of
# +vars+ with :score set to '0.0', this block's offset and filtered flag,
# and a nil size.
#
# @param other the block whose sequences are appended
# @return [Block] the joined block
def join(other)
  joined = sequences.each_with_index.map do |mine, idx|
    theirs = other.seq_from(mine.source, idx)
    mine.join(theirs)
  end
  merged_vars = vars.dup
  merged_vars[:score] = '0.0'
  Block.new(merged_vars, joined, offset, nil, @filtered)
end
#joinable_with?(other) ⇒ Boolean
211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/bio/maf/maf.rb', line 211
#
# Whether +other+ can be joined onto the end of this block. This requires:
#
# * both blocks have the same number of sequences;
# * the reference sequences share a source, and this block's reference ends
#   where the other one starts;
# * every sequence here has a counterpart with the same source in +other+,
#   and is itself joinable_with? that counterpart.
#
# The per-sequence check covers the reference sequence as well. The earlier
# code called Enumerator#next before #find to skip it, but that only moves
# the external cursor, so #find still started at index 0. The no-op call has
# been removed and the results are unchanged.
#
# @param other the candidate block to append
# @return [Boolean]
def joinable_with?(other)
  return false unless sequences.size == other.sequences.size

  r1 = ref_seq
  r2 = other.ref_seq
  return false if r1.source != r2.source
  return false if r1.end != r2.start

  sequences.each_with_index.none? do |s1, i|
    s2 = other.seq_from(s1.source, i)
    !s2 || !s1.joinable_with?(s2)
  end
end
#raw_seq(i) ⇒ Object
82 83 84 |
# File 'lib/bio/maf/maf.rb', line 82
#
# Returns the sequence at index +i+ via +fetch+, so an index outside the
# list raises instead of returning nil.
#
# @param i [Integer] index into +sequences+
# @return the sequence stored at that index
def raw_seq(i)
  sequences.fetch(i)
end
#ref_seq ⇒ Object
78 79 80 |
# File 'lib/bio/maf/maf.rb', line 78
#
# The reference sequence, i.e. the first entry of +sequences+.
#
# @return the first sequence, or nil if there are none
def ref_seq
  sequences.first
end
#remove_gaps! ⇒ Object
Remove gaps present in all sequences. These would generally occur when some sequences have been filtered out.
148 149 150 151 152 153 154 155 156 |
# File 'lib/bio/maf/maf.rb', line 148
#
# Remove gaps present in all sequences. These would generally occur when
# some sequences have been filtered out.
#
# The gaps reported by find_gaps are deleted from every sequence, last gap
# first, so earlier offsets stay valid while text is removed.
#
# @return [Integer] number of gaps removed
def remove_gaps!
  found = find_gaps
  found.reverse_each do |gap_start, gap_len|
    sequences.each { |seq| seq.delete_text(gap_start, gap_len) }
  end
  found.size
end
#seq_from(src, pos_guess) ⇒ Object
239 240 241 242 243 244 245 246 |
# File 'lib/bio/maf/maf.rb', line 239
#
# Finds the sequence whose source is +src+.
#
# +pos_guess+ is tried first as a shortcut. If there is no sequence at that
# index, or its source differs, the whole list is searched. An out-of-range
# guess therefore falls back to the search instead of raising NoMethodError
# on nil.
#
# @param src the source to look for
# @param pos_guess [Integer] index where the sequence is expected to be
# @return the matching sequence, or nil if no sequence has that source
def seq_from(src, pos_guess)
  guess = sequences[pos_guess]
  return guess if guess && guess.source == src

  sequences.find { |s| s.source == src }
end
#slice(interval) ⇒ Block
Returns a new Block covering only the region where it overlaps the given interval.
162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/bio/maf/maf.rb', line 162
#
# Returns a new Block covering only the region where it overlaps the given
# interval. The interval is compared with the reference sequence's interval:
# an equal interval returns this block itself, and an overlapping one is
# sliced to the intersection.
#
# @param interval the interval to slice to
# @return [Block] self when the intervals are equal, otherwise a new block
# @raise [RuntimeError] if the interval does not overlap the block, lies on
#   a different chromosome, or the comparison result is not recognised
def slice(interval)
  case interval.compare(ref_seq.interval)
  when :different_chrom
    raise "Cannot slice a block with reference sequence #{ref_seq.source} using an interval on #{interval.chrom}!"
  when :left_adjacent, :right_adjacent, :left_off, :right_off
    raise "Cannot slice a block with a non-overlapping interval! Block #{ref_seq.interval}, interval #{interval}"
  when :equal
    self
  when :contains, :contained_by, :left_overlapped, :right_overlapped
    overlap = interval.intersection(ref_seq.interval)
    _slice(overlap)
  else
    raise "Unhandled comparison result: #{interval.compare(ref_seq.interval)}"
  end
end
#text_size ⇒ Object
Text size of the alignment block. This is the number of text characters in each line of sequence data, including dashes and other gaps in the sequence.
93 94 95 |
# File 'lib/bio/maf/maf.rb', line 93
#
# Text size of the alignment block. This is the number of text characters in
# each line of sequence data, including dashes and other gaps in the
# sequence. It is measured on the first sequence.
#
# @return [Integer] size of the first sequence's text
def text_size
  first_text = sequences.first.text
  first_text.size
end
#to_bio_alignment ⇒ Object
107 108 109 110 |
# File 'lib/bio/maf/maf.rb', line 107
#
# Converts every sequence with its own to_bio_alignment and wraps the
# results in a Bio::BioAlignment::Alignment.
#
# @return [Bio::BioAlignment::Alignment]
def to_bio_alignment
  converted = sequences.map { |seq| seq.to_bio_alignment }
  Bio::BioAlignment::Alignment.new(converted)
end
#to_s ⇒ Object
112 113 114 115 116 117 |
# File 'lib/bio/maf/maf.rb', line 112
#
# Renders this block by having a Writer write it into an in-memory buffer.
#
# @return [String] whatever the Writer emitted for this block
def to_s
  out = StringIO.new
  Writer.new(out).write_block(self)
  out.string
end
#upcase! ⇒ Object
97 98 99 |
# File 'lib/bio/maf/maf.rb', line 97
#
# Calls upcase! on every sequence of the block.
#
# @return [Array<Sequence>] the block's sequences
def upcase!
  sequences.each(&:upcase!)
end