Module: Bio::GFFbrowser::Helpers::Gff3Sequence

Defined in:
lib/bio/db/gff/gffassemble.rb

Instance Method Summary collapse

Instance Method Details

#assemble(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false }) ⇒ Object

Patch a sequence together from a Sequence string and an array of records. Note that rec positions are 1-based coordinates, relative to the landmark given in column 1 - in this case the sequence as it is passed in. The following options are available:

:reverse      : do reverse if reverse is indicated (default true)
:complement   : do complement if reverse is indicated (default true)
:phase        : do set CDS phase (default false, normally ignore)
:trim         : trim the end of the sequence so that its length is a multiple of 3 nucleotides (default false)

There are two special options, which override the individual settings above:

:raw          : raw sequence (all above false)
:codonize     : codon sequence (reverse, complement and trim are true)


211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/bio/db/gff/gffassemble.rb', line 211

# Patch a sequence together from a sequence string and an array of records.
# Record positions are 1-based coordinates into +sequence+.
#
# sequence:: a String-like sequence, or a Bio::FastaFormat (its +seq+ is used)
# startpos:: currently unused; kept so existing callers keep working
# reclist::  records to assemble; ordered through Sections::sort
# options::  hash of switches:
#   :reverse    - reverse each section when the first record is on the '-' strand
#   :complement - complement the joined sequence (only together with :reverse
#                 on a '-' strand)
#   :phase      - drop the first +rec.phase+ nucleotides of each section
#   :trim       - cut the tail so the length is a multiple of 3
#   :raw        - forces all of the above off
#   :codonize   - forces reverse, complement and trim on, phase off
#   :debug      - print the effective settings with +p+
#
# NOTE(review): :reverse, :trim and :complement are only disabled by an
# explicit +false+. A caller that passes an options hash without :trim
# therefore gets trimming, although the signature default is :trim=>false.
# Behaviour is preserved here; confirm whether that is intended.
#
# Returns the assembled sequence, or an empty string when there are no
# sections to assemble.
def assemble sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true, :debug=>false }
  do_debug      = options[:debug]
  do_phase      = options[:phase]
  do_reverse    = options[:reverse] != false
  do_trim       = options[:trim] != false
  do_complement = options[:complement] != false
  if options[:raw]
    do_phase = do_reverse = do_trim = do_complement = false
  elsif options[:codonize]
    do_phase = false
    do_reverse = do_trim = do_complement = true
  end

  sectionlist = Sections::sort(reclist)
  first_section = sectionlist.first
  # Nothing to assemble: avoid calling #rec on nil.
  return '' if first_section.nil?

  rec0 = first_section.rec
  # we assume ORF is always read in the same direction
  orf_reverse = (rec0.strand == '-')
  sectionlist = sectionlist.reverse if orf_reverse
  if do_debug
    p "------------------"
    p options
    p [:reverse,do_reverse]
    p [:complement,do_complement]
    p [:trim,do_trim]
    p [:orf_reverse, orf_reverse, rec0.strand]
  end

  # BioRuby conversion
  sequence = sequence.seq if sequence.kind_of?(Bio::FastaFormat)

  # Generate array of sequences, one per section
  parts = sectionlist.map do |section|
    rec = section.rec
    s = sequence[(section.begin-1)..(section.end-1)]
    s = s.reverse if do_reverse && orf_reverse
    # The use of phase is ambiguous. No frame check is done here: when
    # :phase is set and the record carries a phase, it is always applied.
    if do_phase && rec.phase
      phase = rec.phase.to_i
      s = s[phase..-1]
    end
    s
  end

  seq = parts.join
  if do_complement && do_reverse && orf_reverse
    seq = Bio::Sequence::NA.new(seq).forward_complement.upcase
  end
  if do_trim
    reduce = seq.size % 3
    # Slice by length so that sequences shorter than one codon become empty
    # (an index range ending at -1 would return the whole string).
    seq = seq[0, seq.size - reduce] unless reduce.zero?
  end
  seq
end

#assembleAA(sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true }) ⇒ Object

Patch a sequence together from a Sequence string and an array of records and translate in the correct direction and frame. The options are the same as for assemble.



283
284
285
286
287
# File 'lib/bio/db/gff/gffassemble.rb', line 283

# Assemble the nucleotide sequence for +reclist+ with #assemble (same
# arguments and options) and return its translation, obtained by wrapping
# the result in a Bio::Sequence::NA and calling +translate+ on it.
def assembleAA sequence, startpos, reclist, options = { :phase=>false, :reverse=>true, :trim=>false, :complement=>true }
  Bio::Sequence::NA.new(assemble(sequence, startpos, reclist, options)).translate
end

#description(id, component, rec) ⇒ Object

Create a description for output



290
291
292
293
294
# File 'lib/bio/db/gff/gffassemble.rb', line 290

# Create a description line for output. The result has the form
#
#   <id> Sequence:<seqname>_<start>:<end> (<first>:<last>, <first>:<last>, ...)
#
# where seqname/start/end come from +component+ and each first:last pair
# comes from one item returned by Sections::sort(rec).
def description id, component, rec
  ranges = Sections::sort(rec).map { |section| "#{section.first}:#{section.last}" }
  location = "#{component.start}:#{component.end}"
  id + ' Sequence:' + component.seqname + "_#{location} (" + ranges.join(', ') + ")"
end