Class: GTF::Transcript

Inherits:
Object
  • Object
show all
Defined in:
lib/gtf/gene.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(array, name, gtf) ⇒ Transcript

Returns a new instance of Transcript.



87
88
89
90
91
92
93
# File 'lib/gtf/gene.rb', line 87

# Wraps a list of interval records belonging to one transcript.
#
# @param array [Array] interval records; each must respond to +feature+
# @param name [Object] kept in +@name+ and exposed by the +name+ reader
# @param gtf [Object] kept in +@gtf+; not used further in this method
def initialize array, name, gtf
  @gtf = gtf
  @name = name
  @intervals = array

  # The first record whose feature is "transcript" (nil when there is none)
  # becomes the delegate for otherwise-undefined methods.
  @transcript = array.find { |interval| interval.feature == "transcript" }
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method, which forwards unknown calls to the wrapped transcript record.

#method_missing(sym, *args, &block) ⇒ Object



263
264
265
# File 'lib/gtf/gene.rb', line 263

# Forwards any message this class does not define to the wrapped
# "transcript" record, so that record's fields can be read directly here.
#
# Dispatch uses +public_send+ rather than +send+. A call with an explicit
# receiver that names a private method (for example a private Kernel method
# such as +exit+ or +system+) also ends up in +method_missing+, and +send+
# would then execute it on the delegate. +public_send+ restricts delegation
# to the delegate's public interface, which is also what
# +respond_to_missing?+ advertises.
#
# @param sym [Symbol] name of the missing method
# @param args [Array] positional arguments, forwarded unchanged
# @param block [Proc, nil] block, forwarded unchanged
# @return [Object] whatever the delegate returns
# @raise [NoMethodError] when the delegate has no public method +sym+ and
#   does not handle it through its own +method_missing+
def method_missing sym, *args, &block
  @transcript.public_send(sym, *args, &block)
end

Instance Attribute Details

#intervals ⇒ Object (readonly)

Returns the value of attribute intervals.



86
87
88
# File 'lib/gtf/gene.rb', line 86

# Reader for +@intervals+, the list handed to the constructor as +array+.
# build_introns and build_utrs append to this same list in place.
def intervals
  @intervals
end

#introns ⇒ Object (readonly)

Returns the value of attribute introns.



86
87
88
# File 'lib/gtf/gene.rb', line 86

# Reader for +@introns+. The only assignment visible in this class is in
# build_introns, so this is nil until that method has run.
def introns
  @introns
end

#name ⇒ Object (readonly)

Returns the value of attribute name.



86
87
88
# File 'lib/gtf/gene.rb', line 86

# Reader for +@name+, the +name+ argument given to the constructor.
def name
  @name
end

#transcript ⇒ Object (readonly)

Returns the value of attribute transcript.



86
87
88
# File 'lib/gtf/gene.rb', line 86

# Reader for +@transcript+: the first interval whose feature is "transcript"
# (nil when the constructor found none). method_missing forwards unknown
# calls to this object.
def transcript
  @transcript
end

Instance Method Details

#build_introns ⇒ Object



237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
# File 'lib/gtf/gene.rb', line 237

# Derives intron records from the gaps between consecutive exons, stores them
# in +@introns+ and appends them to +@intervals+.
#
# Exons are paired in the order +exons+ returns them. Each intron is a clone
# of the leading exon that runs from one past that exon's +stop+ to one before
# the next exon's +start+, and is tagged with both neighbours.
#
# NOTE(review): nothing here sorts the exons or guards against a second call
# appending the introns to +@intervals+ again — confirm callers ensure both.
#
# @return [Array, nil] +@intervals+ after the introns were appended
def build_introns
  return unless exons
  @introns = exons.each_cons(2).map do |leading, trailing|
    gap = leading.clone do |copy|
      copy.start = leading.stop + 1
      copy.stop = trailing.start - 1
    end
    gap.feature = "intron"
    gap.prev_exon = leading
    gap.post_exon = trailing
    gap
  end
  @intervals.concat(@introns)
end

#build_utrs ⇒ Object



254
255
256
257
# File 'lib/gtf/gene.rb', line 254

# Appends the cached UTR lists to +@intervals+, 3' first and then 5'.
#
# Only the instance variables are consulted; the +utr3+ / +utr5+ methods are
# not called, so a list that has not been computed yet is skipped.
#
# @return [Array, nil] +@intervals+ when +@utr5+ was appended, otherwise nil
def build_utrs
  appended = nil
  [@utr3, @utr5].each do |utrs|
    appended = utrs ? @intervals.concat(utrs) : nil
  end
  appended
end

#canonical_transcript_score ⇒ Object



121
122
123
# File 'lib/gtf/gene.rb', line 121

# Score used to rank transcripts: the total CDS size, plus a fixed bonus of
# 100000 when +is_ccds?+ is true.
#
# @return [Integer]
def canonical_transcript_score
  ccds_bonus = is_ccds? ? 100000 : 0
  ccds_bonus + cds_size
end

#cds ⇒ Object



279
280
281
# File 'lib/gtf/gene.rb', line 279

# Intervals whose feature is exactly "CDS", in +@intervals+ order.
# The list is computed on first use and cached in +@cds+.
#
# @return [Array]
def cds
  return @cds if @cds
  @cds = @intervals.select { |interval| interval.feature == "CDS" }
end

#cds_pos ⇒ Object



139
140
141
# File 'lib/gtf/gene.rb', line 139

# Cached result of +get_cds_pos+ (defined elsewhere); computed on first use.
#
# NOTE(review): +site+ and +intron_pos+ call +cds_pos+ with one argument,
# but this definition accepts none — confirm the intended signature.
#
# @return [Object] whatever +get_cds_pos+ returned
def cds_pos
  @cds_pos = get_cds_pos unless @cds_pos
  @cds_pos
end

#cds_seq ⇒ Object



135
136
137
# File 'lib/gtf/gene.rb', line 135

# Cached result of +get_cds_seq+ (defined elsewhere); computed on first use.
#
# @return [Object] whatever +get_cds_seq+ returned
def cds_seq
  @cds_seq = get_cds_seq unless @cds_seq
  @cds_seq
end

#cds_size ⇒ Object



129
130
131
132
133
# File 'lib/gtf/gene.rb', line 129

# Sum of +size+ over every CDS interval; 0 when there are none.
#
# @return [Integer]
def cds_size
  cds.reduce(0) { |total, region| total + region.size }
end

#contains?(pos) ⇒ Boolean

Returns:

  • (Boolean)


267
268
269
# File 'lib/gtf/gene.rb', line 267

# True when +pos+ lies within this transcript's +start+..+stop+, both ends
# inclusive.
#
# @param pos [Object] value comparable with +start+ and +stop+
# @return [Boolean]
def contains? pos
  at_or_after_start = start <= pos
  at_or_after_start && stop >= pos
end

#exons ⇒ Object



276
277
278
# File 'lib/gtf/gene.rb', line 276

# Intervals whose feature is exactly "exon", in +@intervals+ order.
# The list is computed on first use and cached in +@exons+.
#
# @return [Array]
def exons
  return @exons if @exons
  @exons = @intervals.select { |interval| interval.feature == "exon" }
end

#inspect ⇒ Object



282
283
284
# File 'lib/gtf/gene.rb', line 282

def inspect
  "#<#{self.class}:0x#{'%x' % (object_id << 1)} @name=#{@name} @intervals=#{@intervals.count}>"
end

#intron_frame(intron) ⇒ Object



112
113
114
115
116
117
118
119
# File 'lib/gtf/gene.rb', line 112

# Frame associated with an intron, i.e. the terminal frame of the leading
# exon.
#
# On the "+" strand this is (frame + size) of +prev_exon+ modulo 3; on any
# other strand it is the +frame+ of +post_exon+.
#
# @param intron [Object] responds to +prev_exon+ and +post_exon+
# @return [Integer]
def intron_frame intron
  return intron.post_exon.frame unless strand == "+"

  leading = intron.prev_exon
  (leading.frame + leading.size) % 3
end

#intron_pos(intron) ⇒ Object



211
212
213
# File 'lib/gtf/gene.rb', line 211

# Site descriptor for a position that falls inside +intron+.
#
# NOTE(review): this passes one argument (+intron.start - 1+) to +cds_pos+,
# whose definition in this class takes none — confirm which is intended.
#
# @param intron [Object] responds to +start+, +prev_exon+ and +post_exon+
# @return [Hash] with keys +:type+ (always +:intron+), +:pos+ and +:frame+
def intron_pos intron
  location = { :type => :intron }
  location[:pos] = cds_pos(intron.start - 1)
  location[:frame] = intron_frame(intron)
  location
end

#is_ccds? ⇒ Boolean

Returns:

  • (Boolean)


125
126
127
# File 'lib/gtf/gene.rb', line 125

# Whether this transcript carries a CCDS identifier: true only when the
# object responds to +ccds_id+ and that value is not nil.
#
# @return [Boolean]
def is_ccds?
  return false unless respond_to?(:ccds_id)

  !ccds_id.nil?
end

#protein_change(mutation) ⇒ Object



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/gtf/gene.rb', line 173

# Describes the amino-acid change a mutation causes, as
# "<reference letters><1-based index of first affected codon><mutated letters>".
#
# The cached codons are left untouched. The reference letters are read before
# any edit, and each edit is applied to a copy of the codon sequence:
# +String#to_s+ returns the receiver itself for a plain String, so editing
# its result in place would also rewrite the codon held in +trinucs+.
#
# @param mutation [Object] responds to +overlaps?(position)+ and
#   +alt_at(position)+
# @return [String, nil] the change string, or nil when no codon position
#   overlaps the mutation
def protein_change mutation
  # codons with at least one base position touched by the mutation
  tnucs = trinucs.select do |tn|
    tn.pos.any? do |p|
      p.overlaps? mutation
    end
  end
  return nil if tnucs.empty?

  pre = tnucs.map do |tn|
    tn.codon.aa.letter
  end.join ''

  muts = tnucs.map do |tn|
    seq = tn.seq.to_s.dup
    3.times do |i|
      next unless mutation.overlaps? tn.pos[i]
      seq[i] = mutation.alt_at(tn.pos[i])
      # on the minus strand the alternate base is complemented
      seq[i] = seq[i].tr('ATGC', 'TACG') if strand == "-"
    end
    TriNuc.new seq, tn.pos, strand
  end
  post = muts.map do |tn|
    tn.codon.aa.letter
  end.join ''

  "#{pre}#{tnucs.first.index+1}#{post}"
end

#protein_seq ⇒ Object



160
161
162
163
164
# File 'lib/gtf/gene.rb', line 160

# Amino-acid letters of every codon in +trinucs+, concatenated in order.
#
# @return [String]
def protein_seq
  residues = trinucs.map { |tn| tn.codon.aa.letter }
  residues.join('')
end

#protein_seq_at(locus) ⇒ Object



166
167
168
169
170
171
# File 'lib/gtf/gene.rb', line 166

# Amino-acid letters of only those codons that have at least one position
# overlapping +locus+, concatenated in codon order.
#
# @param locus [Object] passed to each position's +overlaps?+
# @return [String] empty when nothing overlaps
def protein_seq_at locus
  touched = trinucs.select do |tn|
    tn.pos.any? { |base| base.overlaps? locus }
  end
  touched.map { |tn| tn.codon.aa.letter }.join('')
end

#respond_to_missing?(sym, include_all = false) ⇒ Boolean

Returns:

  • (Boolean)


259
260
261
# File 'lib/gtf/gene.rb', line 259

# Companion to +method_missing+: reports a method as available when the
# wrapped transcript record responds to it, and otherwise defers to the
# inherited implementation.
#
# @param sym [Symbol] method name being queried
# @param include_all [Boolean] not passed to the delegate; only reaches the
#   inherited implementation via +super+
# @return [Boolean]
def respond_to_missing? sym, include_all=false
  delegate_answer = @transcript.respond_to?(sym)
  return delegate_answer if delegate_answer

  super
end

#site(pos) ⇒ Object



95
96
97
98
99
100
101
102
103
104
105
# File 'lib/gtf/gene.rb', line 95

# Classifies a single position against this transcript's intervals.
#
# A one-base clone of the transcript record is built at +pos+, every interval
# whose +contains?+ accepts it is collected, and the first matching rule wins:
# feature "cds", then "intron", then anything matching /UTR/, else a generic
# transcript hit.
#
# NOTE(review): the first rule compares against lowercase "cds" while +cds+
# selects "CDS" — confirm whether this branch can ever match.
# NOTE(review): +cds_pos+ is called with one argument here, but its definition
# in this class takes none — confirm the intended signature.
#
# @param pos [Object] coordinate assigned to both +start+ and +stop+ of the probe
# @return [Object] result of +cds_pos+, +intron_pos+ or +utr_pos+, or
#   +{ :type => :transcript }+ when no rule matches
def site pos
  probe = @transcript.clone do |copy|
    copy.start = copy.stop = pos
  end
  hits = @intervals.select { |feat| feat.contains? probe }

  return cds_pos(probe) if hits.find { |feat| feat.feature == "cds" }

  intron = hits.find { |feat| feat.feature == "intron" }
  return intron_pos(intron) if intron

  return utr_pos if hits.find { |feat| feat.feature =~ /UTR/ }

  { :type => :transcript }
end

#to_refflat ⇒ Object

Output this transcript in the odious ‘refFlat’ format, demanded by Picard and others.



286
287
288
289
290
291
# File 'lib/gtf/gene.rb', line 286

# Renders this transcript as one tab-separated refFlat-style line.
#
# Columns, in order: gene_name, name, seqname, strand, start, stop, smallest
# CDS start, largest CDS stop, exon count, comma-joined sorted exon starts,
# comma-joined sorted exon stops.
#
# NOTE(review): coordinates are written exactly as stored and the CDS columns
# come out empty when there is no CDS — confirm consumers accept that.
#
# @return [String]
def to_refflat
  exon_starts = exons.map(&:start).sort.join(',')
  exon_stops = exons.map(&:stop).sort.join(',')
  columns = [
    gene_name, name, seqname, strand, start, stop,
    cds.map(&:start).min, cds.map(&:stop).max,
    exons.count, exon_starts, exon_stops
  ]
  columns.join("\t")
end

#transcript_start ⇒ Object



270
271
272
273
274
275
# File 'lib/gtf/gene.rb', line 270

# One-base clone of the transcript record marking where the transcript
# begins: at +start+ on the "+" strand, at +stop+ otherwise. Its feature is
# set to "transcript_start". Built on first use and cached.
#
# @return [Object] the cached clone
def transcript_start
  return @transcript_start if @transcript_start

  @transcript_start = @transcript.clone do |marker|
    anchor = if strand == "+"
      start
    else
      stop
    end
    marker.stop = anchor
    marker.start = anchor
    marker.feature = "transcript_start"
  end
end

#trinucs ⇒ Object



199
200
201
# File 'lib/gtf/gene.rb', line 199

# Codon list for this transcript: +trinucs_for+ applied to +cds_seq+ and
# +cds_pos+, computed on first use and cached in +@trinucs+.
#
# @return [Array]
def trinucs
  return @trinucs if @trinucs
  @trinucs = trinucs_for(cds_seq, cds_pos)
end

#trinucs_for(cds_seq, cds_pos) ⇒ Object



203
204
205
206
207
208
209
# File 'lib/gtf/gene.rb', line 203

# Splits a coding sequence and its parallel position list into codons.
#
# One TriNuc is built per complete group of three; leftover trailing items
# (when the size is not a multiple of 3) are ignored.
#
# @param cds_seq [#size, #[]] sequence, sliced three items at a time
# @param cds_pos [#[]] positions, sliced with the same ranges
# @return [Array<TriNuc>] codons with a zero-based index
def trinucs_for cds_seq, cds_pos
  codon_total = cds_seq.size / 3
  (0...codon_total).map do |codon_index|
    first_base = codon_index * 3
    window = first_base..(first_base + 2)
    TriNuc.new cds_seq[window], cds_pos[window], strand, codon_index
  end
end

#utr3 ⇒ Object



215
216
217
218
219
220
221
222
223
224
# File 'lib/gtf/gene.rb', line 215

# Builds (once) and returns the list stored in +@utr3+, labelling every entry
# with the feature "3' UTR".
#
# The reference CDS is +cds.first+ on the "+" strand and +cds.last+ otherwise.
# Exons that are +above?+ it ("+" strand) or +below?+ it (other strands) are
# dropped; each remaining exon contributes +strict_diff(reference).to_a+, with
# nil differences skipped.
#
# NOTE(review): every element of the result must respond to +feature=+, so
# this depends on what +strict_diff+ and +to_a+ return — confirm.
# NOTE(review): with no CDS the reference is nil — confirm the helpers cope.
#
# @return [Array] the cached list
def utr3
  return @utr3 if @utr3

  reference = if strand == "+"
    cds.first
  else
    cds.last
  end
  kept = exons.reject do |exon|
    strand == "+" ? exon.above?(reference) : exon.below?(reference)
  end
  remainders = kept.map { |exon| exon.strict_diff(reference) }.compact
  @utr3 = remainders.map(&:to_a)
  @utr3.each { |piece| piece.feature = "3' UTR" }
end

#utr5 ⇒ Object



226
227
228
229
230
231
232
233
234
235
# File 'lib/gtf/gene.rb', line 226

# Builds (once) and returns the list stored in +@utr5+, labelling every entry
# with the feature "5' UTR".
#
# The reference CDS is +cds.last+ on the "+" strand and +cds.first+ otherwise.
# Exons that are +below?+ it ("+" strand) or +above?+ it (other strands) are
# dropped; each remaining exon contributes +strict_diff(reference).to_a+, with
# nil differences skipped.
#
# NOTE(review): every element of the result must respond to +feature=+, so
# this depends on what +strict_diff+ and +to_a+ return — confirm.
# NOTE(review): with no CDS the reference is nil — confirm the helpers cope.
#
# @return [Array] the cached list
def utr5
  return @utr5 if @utr5

  reference = if strand == "+"
    cds.last
  else
    cds.first
  end
  kept = exons.reject do |exon|
    strand == "+" ? exon.below?(reference) : exon.above?(reference)
  end
  remainders = kept.map { |exon| exon.strict_diff(reference) }.compact
  @utr5 = remainders.map(&:to_a)
  @utr5.each { |piece| piece.feature = "5' UTR" }
end

#utr_pos ⇒ Object



108
109
110
# File 'lib/gtf/gene.rb', line 108

# Site descriptor that +site+ returns for a position overlapping an interval
# whose feature matches /UTR/. It carries only the type tag; a new hash is
# built on every call.
#
# @return [Hash] +{ :type => :utr }+
def utr_pos
  { :type => :utr }
end