Class: Bio::DB::Pileup

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/pileup.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(pile_up_line) ⇒ Pileup

creates the Pileup object

pile_up_line = "seq2\t151\tG\tG\t36\t0\t99\t12\t...........A\t:9<;;7=<<<<<"
pile = Bio::DB::Pileup.new(pile_up_line)

35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/bio/db/pileup.rb', line 35

def initialize(pile_up_line)
  cols = pile_up_line.split(/\t/)
  @consensus = nil
  @consensus_quality = nil
  @read_quals = nil
  @bases = nil
  @allele_frequency = nil
  @consensus_iuap = nil
  if cols.length == 6 ##should only be able to get 6 lines from mpileup
    @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols
  elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols...
    if cols[2] == '*' #indel
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols
    else #snp / identity
      @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols
    end
    @consensus_quality = @consensus_quality.to_f
    @snp_quality = @snp_quality.to_f
    @rms_mapq = @rms_mapq.to_f
  else
    #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed"
  end
    
  @pos = @pos.to_i
  @coverage = @coverage.to_f
  @ref_count = nil
  @non_ref_count_hash = nil 
  @non_ref_count = nil
end

Instance Attribute Details

#ar1Object

Returns the value of attribute ar1


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar1
  @ar1
end

#ar2Object

Returns the value of attribute ar2


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar2
  @ar2
end

#ar3Object

Returns the value of attribute ar3


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ar3
  @ar3
end

#consensus_qualityObject

Returns the value of attribute consensus_quality


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def consensus_quality
  @consensus_quality
end

#coverageObject

Returns the value of attribute coverage


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def coverage
  @coverage
end

#indel_1Object

Returns the value of attribute indel_1


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def indel_1
  @indel_1
end

#indel_2Object

Returns the value of attribute indel_2


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def indel_2
  @indel_2
end

#posObject

Returns the value of attribute pos


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def pos
  @pos
end

#read_basesObject

Returns the value of attribute read_bases


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def read_bases
  @read_bases
end

#read_qualsObject

Returns the value of attribute read_quals


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def read_quals
  @read_quals
end

#ref_baseObject

Returns the value of attribute ref_base


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ref_base
  @ref_base
end

#ref_nameObject

Returns the value of attribute ref_name


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def ref_name
  @ref_name
end

#rms_mapqObject

Returns the value of attribute rms_mapq


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def rms_mapq
  @rms_mapq
end

#snp_qualityObject

Returns the value of attribute snp_quality


30
31
32
# File 'lib/bio/db/pileup.rb', line 30

def snp_quality
  @snp_quality
end

Class Method Details

.iupac_to_base(alt_base) ⇒ Object


187
188
189
190
191
192
193
194
195
196
197
# File 'lib/bio/db/pileup.rb', line 187

def Pileup.iupac_to_base(alt_base)
    case alt_base
          when 'K' then ['G','T']
          when 'M' then ['A','C']
          when 'S' then ['C','G']
          when 'R' then ['A','G']
          when 'W' then ['A','T']
          when 'Y' then ['C','T']
          else alt_base.split(//)
    end
end

Instance Method Details

#allele_freqObject

returns the frequency of all bases in pileup position


240
241
242
243
244
245
246
247
248
# File 'lib/bio/db/pileup.rb', line 240

def allele_freq
  return @allele_frequency if @allele_frequency
  bases = self.bases
  @allele_frequency = Hash.new
  bases.each do |k,v| 
    @allele_frequency[k] = v.to_f/self.base_coverage.to_f 
  end
  @allele_frequency
end

#base_coverageObject


231
232
233
234
235
236
237
# File 'lib/bio/db/pileup.rb', line 231

def base_coverage
  total = 0
  @bases.each do |k,v|
    total += v  
  end
  total
end

#basesObject


223
224
225
226
227
228
229
# File 'lib/bio/db/pileup.rb', line 223

def bases
   return @bases if @bases
   @bases = self.non_refs
   #puts self.ref_count
   @bases[self.ref_base.upcase.to_sym] = self.ref_count 
   @bases
end

#consensusObject

returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order


91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/bio/db/pileup.rb', line 91

def consensus
    if @consensus.nil?
      max = self.non_refs.values.max
      #if the ref base is in more than half the coverage..
      if (self.ref_count / self.coverage) > 0.5
        #..then the ref base is the concensus
        @consensus = self.ref_base
      ##not sure if the following will ever apply as the non_refs method also returns the ref base count, hence can never be over the max count  
      #elsif self.ref_count > max
      #  @consensus = self.ref_base
      else
        #get the base(s) and count(s) that has the max count
        arr = self.non_refs.select {|k,v| v == max }
        #just get the bases (remove the counts)
        bases = arr.collect {|b| b[0].to_s }
        #add the ref base if the ref base has a max count (commenting this out as it should already be in)
        #bases << self.ref_base if self.ref_count == max
        @consensus = bases.sort.join
      end
    end
    @consensus
end

#consensus_iuap(minumum_ratio_for_iup_consensus) ⇒ Object

returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order


251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
# File 'lib/bio/db/pileup.rb', line 251

def consensus_iuap(minumum_ratio_for_iup_consensus)
  
  tmp = []
  if @consensus_iuap.nil?
    @consensus_iuap = self.ref_base.downcase
    bases = self.bases
    #tmp = String.new
    bases.each do |k,v|
      tmp << k[0].to_s if v/self.coverage.to_f > minumum_ratio_for_iup_consensus
    end
    if tmp.length > 0
      tmp = tmp.collect{ |x| Bio::Sequence::NA.new(x) }
       # creates alignment object
       a = Bio::Alignment.new(tmp)
       # shows IUPAC consensus
      @consensus_iuap = a.consensus_iupac
    end
  end 
  @consensus_iuap
end

#genotype_listObject


177
178
179
180
181
182
183
# File 'lib/bio/db/pileup.rb', line 177

def genotype_list
  if self.ref_base == '*'
    return indel_gt
  else
   return snp_gt
  end
end

#non_ref_countObject

returns the total non-reference bases in the reads at this position


75
76
77
78
79
80
# File 'lib/bio/db/pileup.rb', line 75

def non_ref_count
  if @non_ref_count.nil?
    @non_ref_count = @read_bases.count("ATGCatgc").to_f
  end
  @non_ref_count
end

#non_refsObject

Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #=> 1, :C => 0, :T => 0, :G => 0


67
68
69
70
71
72
# File 'lib/bio/db/pileup.rb', line 67

def non_refs
  if @non_ref_count_hash.nil?
     @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")}
  end
    @non_ref_count_hash
end

#parse_indel(alt) ⇒ Object

identifies if the indel is an insertion or a deletion


200
201
202
203
204
205
206
207
# File 'lib/bio/db/pileup.rb', line 200

def parse_indel(alt)
  return "D#{$'.length}" if alt =~/^-/ 
  if alt=~/^\+/
    return "I#{$'}"
  elsif alt == '*' 
    return nil
  end
end

#ref_countObject

returns the count of reference-bases in the reads at this position


83
84
85
86
87
88
# File 'lib/bio/db/pileup.rb', line 83

def ref_count
  if @ref_count.nil?
    @ref_count = self.read_bases.count(".,")
  end
  @ref_count
end

#to_sObject

returns pileup format line


211
212
213
214
215
216
217
218
219
220
# File 'lib/bio/db/pileup.rb', line 211

def to_s
  if @read_quals and !@consensus_quality #6col
    [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t")    
  elsif @indel_1 #13 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t")
  else #10 cols
    [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t")
  end
    
end

#to_vcfObject

returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a '*' instead of the reference allele


115
116
117
118
119
120
121
122
123
124
# File 'lib/bio/db/pileup.rb', line 115

def to_vcf
  alt,g = self.genotype_list 
  alt = self.consensus.split(//).join(',') unless self.ref_base == '*'
  alt = '.' if alt == self.ref_base
  alt = alt.split(',')
  #if the reference base is in alt, remove it
  alt.delete(self.ref_base.to_s)
  alt = alt.join(',')
  [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t")
end