Class: Bio::DB::Pileup
- Inherits:
-
Object
- Object
- Bio::DB::Pileup
- Defined in:
- lib/bio/db/pileup.rb
Instance Attribute Summary collapse
-
#ar1 ⇒ Object
Returns the value of attribute ar1.
-
#ar2 ⇒ Object
Returns the value of attribute ar2.
-
#ar3 ⇒ Object
Returns the value of attribute ar3.
-
#consensus_quality ⇒ Object
Returns the value of attribute consensus_quality.
-
#coverage ⇒ Object
Returns the value of attribute coverage.
-
#indel_1 ⇒ Object
Returns the value of attribute indel_1.
-
#indel_2 ⇒ Object
Returns the value of attribute indel_2.
-
#pos ⇒ Object
Returns the value of attribute pos.
-
#read_bases ⇒ Object
Returns the value of attribute read_bases.
-
#read_quals ⇒ Object
Returns the value of attribute read_quals.
-
#ref_base ⇒ Object
Returns the value of attribute ref_base.
-
#ref_name ⇒ Object
Returns the value of attribute ref_name.
-
#rms_mapq ⇒ Object
Returns the value of attribute rms_mapq.
-
#snp_quality ⇒ Object
Returns the value of attribute snp_quality.
Class Method Summary collapse
Instance Method Summary collapse
-
#allele_freq ⇒ Object
returns the frequency of all bases in pileup position.
- #base_coverage ⇒ Object
- #bases ⇒ Object
-
#consensus ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order.
-
#consensus_iuap(minumum_ratio_for_iup_consensus) ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order.
- #genotype_list ⇒ Object
-
#initialize(pile_up_line) ⇒ Pileup
constructor
creates the Pileup object pile_up_line = “seq2t151tGtGt36t0t99t12t.….……At:9<;;7=<<<<<” pile = Bio::DB::Pileup.new(pile_up_line).
-
#non_ref_count ⇒ Object
returns the total non-reference bases in the reads at this position.
-
#non_refs ⇒ Object
Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #=> 1, :C => 0, :T => 0, :G => 0.
-
#parse_indel(alt) ⇒ Object
identifies if the indel is an insertion or a deletion.
-
#ref_count ⇒ Object
returns the count of reference-bases in the reads at this position.
-
#to_s ⇒ Object
returns pileup format line.
-
#to_vcf ⇒ Object
returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a ‘*’ instead of the reference allele.
Constructor Details
#initialize(pile_up_line) ⇒ Pileup
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/bio/db/pileup.rb', line 35 def initialize(pile_up_line) cols = pile_up_line.split(/\t/) @consensus = nil @consensus_quality = nil @read_quals = nil @bases = nil @allele_frequency = nil @consensus_iuap = nil if cols.length == 6 ##should only be able to get 6 lines from mpileup @ref_name, @pos, @ref_base, @coverage, @read_bases, @read_quals = cols elsif (10..13).include?(cols.length) ##incase anyone tries to use deprecated pileup with -c flag we get upto 13 cols... if cols[2] == '*' #indel @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @indel_1, @indel_2, @ar1, @ar2, @ar3 = cols else #snp / identity @ref_name, @pos, @ref_base, @consensus, @consensus_quality, @snp_quality, @rms_mapq, @coverage, @read_bases, @read_quals = cols end @consensus_quality = @consensus_quality.to_f @snp_quality = @snp_quality.to_f @rms_mapq = @rms_mapq.to_f else #raise RuntimeError, "parsing line '#{pile_up_line.chomp}' failed" end @pos = @pos.to_i @coverage = @coverage.to_f @ref_count = nil @non_ref_count_hash = nil @non_ref_count = nil end |
Instance Attribute Details
#ar1 ⇒ Object
Returns the value of attribute ar1.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar1 @ar1 end |
#ar2 ⇒ Object
Returns the value of attribute ar2.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar2 @ar2 end |
#ar3 ⇒ Object
Returns the value of attribute ar3.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ar3 @ar3 end |
#consensus_quality ⇒ Object
Returns the value of attribute consensus_quality.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def consensus_quality @consensus_quality end |
#coverage ⇒ Object
Returns the value of attribute coverage.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def coverage @coverage end |
#indel_1 ⇒ Object
Returns the value of attribute indel_1.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def indel_1 @indel_1 end |
#indel_2 ⇒ Object
Returns the value of attribute indel_2.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def indel_2 @indel_2 end |
#pos ⇒ Object
Returns the value of attribute pos.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def pos @pos end |
#read_bases ⇒ Object
Returns the value of attribute read_bases.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def read_bases @read_bases end |
#read_quals ⇒ Object
Returns the value of attribute read_quals.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def read_quals @read_quals end |
#ref_base ⇒ Object
Returns the value of attribute ref_base.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ref_base @ref_base end |
#ref_name ⇒ Object
Returns the value of attribute ref_name.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def ref_name @ref_name end |
#rms_mapq ⇒ Object
Returns the value of attribute rms_mapq.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def rms_mapq @rms_mapq end |
#snp_quality ⇒ Object
Returns the value of attribute snp_quality.
30 31 32 |
# File 'lib/bio/db/pileup.rb', line 30 def snp_quality @snp_quality end |
Class Method Details
.iupac_to_base(alt_base) ⇒ Object
187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/bio/db/pileup.rb', line 187 def Pileup.iupac_to_base(alt_base) case alt_base when 'K' then ['G','T'] when 'M' then ['A','C'] when 'S' then ['C','G'] when 'R' then ['A','G'] when 'W' then ['A','T'] when 'Y' then ['C','T'] else alt_base.split(//) end end |
Instance Method Details
#allele_freq ⇒ Object
returns the frequency of all bases in pileup position
240 241 242 243 244 245 246 247 248 |
# File 'lib/bio/db/pileup.rb', line 240 def allele_freq return @allele_frequency if @allele_frequency bases = self.bases @allele_frequency = Hash.new bases.each do |k,v| @allele_frequency[k] = v.to_f/self.base_coverage.to_f end @allele_frequency end |
#base_coverage ⇒ Object
231 232 233 234 235 236 237 |
# File 'lib/bio/db/pileup.rb', line 231 def base_coverage total = 0 @bases.each do |k,v| total += v end total end |
#bases ⇒ Object
223 224 225 226 227 228 229 |
# File 'lib/bio/db/pileup.rb', line 223 def bases return @bases if @bases @bases = self.non_refs #puts self.ref_count @bases[self.ref_base.upcase.to_sym] = self.ref_count @bases end |
#consensus ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/bio/db/pileup.rb', line 91 def consensus if @consensus.nil? max = self.non_refs.values.max #if the ref base is in more than half the coverage.. if (self.ref_count / self.coverage) > 0.5 #..then the ref base is the concensus @consensus = self.ref_base ##not sure if the following will ever apply as the non_refs method also returns the ref base count, hence can never be over the max count #elsif self.ref_count > max # @consensus = self.ref_base else #get the base(s) and count(s) that has the max count arr = self.non_refs.select {|k,v| v == max } #just get the bases (remove the counts) bases = arr.collect {|b| b[0].to_s } #add the ref base if the ref base has a max count (commenting this out as it should already be in) #bases << self.ref_base if self.ref_count == max @consensus = bases.sort.join end end @consensus end |
#consensus_iuap(minumum_ratio_for_iup_consensus) ⇒ Object
returns the consensus (most frequent) base from the pileup, if there are equally represented bases returns a string of all equally represented bases in alphabetical order
251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 |
# File 'lib/bio/db/pileup.rb', line 251 def consensus_iuap(minumum_ratio_for_iup_consensus) tmp = [] if @consensus_iuap.nil? @consensus_iuap = self.ref_base.downcase bases = self.bases #tmp = String.new bases.each do |k,v| tmp << k[0].to_s if v/self.coverage.to_f > minumum_ratio_for_iup_consensus end if tmp.length > 0 tmp = tmp.collect{ |x| Bio::Sequence::NA.new(x) } # creates alignment object a = Bio::Alignment.new(tmp) # shows IUPAC consensus @consensus_iuap = a.consensus_iupac end end @consensus_iuap end |
#genotype_list ⇒ Object
177 178 179 180 181 182 183 |
# File 'lib/bio/db/pileup.rb', line 177 def genotype_list if self.ref_base == '*' return indel_gt else return snp_gt end end |
#non_ref_count ⇒ Object
returns the total non-reference bases in the reads at this position
75 76 77 78 79 80 |
# File 'lib/bio/db/pileup.rb', line 75 def non_ref_count if @non_ref_count.nil? @non_ref_count = @read_bases.count("ATGCatgc").to_f end @non_ref_count end |
#non_refs ⇒ Object
Calculate the total count of each non-reference nucleotide and return a hash of all 4 nt counts returns a hash pile.non_refs #=> 1, :C => 0, :T => 0, :G => 0
67 68 69 70 71 72 |
# File 'lib/bio/db/pileup.rb', line 67 def non_refs if @non_ref_count_hash.nil? @non_ref_count_hash = {:A => self.read_bases.count("Aa"), :C => self.read_bases.count("Cc"), :G => self.read_bases.count("Gg"), :T => self.read_bases.count("Tt")} end @non_ref_count_hash end |
#parse_indel(alt) ⇒ Object
identifies if the indel is an insertion or a deletion
200 201 202 203 204 205 206 207 |
# File 'lib/bio/db/pileup.rb', line 200 def parse_indel(alt) return "D#{$'.length}" if alt =~/^-/ if alt=~/^\+/ return "I#{$'}" elsif alt == '*' return nil end end |
#ref_count ⇒ Object
returns the count of reference-bases in the reads at this position
83 84 85 86 87 88 |
# File 'lib/bio/db/pileup.rb', line 83 def ref_count if @ref_count.nil? @ref_count = self.read_bases.count(".,") end @ref_count end |
#to_s ⇒ Object
returns pileup format line
211 212 213 214 215 216 217 218 219 220 |
# File 'lib/bio/db/pileup.rb', line 211 def to_s if @read_quals and !@consensus_quality #6col [@ref_name, @pos, @ref_base, @coverage.to_i, @read_bases, @read_quals].join("\t") elsif @indel_1 #13 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @indel_1, @indel_2, @ar1, @ar2, @ar3].join("\t") else #10 cols [@ref_name, @pos, @ref_base, @consensus, @consensus_quality.to_i, @snp_quality.to_i, @rms_mapq.to_i, @coverage.to_i, @read_bases, @read_quals].join("\t") end end |
#to_vcf ⇒ Object
returns basic VCF string as per samtools/misc sam2vcf.pl except that it scrimps on the ref for indels, returning a ‘*’ instead of the reference allele
115 116 117 118 119 120 121 122 123 124 |
# File 'lib/bio/db/pileup.rb', line 115 def to_vcf alt,g = self.genotype_list alt = self.consensus.split(//).join(',') unless self.ref_base == '*' alt = '.' if alt == self.ref_base alt = alt.split(',') #if the reference base is in alt, remove it alt.delete(self.ref_base.to_s) alt = alt.join(',') [self.ref_name, self.pos, '.', self.ref_base, alt, self.snp_quality.to_i, "0", "DP=#{self.coverage.to_i}", "GT:GQ:DP", "#{g}:#{self.consensus_quality.to_i}:#{self.coverage.to_i}" ].join("\t") end |