Class: Cheripic::Pileup

Inherits:
Bio::DB::Pileup
  • Object
show all
Defined in:
lib/cheripic/pileup.rb

Overview

An extension of Bio::DB::Pileup object to process pileup information at a given position

Instance Method Summary collapse

Constructor Details

#initialize(string) ⇒ Pileup

creates a Pileup object using a pileup information as string

Parameters:

  • string (String)

    pileup information line for a given position



15
16
17
18
19
# File 'lib/cheripic/pileup.rb', line 15

def initialize(string)
  super(string)
  adj_read_bases
  @indelbases = 'acgtryswkmbdhvnACGTRYSWKMBDHVN'
end

Instance Method Details

#adj_read_basesObject

removes mapping quality information



22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/cheripic/pileup.rb', line 22

def adj_read_bases
  # mapping quality after '^' symbol is substituted
  # to avoid splitting at non indel + or - characters
  # read ends marking by '$' symbol is substituted
  # insertion and deletion marking by '*' symbol is substituted
  self.read_bases.gsub!(/\^./, '')
  self.read_bases.delete! '$'
  self.read_bases.delete! '*'
  # warn about reads with ambiguous codes
  # if self.read_bases.match(/[^atgcATGC,\.\+\-0-9]/)
  #   warn "Ambiguous nucleotide\t#{self.read_bases}"
  # end
end

#bases_hashObject

count bases matching reference and non-reference from snp variant and make a hash of bases with counts for indels return the read bases information instead



39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/cheripic/pileup.rb', line 39

def bases_hash
  if self.read_bases =~ /\+/
    bases_hash = indels_to_hash('+')
  elsif self.read_bases =~ /-/
    bases_hash = indels_to_hash('-')
  else
    bases_hash = snp_base_hash(self.read_bases)
  end
  # some indels will have ref base in the read and using
  # sum of hash values is going to give wrong additional coverage
  # from indels so including actual coverage from pileup
  # bases_hash keys are :A, :C, :G, :T, :N, :ref and :indel
  bases_hash
end

#count_indel_bases(delimiter) ⇒ Object

count bases from indels array of pileup bases is split at + / - and number after each + / - is counted



57
58
59
60
61
62
63
64
65
66
# File 'lib/cheripic/pileup.rb', line 57

def count_indel_bases(delimiter)
  array = self.read_bases.split(delimiter)
  number = 0
  array.shift
  array.each do |element|
    # deletions in reference could contain ambiguous codes,
    number += /^(\d+)[#{@indelbases}]/.match(element)[1].to_i
  end
  number
end

#is_varObject

check if the pileup has the parameters we are looking for



83
84
85
86
87
88
89
90
91
92
# File 'lib/cheripic/pileup.rb', line 83

def is_var
  ignore_reference_n = Options.ignore_reference_n
  min_depth  = Options.mindepth
  min_non_ref_count = Options.min_non_ref_count

  return false if self.ref_base == '*'
  return false if ignore_reference_n and self.ref_base =~ /^[nN]$/
  return true if self.coverage >= min_depth and self.non_ref_count >= min_non_ref_count
  false
end

#non_ref_countObject

count bases matching reference and non-reference and calculate ratio of non_ref allele to total bases



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/cheripic/pileup.rb', line 70

def non_ref_count
  read_bases = self.read_bases
  if read_bases =~ /\+/
    non_ref_count = indel_non_ref_count('+')
  elsif read_bases =~ /-/
    non_ref_count = indel_non_ref_count('-')
  else
    non_ref_count = read_bases.count('atgcATGC')
  end
  non_ref_count
end

#non_ref_ratioObject

count bases matching reference and non-reference and calculate ratio of non_ref allele to total bases



96
97
98
# File 'lib/cheripic/pileup.rb', line 96

def non_ref_ratio
  self.non_ref_count.to_f / self.coverage.to_f
end

#var_base_fracObject

form hash of base information, [ATGC] counts for snp a hash of base proportion is calculated base proportion hash below a selected depth is empty base proportion below or equal to a noise factor are discarded



118
119
120
121
122
123
124
125
126
127
128
129
130
# File 'lib/cheripic/pileup.rb', line 118

def var_base_frac
  hash = self.bases_hash
  snp_hash = {}
  coverage = self.coverage
  return snp_hash if coverage < Options.mindepth
  # calculate proportion of each base in coverage
  hash.each_key do | base |
    freq = hash[base].to_f/coverage.to_f
    next if freq <= Options.noise
    snp_hash[base] = freq
  end
  snp_hash
end