Class: Bio::DB::Fasta::Region

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/db/fastadb.rb

Overview

Class to wrap a region of a chromosome

Constant Summary collapse

# Zeroed per-base counters (A, C, G, T); calculate_stats_from_pile uses it as
# the fill value for positions that have not been called.
BASE_COUNT_ZERO = { A: 0, C: 0, G: 0, T: 0 }

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(args = {}) ⇒ Region

Returns a new instance of Region.



75
76
77
78
79
80
# File 'lib/bio/db/fastadb.rb', line 75

# Builds a region from an options hash.
#
# args - Hash that may carry :entry, :start, :end and :orientation; any key
#        that is missing leaves the matching attribute as the hash's default
#        (nil for a plain Hash).
def initialize(args ={})
  @entry, @start, @end, @orientation =
    args.values_at(:entry, :start, :end, :orientation)
end

Instance Attribute Details

#allele_freq ⇒ Object Also known as: base_ratios

Returns the value of attribute allele_freq.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @allele_freq. calculate_stats_from_pile fills it with one entry
# per position of the region: pile.allele_freq where a call was made,
# BASE_COUNT_ZERO otherwise.
def allele_freq
  @allele_freq
end

#average_coverage ⇒ Object

Returns the value of attribute average_coverage.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @average_coverage. calculate_stats_from_pile sets it to
# total_cov divided by size, as a Float.
def average_coverage
  @average_coverage
end

#bases ⇒ Object

Returns the value of attribute bases.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @bases. calculate_stats_from_pile fills it with one entry per
# position of the region: pile.bases where a call was made, BASE_COUNT_ZERO
# otherwise.
def bases
  @bases
end

#called ⇒ Object

Returns the value of attribute called.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @called. calculate_stats_from_pile resets it to 0 and increments
# it once for every pile whose coverage is greater than min_cov.
def called
  @called
end

#consensus ⇒ Object

Returns the value of attribute consensus.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @consensus. calculate_stats_from_pile sets it to a Bio::Sequence
# built from the lower-cased reference with the called positions replaced by
# upper-case consensus codes.
def consensus
  @consensus
end

#coverages ⇒ Object

Returns the value of attribute coverages.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @coverages. calculate_stats_from_pile fills it with one integer
# per position of the region: pile.coverage.to_i where a call was made, 0
# otherwise.
def coverages
  @coverages
end

#end ⇒ Object

Returns the value of attribute end.



71
72
73
# File 'lib/bio/db/fastadb.rb', line 71

# Reader for @end, the end coordinate given to the constructor as args[:end]
# (parse_region supplies it as an Integer).
def end
  @end
end

#entry ⇒ Object

Returns the value of attribute entry.



71
72
73
# File 'lib/bio/db/fastadb.rb', line 71

# Reader for @entry, the value given to the constructor as args[:entry]
# (parse_region supplies the text before the ":" of the region string).
def entry
  @entry
end

#orientation ⇒ Object

Returns the value of attribute orientation.



71
72
73
# File 'lib/bio/db/fastadb.rb', line 71

# Reader for @orientation. parse_region sets it to :reverse when end < start
# and to :forward otherwise; calculate_stats_from_pile reverse-complements the
# consensus when it is :reverse.
def orientation
  @orientation
end

#pileup ⇒ Object

Returns the value of attribute pileup.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @pileup. calculate_stats_from_pile iterates it with each and
# reads pos, coverage, allele_freq, bases and consensus_iuap from every
# element. Nothing shown here assigns it -- presumably set by the caller.
def pileup
  @pileup
end

#reference ⇒ Object

Returns the value of attribute reference.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @reference. calculate_stats_from_pile lower-cases a copy of it as
# the starting point of the consensus. Nothing shown here assigns it --
# presumably set by the caller.
def reference
  @reference
end

#snps ⇒ Object

Returns the value of attribute snps.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @snps. None of the methods shown for this class assigns it --
# presumably populated by a caller; verify before relying on it.
def snps
  @snps
end

#start ⇒ Object

Returns the value of attribute start.



71
72
73
# File 'lib/bio/db/fastadb.rb', line 71

# Reader for @start, the start coordinate given to the constructor as
# args[:start]; calculate_stats_from_pile subtracts it from pile.pos to index
# the per-position arrays.
def start
  @start
end

#total_cov ⇒ Object

Returns the value of attribute total_cov.



73
74
75
# File 'lib/bio/db/fastadb.rb', line 73

# Reader for @total_cov. calculate_stats_from_pile resets it to 0 and adds the
# coverage of every pile, whether or not the position was called.
def total_cov
  @total_cov
end

Class Method Details

.parse_region(reg_str) ⇒ Object

Returns a region object from a string in form “name:start-end”

Raises:

  • (FastaDBException) — if the string is not in the form “name:start-end”



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/bio/db/fastadb.rb', line 144

# Builds a Region from a string of the form "name:start-end".
#
# Single quotes are stripped from the input first. Start and end are converted
# with to_i. The orientation is :reverse when end < start, :forward otherwise.
#
# reg_str - String describing the region.
#
# Returns the new Region.
# Raises FastaDBException when the string does not contain exactly one ":"
# separated name and exactly one "-" separated pair of coordinates.
def self.parse_region(reg_str)
  string = reg_str.delete("'")
  fields_1 = string.split(":")
  # Only split the coordinates when a coordinate part exists; otherwise
  # fields_1[1] is nil and the malformed input would surface as a
  # NoMethodError instead of the documented FastaDBException.
  fields_2 = fields_1.length == 2 ? fields_1[1].split("-") : []
  if fields_1.length != 2 || fields_2.length != 2
    raise FastaDBException.new(), "Invalid region. #{string}"
  end

  reg = Region.new(:entry => fields_1[0], :start => fields_2[0].to_i, :end => fields_2[1].to_i)
  reg.orientation = reg.end < reg.start ? :reverse : :forward
  reg
end

Instance Method Details

#allele_freq_for_base(base) ⇒ Object Also known as: base_ratios_for_base

TODO: Debug, as it hasn't been tested in the actual code.



86
87
88
89
90
91
92
93
94
95
96
# File 'lib/bio/db/fastadb.rb', line 86

# Returns, for the given base, the allele frequency at every position of the
# region, in position order.
#
# The result is memoised per base in @all_ratios, so repeated calls with the
# same base return the same Array.
#
# base - key used to look up each per-position entry of @allele_freq
#        (the keys of BASE_COUNT_ZERO are :A, :C, :G and :T).
#
# Requires @allele_freq to be populated (calculate_stats_from_pile does so).
def allele_freq_for_base(base)
  @all_ratios ||= {}
  # Iterate over this region's own size; there is no `region` object in scope.
  @all_ratios[base] ||= (0...size).map { |i| @allele_freq[i][base] }
end

#calculate_stats_from_pile(opts = {}) ⇒ Object

Calculates the consensus, base ratios, coverages and total coverages in the region

  • min_cov minimum coverage to make a call (default 0)

  • min_per minimum representation to make a call. If more than one base

can be called, the IUPAC ambiguity code is returned


105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/bio/db/fastadb.rb', line 105

# Calculates the consensus, allele frequencies, base counts, per-position
# coverages, total coverage and average coverage of the region from its pileup.
#
# opts - Hash of options:
#        :min_cov - a position is called only when the pile coverage is
#                   strictly greater than this value (default 0)
#        :min_per - handed to pile.consensus_iuap when calling a position
#                   (default 0.20)
#
# Positions that are not called keep the lower-cased reference base, a zeroed
# base-count hash and a coverage of 0. Every pile contributes to total_cov,
# called or not.
#
# Returns self.
def calculate_stats_from_pile(opts={})
  min_cov = opts[:min_cov] || 0
  min_per = opts[:min_per] || 0.20
  self.called = 0
  called_seq = self.reference.downcase

  region_size = self.size
  # Give every position its own copy of the zero counts. Passing the constant
  # as the fill value would make all slots (and the constant itself) one
  # shared hash, so mutating a single position would change all of them.
  self.allele_freq = Array.new(region_size) { BASE_COUNT_ZERO.dup }
  self.bases = Array.new(region_size) { BASE_COUNT_ZERO.dup }
  self.coverages = Array.new(region_size, 0)
  self.total_cov = 0

  self.pileup.each do |pile|
    if pile.coverage > min_cov
      offset = pile.pos - self.start
      self.allele_freq[offset] = pile.allele_freq
      # Called bases are upper-cased so they stand out from the reference.
      called_seq[offset] = pile.consensus_iuap(min_per).upcase
      self.coverages[offset] = pile.coverage.to_i
      self.bases[offset] = pile.bases
      self.called += 1
    end
    self.total_cov += pile.coverage
  end

  self.consensus = Bio::Sequence.new(called_seq)
  self.consensus.na
  self.consensus.reverse_complement!() if self.orientation == :reverse
  self.average_coverage = self.total_cov.to_f / region_size.to_f
  self
end

#size ⇒ Object Also known as: length

Length of the region



161
162
163
# File 'lib/bio/db/fastadb.rb', line 161

# Length of the region, computed as end minus start.
# NOTE(review): the result is negative when end < start, which is how
# parse_region marks a :reverse region -- confirm callers expect that.
def size
  @end - @start
end

#to_s ⇒ Object



138
139
140
141
# File 'lib/bio/db/fastadb.rb', line 138

# Renders the region as "entry:start-end", the form parse_region accepts.
#
# Interpolation is used instead of String#+ so that a non-String entry
# (for example a Symbol or nil) is rendered rather than raising.
def to_s
  "#{@entry}:#{@start}-#{@end}"
end