Class: Bio::DB::Fasta::FastaFile
- Inherits:
-
Object
- Object
- Bio::DB::Fasta::FastaFile
- Defined in:
- lib/bio/db/fastadb.rb
Overview
Class that holds the fasta file. It is used as a database.
Instance Attribute Summary collapse
-
#fasta_path ⇒ Object
readonly
Returns the value of attribute fasta_path.
Instance Method Summary collapse
-
#faidx(opts = {}) ⇒ Object
Index reference sequence in the FASTA format or extract subsequence from indexed reference sequence.
-
#fetch_sequence(region) ⇒ Object
The region needs to have a method to_region or a method to_s that ha the format “chromosome:start-end” as in samtools.
- #fetch_sequence_native(region) ⇒ Object
- #fetch_sequence_samtools(region) ⇒ Object
- #index ⇒ Object
-
#initialize(fasta: nil, samtools: false) ⇒ FastaFile
constructor
Initialize the fasta file.
-
#load_fai_entries ⇒ Object
Loads the fai entries.
Constructor Details
#initialize(fasta: nil, samtools: false) ⇒ FastaFile
Initialize the fasta file. If the fai file doesn't exists, it is generated at startup
-
fasta path to the fasta file
-
samtools path to samtools, if it is not provided, use the bundled version
190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
# File 'lib/bio/db/fastadb.rb', line 190 def initialize(fasta: nil, samtools: false) #puts "The arguments are: '#{fasta}':'#{samtools}'" @fasta_path = fasta @samtools = samtools @index = nil @fasta_file = nil @samtools = File.join(File.(File.dirname(__FILE__)),'sam','external','samtools') if samtools == true raise FastaDBException.new(), "No path for the refernce fasta file. " if @fasta_path.nil? @fai_file = @fasta_path + ".fai" unless File.file?(@fai_file) then command = "#{@samtools} faidx '#{@fasta_path}'" @last_command = command system(command) end end |
Instance Attribute Details
#fasta_path ⇒ Object (readonly)
Returns the value of attribute fasta_path
185 186 187 |
# File 'lib/bio/db/fastadb.rb', line 185 def fasta_path @fasta_path end |
Instance Method Details
#faidx(opts = {}) ⇒ Object
Index reference sequence in the FASTA format or extract subsequence from indexed reference sequence. If no region is specified, faidx will index the file and create <ref.fasta>.fai on the disk. If regions are speficified, the subsequences will be retrieved and printed to stdout in the FASTA format. Options - if a subsequence is required
-
chr - [STRING] the reference name of the subsequence
-
start - [INT] the start position for the subsequence
-
stop - [INT] the stop position for the subsequence
225 226 227 228 229 230 231 232 233 234 |
# File 'lib/bio/db/fastadb.rb', line 225 def faidx(opts={}) if opts.has_key?(:chr) and opts.has_key?(:start) and opts.has_key?(:stop) opts={:as_bio => false} self.fetch_reference(:chr,:start,:stop,opts) else command = "#{@samtools} faidx #{@fasta_path}" @last_command = command system(command) end end |
#fetch_sequence(region) ⇒ Object
The region needs to have a method to_region or a method to_s that ha the format “chromosome:start-end” as in samtools
282 283 284 285 286 287 288 289 290 291 292 293 294 |
# File 'lib/bio/db/fastadb.rb', line 282 def fetch_sequence(region) load_fai_entries region = Region.parse_region(region.to_s) unless region.is_a?(Region) entry = index.region_for_entry(region.entry) raise FastaDBException.new "Entry (#{region.entry})not found in reference" unless entry raise FastaDBException.new "Region in invalid range (#{region}): Valid range: #{entry.to_region.to_s} has a size of #{entry.size}." if region.end > entry.size or region.start < 1 seq = @samtools ? fetch_sequence_samtools(region): fetch_sequence_native(region) reference = Bio::Sequence::NA.new(seq) if region.respond_to? :orientation and region.orientation == :reverse reference.reverse_complement!() end reference end |
#fetch_sequence_native(region) ⇒ Object
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 |
# File 'lib/bio/db/fastadb.rb', line 262 def fetch_sequence_native(region) query = region query = Region.parse_region(region) unless region.is_a?(Region) seq = "" #In order to make this reentrant, if we want to make a multithreaded #version of this function, we need to get a lock. Currently, only one thred #can be assosiated with eache fastadb object @fasta_file = File.open(@fasta_path) unless @fasta_file entry = index.region_for_entry(query.entry) start_pointer = entry.get_base_coordinate(query.start) @fasta_file.seek(start_pointer, IO::SEEK_SET) end_pointer = entry.get_base_coordinate(query.end) to_read = end_pointer - start_pointer + 1 seq = @fasta_file.read(to_read) seq.gsub!(/\s+/, '') seq end |
#fetch_sequence_samtools(region) ⇒ Object
251 252 253 254 255 256 257 258 259 260 |
# File 'lib/bio/db/fastadb.rb', line 251 def fetch_sequence_samtools(region) query = region.to_s query = region.to_region.to_s if region.respond_to?(:to_region) command = "#{@samtools} faidx #{@fasta_path} '#{query}'" puts "Running: #{command}" if $DEBUG @last_command = command seq = "" yield_from_pipe(command, String, :text ) {|line| seq = seq + line unless line =~ /^>/} seq end |
#index ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 247 248 249 |
# File 'lib/bio/db/fastadb.rb', line 236 def index return @index if @index if @samtools faidx else samtools = File.join(File.(File.dirname(__FILE__)),'sam','external','samtools') #TODO: make a ruby implementations command = "#{samtools} faidx #{@fasta_path}" @last_command = command system(command) end load_fai_entries return @index end |
#load_fai_entries ⇒ Object
Loads the fai entries
207 208 209 210 211 212 213 214 215 216 |
# File 'lib/bio/db/fastadb.rb', line 207 def load_fai_entries() return @index.length if @index @index = Index.new fai_file = @fai_file File.open(fai_file).each do | line | fields = line.split("\t") @index << Entry.new(fields[0], fields[1], fields[2], fields[3], fields[4]) end @index.length end |