Class: Bio::Tabix::TFile
- Inherits:
-
Object
- Object
- Bio::Tabix::TFile
- Includes:
- Binding
- Defined in:
- lib/bio/tabix/t_file.rb
Overview
The TFile class manages compressing, indexing, opening and parsing tab delimited files. The file must be position sorted prior to indexing.
Instance Attribute Summary collapse
-
#file ⇒ Object
ascii or compressed file name.
-
#index ⇒ Object
index name.
-
#options ⇒ Object
index build options.
-
#t_file ⇒ Object
TabixT created from open index.
-
#t_file_p ⇒ Object
pointer to TabixT.
Class Method Summary collapse
-
.build_index(f, opts = {}) ⇒ Object
Builds an index from the supplied filename and options - :s => sequence/group column [1] - :b => beginning range column [2] - :e => ending range column.
-
.compress(fi, fo) ⇒ Object
compresses the fi into fo using bgzip.
-
.open(*args) ⇒ Object
convenience method to create a new TFile instance and open it.
Instance Method Summary collapse
-
#close ⇒ Object
closes the TabixT file.
-
#groups ⇒ Object
returns an array of the group names found in the index.
-
#header ⇒ Object
returns the header (skipped lines + comments).
-
#initialize(f, opts = {}) ⇒ TFile
constructor
Returns a new TFile.
-
#open ⇒ Object
opens the file checking for compression and corresponding index.
-
#process_region(group, pos1, pos2, user_proc) ⇒ Object
Iterates over the supplied region, calling user_proc on each item. A region is defined by a group name and a range (pos1 - pos2); all overlapping intervals within the group will be processed in order.
Constructor Details
#initialize(f, opts = {}) ⇒ TFile
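Returns a new TFile. The constructor only records the file name, the options and the index name (file name + ".tbi"); nothing is read or written until #open is called. On #open, if the file is not compressed, a new compressed file will be created with compress, and if the index is not present, a new index will be created with build_index.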
63 64 65 66 67 68 |
# File 'lib/bio/tabix/t_file.rb', line 63 def initialize(f, opts={}) @file = f @options = opts @index = file+".tbi" return self end |
Instance Attribute Details
#file ⇒ Object
ascii or compressed file name
21 22 23 |
# File 'lib/bio/tabix/t_file.rb', line 21 def file @file end |
#index ⇒ Object
index name
23 24 25 |
# File 'lib/bio/tabix/t_file.rb', line 23 def index @index end |
#options ⇒ Object
index build options
29 30 31 |
# File 'lib/bio/tabix/t_file.rb', line 29 def options @options end |
#t_file ⇒ Object
TabixT created from open index
25 26 27 |
# File 'lib/bio/tabix/t_file.rb', line 25 def t_file @t_file end |
#t_file_p ⇒ Object
pointer to TabixT
27 28 29 |
# File 'lib/bio/tabix/t_file.rb', line 27 def t_file_p @t_file_p end |
Class Method Details
.build_index(f, opts = {}) ⇒ Object
Builds an index from the supplied filename and options
-
:s => sequence/group column [1]
-
:b => beginning range column [2]
-
:e => ending range column. Can equal :b. [3]
-
:meta_char => comment character [#]
-
:line_skip => number of initial lines to ignore [0]
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 |
# File 'lib/bio/tabix/t_file.rb', line 40 def self.build_index(f, opts={}) conf = ConfT.new conf[:preset]=0 conf[:sc]=opts[:s] || 1 conf[:bc]=opts[:b] || 2 conf[:ec]=opts[:e] || 3 conf[:meta_char]=('#'||opts[:c][0]).ord conf[:line_skip]=(0||opts[:S]).to_i unless(Bio::Tabix::Binding.bgzf_is_bgzf(f)==1) puts "Compressing..." self.class.compress(f,f+".bgzf") f=f+".bgzf" end puts "Indexing with #{conf.get_hash}..." Bio::Tabix::Binding.ti_index_build2(f,conf,f+".tbi") end |
.compress(fi, fo) ⇒ Object
compresses the fi into fo using bgzip
31 32 33 |
# File 'lib/bio/tabix/t_file.rb', line 31 def self.compress(fi, fo) `#{File.join(File.expand_path(File.dirname(__FILE__)),'bgzip')} -c #{fi} > #{fo}` end |
.open(*args) ⇒ Object
convenience method to create a new TFile instance and open it.
57 58 59 |
# File 'lib/bio/tabix/t_file.rb', line 57 def self.open(*args) self.new(*args).open end |
Instance Method Details
#close ⇒ Object
closes the TabixT file
103 104 105 106 107 108 109 110 111 112 |
# File 'lib/bio/tabix/t_file.rb', line 103 def close if(@t_file_p) begin ti_close(@t_file_p) @t_file_p = nil rescue puts "Error closing file" end end end |
#groups ⇒ Object
returns an array of the group names found in the index
114 115 116 117 118 119 120 |
# File 'lib/bio/tabix/t_file.rb', line 114 def groups load_index g_num = FFI::MemoryPointer.new(:int) g_ptr = ti_seqname(t_file[:idx],g_num) return [] if g_ptr.null? || g_num.null? g_ptr.get_array_of_string(0, g_num.read_int).compact end |
#header ⇒ Object
returns the header (skipped lines + comments)
122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/bio/tabix/t_file.rb', line 122 def header load_index conf = ConfT.new(ti_get_conf(t_file[:idx])) iter = IterT.new(ti_query(t_file_p,nil,0,1)) len = FFI::MemoryPointer.new(:int) str = "" while( (s = ti_read(t_file_p, iter, len)) ) break if(s[0].ord != conf[:meta_char]) str << s str << "\n" end ti_iter_destroy(iter) @header = str end |
#open ⇒ Object
opens the file checking for compression and corresponding index.
70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/bio/tabix/t_file.rb', line 70 def open # check existing if(@t_file) puts "Already open, closing and re-opening" self.close end # check datafile if file =~ /http:\/\/|ftp:\/\// puts "Expecting remote file: #{file}" else raise "FileNotFound #{file}" unless(File.exist?(file)) unless(bgzf_is_bgzf(file)==1) unless(bgzf_is_bgzf(file+".bgzf")==1) puts "Input does not look like a bgzip compressed file. Attempting compression..." self.class.compress(file,file+".bgzf") end @file = file+".bgzf" end end # check index if index =~ /http:\/\/|ftp:\/\// puts "Expecting remote index: #{index}" elsif !File.exist?(index) puts "Index #{index} not found. Building..." self.class.build_index(file,options) end # open @t_file_p = ti_open(file,index) raise "FileAcessError #{file}" if @t_file_p.null? @t_file = TabixT.new(@t_file_p) return self end |
#process_region(group, pos1, pos2, user_proc) ⇒ Object
Iterates over the supplied region, calling user_proc on each item. A region is defined by a group name and a range (pos1 - pos2); all overlapping intervals within the group will be processed in order.
139 140 141 142 143 144 145 146 147 |
# File 'lib/bio/tabix/t_file.rb', line 139 def process_region(group, pos1, pos2, user_proc) iter = IterT.new(ti_query(t_file_p,group,pos1,pos2)) return if iter.null? len = FFI::MemoryPointer.new(:int) while( (s = ti_read(t_file_p, iter, len)) ) user_proc.call(s,len) end ti_iter_destroy(iter) end |