Class: Bio::Tabix::TFile

Inherits:
Object
  • Object
show all
Includes:
Binding
Defined in:
lib/bio/tabix/t_file.rb

Overview

The TFile class manages compressing, indexing, opening and parsing tab delimited files. The file must be position sorted prior to indexing.

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(f, opts = {}) ⇒ TFile

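Returns a new TFile. The constructor only records the file name, the options and the derived index name (file + ".tbi"). Compression and indexing are deferred to open: if the file is not compressed, a new compressed file will be created with compress, and if the index is not present a new index will be created with build_index.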



63
64
65
66
67
68
# File 'lib/bio/tabix/t_file.rb', line 63

# Creates a TFile for the data file +f+.
#
# Only state is recorded here: the file name, the option hash and the
# index name, which is derived by appending ".tbi" to the file name.
#
# @param f    the data file name (must support +)
# @param opts [Hash] options, stored as given
def initialize(f, opts={})
  @options = opts
  @file = f
  @index = f + ".tbi"
  self
end

Instance Attribute Details

#fileObject

ascii or compressed file name



21
22
23
# File 'lib/bio/tabix/t_file.rb', line 21

# Reader for @file, the plain-text (ASCII) or compressed data file name.
# Returns nil if @file has not been assigned yet.
def file
  @file
end

#indexObject

index name



23
24
25
# File 'lib/bio/tabix/t_file.rb', line 23

# Reader for @index, the name of the index that belongs to the data file.
# Returns nil if @index has not been assigned yet.
def index
  @index
end

#optionsObject

index build options



29
30
31
# File 'lib/bio/tabix/t_file.rb', line 29

# Reader for @options, the options used when an index has to be built.
# Returns nil if @options has not been assigned yet.
def options
  @options
end

#t_fileObject

TabixT created from open index



25
26
27
# File 'lib/bio/tabix/t_file.rb', line 25

# Reader for @t_file, the TabixT object created when the file is opened.
# Returns nil if @t_file has not been assigned yet.
def t_file
  @t_file
end

#t_file_pObject

pointer to TabixT



27
28
29
# File 'lib/bio/tabix/t_file.rb', line 27

# Reader for @t_file_p, the pointer that the TabixT object is built from.
# Returns nil if @t_file_p has not been assigned yet.
def t_file_p
  @t_file_p
end

Class Method Details

.build_index(f, opts = {}) ⇒ Object

Builds an index from the supplied filename and options

  • :s => sequence/group column [1]

  • :b => beginning range column [2]

  • :e => ending range column. Can equal :b. [3]

  • :meta_char => comment character [#]

  • :line_skip => number of initial lines to ignore [0]



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
# File 'lib/bio/tabix/t_file.rb', line 40

# Builds an index for +f+, compressing the file first when
# bgzf_is_bgzf does not report it as bgzf-compressed.
#
# Recognised options (defaults in brackets):
# * :s          sequence/group column [1]
# * :b          beginning range column [2]
# * :e          ending range column [3]
# * :meta_char  comment character ['#'] (:c is accepted as an alias)
# * :line_skip  number of initial lines to ignore [0] (:S is accepted
#               as an alias)
#
# @param f    [String] name of the file to index
# @param opts [Hash] index options, see above
# @return the result of ti_index_build2
def self.build_index(f, opts={})
  conf = ConfT.new
  conf[:preset] = 0
  conf[:sc] = opts[:s] || 1
  conf[:bc] = opts[:b] || 2
  conf[:ec] = opts[:e] || 3

  # The option must come first in the || chain; with the default first
  # the user supplied value would never be looked at.
  meta = (opts[:meta_char] || opts[:c] || '#').to_s
  meta = '#' if meta.empty?
  conf[:meta_char] = meta[0].ord
  conf[:line_skip] = (opts[:line_skip] || opts[:S] || 0).to_i

  unless Bio::Tabix::Binding.bgzf_is_bgzf(f) == 1
    puts "Compressing..."
    # self is already the class here, so compress is called directly
    # (self.class would be Class, which has no compress method).
    compress(f, f + ".bgzf")
    f = f + ".bgzf"
  end
  puts "Indexing with #{conf.get_hash}..."
  Bio::Tabix::Binding.ti_index_build2(f, conf, f + ".tbi")
end

.compress(fi, fo) ⇒ Object

compresses the fi into fo using bgzip



31
32
33
# File 'lib/bio/tabix/t_file.rb', line 31

# Compresses +fi+ into +fo+ with the bgzip executable that is expected
# to sit in the same directory as this source file.
#
# The command is started without a shell: the arguments are passed as
# an argument vector and stdout is redirected to +fo+ by Ruby itself,
# so file names containing spaces or shell metacharacters are handled
# literally instead of being interpreted by the shell.
#
# @param fi [String] name of the input file
# @param fo [String] name of the compressed output file (created or
#   truncated)
# @return [true, false, nil] the result of Kernel#system: true when
#   bgzip exits successfully, false on a non-zero exit status, nil when
#   the command could not be executed
def self.compress(fi, fo)
  bgzip = File.join(File.expand_path(File.dirname(__FILE__)), 'bgzip')
  system(bgzip, '-c', fi, :out => fo)
end

.open(*args) ⇒ Object

convenience method to create a new Tabix instance and open it.



57
58
59
# File 'lib/bio/tabix/t_file.rb', line 57

# Convenience constructor: builds a new instance from +args+ and
# immediately calls open on it.
#
# @param args arguments forwarded unchanged to new
# @return whatever the instance's open method returns
def self.open(*args)
  instance = new(*args)
  instance.open
end

Instance Method Details

#closeObject

closes the TabixT file



103
104
105
106
107
108
109
110
111
112
# File 'lib/bio/tabix/t_file.rb', line 103

# Closes the open tabix handle, if there is one.
#
# After a successful ti_close both @t_file_p and @t_file are reset to
# nil: @t_file is built from @t_file_p, so keeping it would leave an
# object that still refers to the closed handle.
#
# Closing is best effort: an error raised while closing is reported on
# stdout and not propagated; in that case the handles are left as is.
#
# @return [nil]
def close
  return unless @t_file_p

  begin
    ti_close(@t_file_p)
    @t_file_p = nil
    @t_file = nil
  rescue
    puts "Error closing file"
  end
end

#groupsObject

returns an array of the group names found in the index



114
115
116
117
118
119
120
# File 'lib/bio/tabix/t_file.rb', line 114

# Lists the group names stored in the index.
#
# Calls load_index, asks ti_seqname for the names (the number of names
# is written into an int-sized memory pointer) and reads that many
# strings from the returned pointer, dropping nil entries.
#
# @return [Array] the group names, or an empty array when either
#   pointer is null
def groups
  load_index
  count_ptr = FFI::MemoryPointer.new(:int)
  names_ptr = ti_seqname(t_file[:idx], count_ptr)
  if names_ptr.null? || count_ptr.null?
    []
  else
    names = names_ptr.get_array_of_string(0, count_ptr.read_int)
    names.compact
  end
end

#headerObject

returns the header (skipped lines + comments)



122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/bio/tabix/t_file.rb', line 122

# Reads the leading comment lines of the file.
#
# Lines are read through an iterator for as long as they start with the
# comment character stored in the index configuration (:meta_char).
# Reading stops at the first line that does not start with it; an empty
# line also ends the header instead of raising on a missing first
# character. The iterator is destroyed even if reading raises.
#
# The result is also stored in @header.
#
# @return [String] the collected lines, each terminated by "\n"
def header
  load_index
  conf = ConfT.new(ti_get_conf(t_file[:idx]))
  iter = IterT.new(ti_query(t_file_p, nil, 0, 1))
  len = FFI::MemoryPointer.new(:int)
  str = ""
  begin
    while (s = ti_read(t_file_p, iter, len))
      break if s.empty? || s[0].ord != conf[:meta_char]
      str << s
      str << "\n"
    end
  ensure
    # release the native iterator on every exit path
    ti_iter_destroy(iter)
  end
  @header = str
end

#openObject

opens the file checking for compression and corresponding index.



70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/bio/tabix/t_file.rb', line 70

# Opens the file, checking for compression and for the index.
#
# * An already open instance is closed first.
# * Names containing http:// or ftp:// are treated as remote and are
#   not checked on disk.
# * A local file that bgzf_is_bgzf does not report as compressed is
#   replaced by file + ".bgzf"; that file is created with compress
#   unless it already is a bgzf file.
# * A local index that does not exist is built with build_index.
#
# When the data file is switched to the ".bgzf" variant, an index name
# that was still the default (file + ".tbi") follows it. build_index
# writes the index next to the file it is given, so without this the
# index opened below would not be the one that was just built. An index
# name that differs from the default is left untouched.
#
# @raise [RuntimeError] if a local file does not exist or ti_open
#   returns a null pointer
# @return [self]
def open
  # check existing
  if @t_file
    puts "Already open, closing and re-opening"
    self.close
  end
  # check datafile
  if file =~ /http:\/\/|ftp:\/\//
    puts "Expecting remote file: #{file}"
  else
    raise "FileNotFound #{file}" unless File.exist?(file)
    unless bgzf_is_bgzf(file) == 1
      compressed = file + ".bgzf"
      unless bgzf_is_bgzf(compressed) == 1
        puts "Input does not look like a bgzip compressed file. Attempting compression..."
        self.class.compress(file, compressed)
      end
      # keep a default index name in step with the file actually used
      @index = compressed + ".tbi" if @index == file + ".tbi"
      @file = compressed
    end
  end
  # check index
  if index =~ /http:\/\/|ftp:\/\//
    puts "Expecting remote index: #{index}"
  elsif !File.exist?(index)
    puts "Index #{index} not found. Building..."
    self.class.build_index(file, options)
  end
  # open
  @t_file_p = ti_open(file, index)
  raise "FileAcessError #{file}" if @t_file_p.null?
  @t_file = TabixT.new(@t_file_p)
  return self
end

#process_region(group, pos1, pos2, user_proc) ⇒ Object

Iterates over the supplied region, calling user_proc on each item. A region is defined by a group name and a range (pos1 - pos2); all overlapping intervals within the group will be processed in order.



139
140
141
142
143
144
145
146
147
# File 'lib/bio/tabix/t_file.rb', line 139

# Calls +user_proc+ for every item read from the region defined by
# +group+ and the range +pos1+ - +pos2+.
#
# user_proc is called with two arguments: the value returned by ti_read
# and the int-sized memory pointer that was handed to ti_read.
#
# The iterator is destroyed in an ensure clause, so it is released even
# when user_proc raises.
#
# @param group     group name passed to ti_query
# @param pos1      start of the range passed to ti_query
# @param pos2      end of the range passed to ti_query
# @param user_proc [#call] callback invoked once per item
# @return [nil]
def process_region(group, pos1, pos2, user_proc)
  iter = IterT.new(ti_query(t_file_p, group, pos1, pos2))
  return if iter.null?

  len = FFI::MemoryPointer.new(:int)
  begin
    while (s = ti_read(t_file_p, iter, len))
      user_proc.call(s, len)
    end
  ensure
    ti_iter_destroy(iter)
  end
end