Module: Bgzf

Defined in:
lib/rbbt/util/misc/bgzf.rb

Instance Attribute Summary

Class Method Summary

Instance Method Summary

Instance Attribute Details

#block_cache_size ⇒ Object

Returns the value of attribute block_cache_size.



6
7
8
# File 'lib/rbbt/util/misc/bgzf.rb', line 6

# Reader for the @block_cache_size instance variable.
#
# Within this module the value is assigned by #_index and compared against
# the number of cached blocks in #_purge_cache. It is nil until assigned.
def block_cache_size
  instance_variable_get(:@block_cache_size)
end

#compressed_stream ⇒ Object

Returns the value of attribute compressed_stream.



6
7
8
# File 'lib/rbbt/util/misc/bgzf.rb', line 6

# Reader for the @compressed_stream instance variable, the stream object
# handed to Bgzf.setup and closed by #close.
def compressed_stream
  instance_variable_get(:@compressed_stream)
end

#data_offset ⇒ Object

Returns the value of attribute data_offset.



6
7
8
# File 'lib/rbbt/util/misc/bgzf.rb', line 6

# Reader for the @data_offset instance variable: the current read position
# that #seek sets and #read / #gets advance.
def data_offset
  instance_variable_get(:@data_offset)
end

Class Method Details

.bgzip_cmd ⇒ Object



8
9
10
11
12
13
14
15
16
17
18
19
# File 'lib/rbbt/util/misc/bgzf.rb', line 8

# Resolves the bgzip executable and memoizes the answer in @@bgzip_cmd.
#
# The lookup first asks bash for a bgzip on the PATH. When that prints
# nothing, the htslib software resource is claimed and produced through
# Rbbt, and the bgzip binary located under it is used instead.
#
# Returns the memoized value on every later call.
def self.bgzip_cmd
  return @@bgzip_cmd if defined?(@@bgzip_cmd) && @@bgzip_cmd

  on_path = `bash -c "type -p bgzip"`.strip
  return @@bgzip_cmd = on_path unless on_path.empty?

  # Nothing on the PATH: have Rbbt install htslib and use its bgzip.
  Rbbt.claim Rbbt.software.opt.htslib, :install, Rbbt.share.install.software.HTSLIB.find(:lib)
  Rbbt.software.opt.htslib.produce
  @@bgzip_cmd = Rbbt.software.opt.htslib.bin.bgzip.find
end

.setup(compressed_stream) ⇒ Object



21
22
23
24
25
26
27
28
# File 'lib/rbbt/util/misc/bgzf.rb', line 21

# Wraps +compressed_stream+ in a Bio::BGZF::Reader and extends that reader
# with this module.
#
# The bio-bgzf library is required lazily, on first use. The returned
# reader remembers the original stream and starts with its data offset
# at 0.
#
# @param compressed_stream the stream passed to Bio::BGZF::Reader.new
# @return the extended reader
def self.setup(compressed_stream)
  require 'bio-bgzf'
  Bio::BGZF::Reader.new(compressed_stream).tap do |bgzf|
    bgzf.extend Bgzf
    bgzf.compressed_stream = compressed_stream
    bgzf.data_offset = 0
  end
end

Instance Method Details

#_get_block(vo) ⇒ Object



136
137
138
139
140
141
142
143
144
145
146
# File 'lib/rbbt/util/misc/bgzf.rb', line 136

# Returns the block stored at offset +vo+, going through the block cache.
#
# Every call appends +vo+ to the @access log. On a cache hit the cached
# block is returned directly. On a miss #_purge_cache runs first, then the
# block is loaded with read_block_at and stored in @blocks.
#
# @param vo offset handed to read_block_at; also the cache key
# @return the cached or freshly read block
def _get_block(vo)
  @blocks ||= {}
  (@access ||= []) << vo
  return @blocks[vo] if @blocks.key?(vo)

  _purge_cache
  @blocks[vo] ||= read_block_at(vo)
end

#_index ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/rbbt/util/misc/bgzf.rb', line 50

# Builds, persists and memoizes the block index.
#
# Each entry is a pair [pos, blockdata_offset]. blockdata_offset is the
# value of `tell` taken just before a block is read, and pos is the running
# sum of the lengths of all blocks read before it. The scan stops at the
# first falsy result from read_block.
#
# The index is stored through Persist.persist under a key derived from
# #filename with a trailing ".bgz" extension removed. As a side effect,
# @block_cache_size is set to log(number_of_blocks + 1).to_i + 1.
#
# NOTE(review): `tell` and `read_block` are not defined in this module;
# presumably they come from the Bio::BGZF::Reader that Bgzf.setup extends.
#
# @return [Array<Array>] the list of [pos, blockdata_offset] pairs
# @raise [RuntimeError] with a hint to decompress the file when read_block
#   fails with a message matching /BGFZ.*expected/; any other error is
#   re-raised untouched
def _index
  @_index ||= begin
    # The dot is escaped so only a literal ".bgz" suffix is stripped.
    prefix_code = "BGZF index" + (filename || "").sub(/\.bgz$/, '')
    index = Persist.persist(prefix_code, :marshal) do
      entries = []
      pos = 0
      while true
        blockdata_offset = tell
        block = begin
                  read_block
                rescue Exception => e
                  # Everything is re-raised; this only swaps in a more helpful message.
                  # NOTE(review): "BGFZ" may be a typo for "BGZF" - confirm
                  # against the message bio-bgzf actually raises.
                  raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if e.message =~ /BGFZ.*expected/
                  raise
                end
        break unless block
        entries << [pos, blockdata_offset]
        pos += block.length
      end
      entries
    end
    @block_cache_size = Math.log(index.length + 1).to_i + 1
    index
  end
end

#_purge_cache ⇒ Object



128
129
130
131
132
133
134
# File 'lib/rbbt/util/misc/bgzf.rb', line 128

# Evicts the least recently used block once the cache holds more than
# @block_cache_size entries.
#
# @access is the access log that #_get_block appends to (newest last,
# repeats allowed). The log is collapsed to one entry per block, ordered by
# each block's most recent access. The first entry that is actually cached
# is the one evicted. A block that is logged but not yet cached, such as
# the one currently being fetched, is never chosen.
#
# The collapsed log replaces @access, so the log stays bounded by the
# number of distinct blocks.
#
# Does nothing when the cache, the log or the size limit is not set yet.
#
# @return the evicted block, or nil when nothing was evicted
def _purge_cache
  return nil if @blocks.nil? || @access.nil? || @block_cache_size.nil?
  return nil unless @blocks.length > @block_cache_size

  # reverse + uniq keeps the LAST occurrence of each block.
  by_recency = @access.reverse.uniq.reverse
  victim = by_recency.find { |vo| @blocks.key?(vo) }

  evicted = @blocks.delete(victim)
  by_recency.delete(victim)
  @access.replace(by_recency)
  evicted
end

#block_offset ⇒ Object



120
121
122
123
124
125
126
# File 'lib/rbbt/util/misc/bgzf.rb', line 120

# Translates the current data offset into a block location.
#
# Looks up the index entry chosen by #closest_page for the current
# #data_offset and returns a pair: the entry's second element (the block's
# offset as recorded in the index) and the distance between the current
# data offset and the entry's first element.
#
# @return [Array] [page, offset]
def block_offset
  position = data_offset
  page_idx = closest_page(position)
  entry_start, page = _index[page_idx]
  [page, position - entry_start]
end

#close ⇒ Object



40
41
42
43
44
# File 'lib/rbbt/util/misc/bgzf.rb', line 40

# Closes the underlying compressed stream, unless it is already closed,
# and empties the access log and the block cache when they exist.
def close
  stream = @compressed_stream
  stream.close unless stream.closed?
  @access && @access.clear
  @blocks && @blocks.clear
end

#closed? ⇒ Boolean

Returns:

  • (Boolean)


36
37
38
# File 'lib/rbbt/util/misc/bgzf.rb', line 36

# Reports whether the underlying compressed stream is closed.
#
# @return [Boolean] whatever the stream's own closed? answers
def closed?
  stream = @compressed_stream
  stream.closed?
end

#closest_page(pos) ⇒ Object



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/rbbt/util/misc/bgzf.rb', line 89

# Finds the index entry that contains data position +pos+.
#
# Runs a binary search over the start positions of the entries in #_index
# (cached in @_index_pos). The first probe of every call is the entry the
# previous call stopped on (@last_idx), when there is one. Later probes
# bisect the remaining range.
#
# @param pos position to locate
# @return [Integer] index of the last entry whose start is <= pos; -1 when
#   the index is empty or +pos+ lies before the first entry
def closest_page(pos)
  hi = _index.size - 1
  lo = 0
  @_index_pos ||= _index.collect { |entry| entry.first }

  return -1 if hi < lo

  probe = nil
  probe_start = nil
  while hi >= lo
    # Only the very first probe reuses the previous call's landing spot.
    probe = (probe.nil? && @last_idx) ? @last_idx : (lo + (hi - lo) / 2)
    probe_start = @_index_pos[probe]

    order = pos <=> probe_start
    break if order == 0

    if order == -1
      hi = probe - 1
    elsif order == 1
      lo = probe + 1
    end
  end

  @last_idx = probe

  # The search may stop one entry past the target; step back in that case.
  probe -= 1 if probe_start > pos

  probe.to_i
end

#filename ⇒ Object



30
31
32
33
34
# File 'lib/rbbt/util/misc/bgzf.rb', line 30

# Name associated with the compressed stream, memoized in @filename.
#
# Uses the stream's own filename when it responds to one. Otherwise a
# random number below 1_000_000_000 is generated once and returned as a
# string.
def filename
  return @filename if @filename

  stream = compressed_stream
  @filename = stream.respond_to?(:filename) ? stream.filename : rand(1000000000).to_s
end

#get_block ⇒ Object



148
149
150
151
152
# File 'lib/rbbt/util/misc/bgzf.rb', line 148

# Returns the part of the current block that starts at the current data
# offset.
#
# #block_offset says which block to fetch and how far into it the current
# position lies. The block comes from #_get_block and everything before
# that in-block offset is dropped.
def get_block
  vo, skip = block_offset
  _get_block(vo)[skip..-1]
end

#getc ⇒ Object



171
172
173
# File 'lib/rbbt/util/misc/bgzf.rb', line 171

# Reads a single character by delegating to #read with a size of 1, and
# returns whatever #read returns for that request.
def getc
  read 1
end

#gets ⇒ Object



175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
# File 'lib/rbbt/util/misc/bgzf.rb', line 175

# Reads one line, newline included, starting at the current data offset.
#
# Data is pulled through #read in chunks of Misc::BLOCK_SIZE until a chunk
# contains "\n" or an empty chunk signals the end of the data. Because
# #read may overshoot the line, @data_offset is afterwards reset to the
# starting offset plus the length of the returned line.
#
# @return [String, nil] the line, or nil when nothing could be read
def gets
  start = @data_offset
  line = nil

  while true
    chunk = read(Misc::BLOCK_SIZE)
    break if chunk.empty?

    line ||= ""
    newline_at = chunk.index("\n")
    if newline_at
      line << chunk[0..newline_at]
      break
    end
    line << chunk
  end

  return nil if line.nil?

  # Rewind past whatever was read beyond the end of the line.
  @data_offset = start + line.length

  line
end

#init ⇒ Object



85
86
87
# File 'lib/rbbt/util/misc/bgzf.rb', line 85

# Forces the block index to be built (or loaded) by calling #_index, and
# returns that index.
def init
  built = _index
  built
end

#read(size = nil) ⇒ Object



154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
# File 'lib/rbbt/util/misc/bgzf.rb', line 154

# Reads up to +size+ characters starting at the current data offset and
# advances @data_offset by the amount returned.
#
# With no size the call is handed to #read_all. Note that #read_all pulls
# blocks straight from read_block and does not consult @data_offset.
#
# When the current block cannot satisfy the request, the rest of that
# block is taken and the remainder is read recursively from the following
# block(s). Reading stops early at the end of the data, so the result may
# be shorter than +size+.
#
# @param size [Integer, nil] number of characters wanted; nil reads all
# @return [String] the data read; "" at the end of the data or for size 0
# @raise [ArgumentError] when +size+ is negative
def read(size=nil)
  return read_all if size.nil?
  # A negative size would otherwise move @data_offset backwards silently.
  raise ArgumentError, "negative length #{size} given" if size < 0

  block = get_block
  return "" if block.nil? || block.empty?

  len = block.length
  if len >= size
    @data_offset += size
    # (start, length) slicing returns "" for size 0, whereas the range
    # 0..size-1 would become 0..-1 and return the whole block.
    block[0, size]
  else
    @data_offset += len
    block << read(size - len)
  end
end

#read_all ⇒ Object



75
76
77
78
79
80
81
82
83
# File 'lib/rbbt/util/misc/bgzf.rb', line 75

# Concatenates every block that read_block still yields, stopping at the
# first nil, and returns the result as one string ("" when there is
# nothing to read).
def read_all
  buffer = ""
  until (chunk = read_block).nil?
    buffer << chunk
  end
  buffer
end

#seek(off) ⇒ Object



46
47
48
# File 'lib/rbbt/util/misc/bgzf.rb', line 46

# Moves the current read position by storing +off+ in @data_offset.
# Nothing is read and the underlying stream is not touched.
#
# @param off new data offset
# @return the offset that was stored
def seek(off)
  instance_variable_set(:@data_offset, off)
end