Module: Bgzf
- Defined in:
- lib/rbbt/util/misc/bgzf.rb
Instance Attribute Summary collapse
-
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
-
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
-
#data_offset ⇒ Object
Returns the value of attribute data_offset.
Class Method Summary collapse
- .setup(compressed_stream) ⇒ Object
Instance Method Summary collapse
- #_get_block(vo) ⇒ Object
- #_index ⇒ Object
- #_purge_cache ⇒ Object
- #block_offset ⇒ Object
- #close ⇒ Object
- #closed? ⇒ Boolean
- #closest_page(pos) ⇒ Object
- #filename ⇒ Object
- #get_block ⇒ Object
- #getc ⇒ Object
- #gets ⇒ Object
- #init ⇒ Object
- #read(size = nil) ⇒ Object
- #read_all ⇒ Object
- #seek(off) ⇒ Object
Instance Attribute Details
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
5 6 7 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 5
# Reader for the block cache size attribute.
#
# The value is assigned by #_index and compared against the number of cached
# blocks in #_purge_cache.
#
# @return [Object] the current value of @block_cache_size (nil until set)
def block_cache_size
  @block_cache_size
end
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
5 6 7 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 5
# Reader for the underlying compressed stream attribute.
#
# The value is assigned by Bgzf.setup and is the object that #close and
# #closed? delegate to.
#
# @return [Object] the current value of @compressed_stream
def compressed_stream
  @compressed_stream
end
#data_offset ⇒ Object
Returns the value of attribute data_offset.
5 6 7 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 5
# Reader for the current position in the uncompressed data.
#
# The value is set to 0 by Bgzf.setup, overwritten by #seek and advanced by
# #read and #gets.
#
# @return [Object] the current value of @data_offset
def data_offset
  @data_offset
end
Class Method Details
.setup(compressed_stream) ⇒ Object
7 8 9 10 11 12 13 14 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 7
# Wraps +compressed_stream+ in a Bio::BGZF::Reader and extends that reader
# with this module.
#
# The 'bio-bgzf' library is required lazily, on first use. The returned reader
# remembers the stream it was built from and starts at data offset 0.
#
# @param compressed_stream [Object] stream handed to Bio::BGZF::Reader.new
# @return [Object] the Bio::BGZF::Reader instance, extended with Bgzf
def self.setup(compressed_stream)
  require 'bio-bgzf'

  Bio::BGZF::Reader.new(compressed_stream).tap do |bgzf|
    bgzf.extend Bgzf
    bgzf.compressed_stream = compressed_stream
    bgzf.data_offset = 0
  end
end
Instance Method Details
#_get_block(vo) ⇒ Object
121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 121
# Returns the block stored at +vo+, serving it from the in-memory cache when
# possible and reading it with read_block_at otherwise.
#
# @access records block usage with the most recently used block last and at
# most one entry per block. Previously every call appended an entry, so the
# list grew with the number of reads rather than the number of blocks.
#
# @param vo [Object] block identifier passed to read_block_at (the second
#   element of an index entry, see #block_offset)
# @return [Object] the block returned by read_block_at for +vo+
def _get_block(vo)
  @blocks ||= {}
  @access ||= []

  # Move +vo+ to the "most recently used" end without keeping duplicates.
  @access.delete vo
  @access << vo

  return @blocks[vo] if @blocks.include? vo

  # Make room before inserting a block that is not cached yet.
  _purge_cache
  @blocks[vo] ||= read_block_at vo
end
#_index ⇒ Object
36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 36
# Builds (once per reader) and returns the block index: an array of
# [uncompressed_position, value_of_tell_before_the_block] pairs, one per
# block returned by read_block.
#
# The index is obtained through Persist.persist under a name derived from
# #filename with a trailing ".bgz" removed. As a side effect,
# @block_cache_size is set to log(number of blocks) + 1, and to 1 when the
# index is empty (Math.log(0).to_i would raise FloatDomainError).
#
# NOTE(review): the scan starts at whatever position the reader is in when
# this is first called -- presumably the start of the stream; confirm.
#
# @return [Array<Array>] the index entries
# @raise [RuntimeError] with a hint to uncompress the file when read_block
#   fails with a message matching /BGFZ.*expected/; any other error from
#   read_block is re-raised unchanged
def _index
  @_index ||= begin
    # Escape the dot so only a real ".bgz" suffix is stripped from the name.
    index = Persist.persist("BGZF index" + (filename || "").sub(/\.bgz$/, ''), :marshal, :dir => Rbbt.var.bgzf_index) do
      entries = []
      pos = 0
      while true do
        blockdata_offset = tell
        block = begin
                  read_block
                rescue StandardError => e
                  # Match on the message: `exception =~ regexp` is never true
                  # (and is a NoMethodError on recent Rubies).
                  # NOTE(review): "BGFZ" is kept as in the original pattern;
                  # confirm it matches the library's actual message.
                  raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if e.message =~ /BGFZ.*expected/
                  raise
                end
        break unless block
        entries << [pos, blockdata_offset]
        pos += block.length
      end
      entries
    end

    @block_cache_size = index.empty? ? 1 : Math.log(index.length).to_i + 1
    index
  end
end
#_purge_cache ⇒ Object
113 114 115 116 117 118 119 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 113
# Evicts least-recently-used blocks while the cache holds more than
# @block_cache_size entries.
#
# Recency is derived from @access (most recent use last). The previous
# implementation deleted @access.last, which is the block just requested by
# #_get_block and therefore not cached yet, so nothing was ever evicted.
#
# Also compacts @access to one entry per block, ordered by last use.
#
# @return [void]
def _purge_cache
  return if @blocks.nil? || @access.nil?
  return unless @blocks.length > @block_cache_size

  # One entry per block, least recently used first.
  by_recency = @access.reverse.uniq.reverse
  @access.replace by_recency

  by_recency.each do |vo|
    break unless @blocks.length > @block_cache_size
    # Entries for blocks that are not cached (e.g. the one about to be
    # loaded) stay in the access log.
    next unless @blocks.key? vo

    @blocks.delete vo
    @access.delete vo
  end
end
#block_offset ⇒ Object
105 106 107 108 109 110 111 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 105
# Translates the current data offset into a block location.
#
# Looks up the index entry chosen by #closest_page and returns the entry's
# second element together with the distance between the current data offset
# and the entry's first element.
#
# @return [Array] two elements: [entry[1], data_offset - entry[0]]
def block_offset
  current = data_offset
  entry_number = closest_page(data_offset)
  page = _index[entry_number][1]
  [page, current - _index[entry_number][0]]
end
#close ⇒ Object
26 27 28 29 30 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 26
# Closes the underlying compressed stream (if it is still open) and empties
# the access log and the block cache when they exist.
#
# @return [Hash, nil] the emptied block cache, or nil if none was created
def close
  stream = @compressed_stream
  stream.close unless stream.closed?

  @access&.clear
  @blocks&.clear
end
#closed? ⇒ Boolean
22 23 24 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 22
# Reports whether the underlying compressed stream is closed.
#
# @return [Boolean] whatever @compressed_stream.closed? answers
def closed?
  stream = @compressed_stream
  stream.closed?
end
#closest_page(pos) ⇒ Object
74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 74
# Finds the index entry whose start position is the closest one at or before
# +pos+, using a binary search over the start positions of #_index.
#
# The first probe of each search is the last position probed by the previous
# call (@last_idx), when there is one; after that the usual midpoint is used.
#
# @param pos [Integer] position to look up
# @return [Integer] the entry number; -1 when the index is empty or when
#   +pos+ lies before the first entry
def closest_page(pos)
  high = _index.size - 1
  low = 0
  @_index_pos ||= _index.collect { |entry| entry.first }

  return -1 if high < low

  probe = nil
  probe_pos = nil
  while high >= low
    # Warm start: only the very first probe reuses the remembered position.
    probe = (probe.nil? and @last_idx) ? @last_idx : (low + (high - low) / 2)
    probe_pos = @_index_pos[probe]

    order = pos <=> probe_pos
    break if order == 0

    if order == -1
      high = probe - 1
    elsif order == 1
      low = probe + 1
    end
  end

  @last_idx = probe

  # The search may stop one entry past the target; step back in that case.
  probe -= 1 if probe_pos > pos

  probe.to_i
end
#filename ⇒ Object
16 17 18 19 20 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 16
# Name of the file behind the compressed stream, when the stream can tell.
#
# A non-nil answer is remembered in @filename; a nil answer is looked up
# again on the next call.
#
# @return [Object, nil] compressed_stream.filename, or nil when the stream
#   does not respond to :filename
def filename
  return @filename if @filename

  stream_has_name = compressed_stream.respond_to?(:filename)
  @filename = stream_has_name ? compressed_stream.filename : nil
end
#get_block ⇒ Object
133 134 135 136 137 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 133
# Returns the part of the current block that starts at the current data
# offset.
#
# The block and the offset inside it come from #block_offset; the block
# contents come from #_get_block.
#
# @return [Object] block[offset..-1] for the block at the current position
def get_block
  virtual_offset, skip = block_offset
  contents = _get_block(virtual_offset)
  contents[skip..-1]
end
#getc ⇒ Object
156 157 158 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 156
# Reads a single unit of data by delegating to #read with a size of 1.
#
# @return [Object] whatever read(1) returns
def getc
  read 1
end
#gets ⇒ Object
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 160
# Reads the next line, including its trailing "\n" when there is one.
#
# Data is pulled through #read in chunks of 1024 until a chunk contains a
# newline or #read returns an empty chunk. Because #read may advance past the
# end of the line, @data_offset is reset afterwards to the starting offset
# plus the length of the returned line.
#
# @return [String, nil] the line, or nil when the first chunk is empty
def gets
  start = @data_offset
  line = nil

  until (chunk = read(1024)).empty?
    line ||= ""
    newline_at = chunk.index("\n")
    if newline_at
      line << chunk[0..newline_at]
      break
    end
    line << chunk
  end

  return nil if line.nil?

  @data_offset = start + line.length
  line
end
#init ⇒ Object
70 71 72 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 70
# Forces the block index to be available by calling #_index.
#
# @return [Object] whatever #_index returns
def init
  _index
end
#read(size = nil) ⇒ Object
139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 139
# Reads up to +size+ units starting at the current data offset and advances
# @data_offset by the amount returned. Without a size, delegates to #read_all.
#
# Changes from the previous implementation:
# * read(0) returns "" -- `block[0..size-1]` used to return the whole rest of
#   the block for size 0.
# * a negative size raises instead of moving the offset backwards.
# * blocks are walked in a loop rather than by recursion, so the call depth
#   no longer grows with the number of blocks spanned.
#
# @param size [Integer, nil] amount to read; nil reads through #read_all
# @return [String] the data read; "" when #get_block has nothing to offer at
#   the current offset
# @raise [ArgumentError] if +size+ is negative
def read(size=nil)
  return read_all if size.nil?
  raise ArgumentError, "negative length #{size} given" if size < 0

  result = nil
  remaining = size
  while true
    block = get_block
    break if block.nil? or block.empty?

    # Take the whole remainder of the block unless less is needed.
    chunk = block.length >= remaining ? block[0, remaining] : block
    @data_offset += chunk.length
    remaining -= chunk.length

    if result.nil?
      result = chunk
    else
      result << chunk
    end
    break if remaining <= 0
  end

  result || ""
end
#read_all ⇒ Object
60 61 62 63 64 65 66 67 68 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 60
# Concatenates every block that read_block returns until it returns nil.
#
# NOTE(review): this reads from wherever read_block currently is and neither
# consults nor updates @data_offset -- confirm that is intended for callers
# that used #seek first.
#
# @return [String] the concatenated blocks ("" when there are none)
def read_all
  contents = ""
  until (block = read_block).nil?
    contents << block
  end
  contents
end
#seek(off) ⇒ Object
32 33 34 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 32
# Moves the reading position by overwriting @data_offset; no data is read
# and the value is not validated.
#
# @param off [Object] new value for @data_offset
# @return [Object] +off+
def seek(off)
  @data_offset = off
end