Module: Bgzf
- Defined in:
- lib/rbbt/util/misc/bgzf.rb
Instance Attribute Summary collapse
-
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
-
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
-
#data_offset ⇒ Object
Returns the value of attribute data_offset.
Class Method Summary collapse
- .bgzip_cmd ⇒ Object
- .setup(compressed_stream) ⇒ Object
Instance Method Summary collapse
- #_get_block(vo) ⇒ Object
- #_index ⇒ Object
- #_purge_cache ⇒ Object
- #block_offset ⇒ Object
- #close ⇒ Object
- #closed? ⇒ Boolean
- #closest_page(pos) ⇒ Object
- #filename ⇒ Object
- #get_block ⇒ Object
- #getc ⇒ Object
- #gets ⇒ Object
- #init ⇒ Object
- #read(size = nil) ⇒ Object
- #read_all ⇒ Object
- #seek(off) ⇒ Object
Instance Attribute Details
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the block_cache_size attribute.
#
# @return [Object] the current value of @block_cache_size (nil until assigned)
def block_cache_size
  @block_cache_size
end
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the compressed_stream attribute.
#
# @return [Object] the current value of @compressed_stream (nil until assigned)
def compressed_stream
  @compressed_stream
end
#data_offset ⇒ Object
Returns the value of attribute data_offset.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the data_offset attribute.
#
# @return [Object] the current value of @data_offset (nil until assigned)
def data_offset
  @data_offset
end
Class Method Details
.bgzip_cmd ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 8
# Resolves the bgzip executable once and memoizes the result in @@bgzip_cmd.
#
# First asks bash (`type -p bgzip`) for an executable on the PATH. When bash
# reports nothing, the htslib software resource is claimed and produced
# through Rbbt and the bgzip binary inside it is used instead.
#
# @return [Object] the stripped path printed by bash, or whatever
#   Rbbt.software.opt.htslib.bin.bgzip.find returns
def self.bgzip_cmd
  @@bgzip_cmd ||= begin
                    located = `bash -c "type -p bgzip"`.strip
                    if !located.empty?
                      located
                    else
                      Rbbt.claim Rbbt.software.opt.htslib, :install, Rbbt.share.install.software.HTSLIB.find(:lib)
                      Rbbt.software.opt.htslib.produce
                      Rbbt.software.opt.htslib.bin.bgzip.find
                    end
                  end
end
.setup(compressed_stream) ⇒ Object
21 22 23 24 25 26 27 28 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 21
# Wraps a compressed stream in a Bio::BGZF::Reader extended with Bgzf.
#
# The bio-bgzf library is loaded lazily, on first use. The returned reader
# remembers the stream it was built from and starts at data offset 0.
#
# @param compressed_stream [Object] stream handed to Bio::BGZF::Reader.new
# @return [Bio::BGZF::Reader] the reader, extended with Bgzf
def self.setup(compressed_stream)
  require 'bio-bgzf'
  Bio::BGZF::Reader.new(compressed_stream).tap do |bgzf_reader|
    bgzf_reader.extend Bgzf
    bgzf_reader.compressed_stream = compressed_stream
    bgzf_reader.data_offset = 0
  end
end
Instance Method Details
#_get_block(vo) ⇒ Object
136 137 138 139 140 141 142 143 144 145 146 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 136
# Returns the block stored at offset +vo+, going through the block cache.
#
# @blocks maps an offset to its block; @access lists offsets from least to
# most recently used. Each offset appears in @access at most once, so the
# list cannot grow with repeated hits on the same block.
#
# @param vo [Object] offset passed to read_block_at and used as the cache key
# @return [Object] the cached block, or the result of read_block_at(vo)
def _get_block(vo)
  @blocks ||= {}
  @access ||= []

  # Move vo to the most-recently-used end instead of appending a duplicate.
  @access.delete vo
  @access << vo

  return @blocks[vo] if @blocks.include? vo

  # Cache miss: give the cache a chance to evict before storing a new block.
  _purge_cache
  @blocks[vo] = read_block_at vo
end
#_index ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 50
# Builds and memoizes the block index of the stream.
#
# The index is an Array of [pos, blockdata_offset] pairs, one per block:
# +pos+ is the running sum of the lengths of the preceding blocks and
# +blockdata_offset+ is the value of +tell+ just before the block was read.
# The scan is wrapped in Persist.persist, keyed by the filename with a
# trailing ".bgz" removed. As a side effect @block_cache_size is set to
# log(number of blocks + 1), truncated, plus one.
#
# @return [Array<Array>] the index
# @raise [RuntimeError] when read_block fails with a message matching
#   /BGFZ.*expected/; any other error is re-raised unchanged
def _index
  @_index ||= begin
    # The dot is escaped so only a literal ".bgz" suffix is stripped.
    prefix_code = "BGZF index" + (filename || "").sub(/\.bgz$/,'')
    index = Persist.persist(prefix_code, :marshal) do
      entries = []
      pos = 0
      while true do
        blockdata_offset = tell
        block = begin
                  read_block
                rescue StandardError => e
                  # Match against the message text, not the exception object.
                  # NOTE(review): the pattern says "BGFZ" while the library is
                  # called BGZF -- confirm it matches the real error text.
                  raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if e.message =~ /BGFZ.*expected/
                  raise
                end
        break unless block
        entries << [pos, blockdata_offset]
        pos += block.length
      end
      entries
    end
    @block_cache_size = Math.log(index.length + 1).to_i + 1
    index
  end
end
#_purge_cache ⇒ Object
128 129 130 131 132 133 134 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 128
# Evicts the least recently used block once the cache holds more than
# @block_cache_size entries.
#
# @access records offsets in the order they were requested and may hold
# repeats. It is first reduced to the last request of each offset, so it
# reads oldest-to-newest. The victim is the first offset in that order that
# is actually cached; an offset just requested but not yet stored is
# therefore never chosen.
#
# @return [Object, nil] the evicted block, or nil when nothing was evicted
def _purge_cache
  if @blocks.length > @block_cache_size
    # Keep only the most recent access of each offset, oldest first.
    @access.replace(@access.reverse.uniq.reverse)
    oldest = @access.find { |vo| @blocks.include? vo }
    @access.delete oldest
    @blocks.delete oldest
  end
end
#block_offset ⇒ Object
120 121 122 123 124 125 126 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 120
# Translates the current data offset into a location inside a block.
#
# Looks up the index entry chosen by closest_page for data_offset and
# returns that entry's second element together with the distance between
# data_offset and the entry's first element.
#
# @return [Array] two elements: [page, offset]
def block_offset
  current = data_offset
  entry_at = closest_page(data_offset)
  [_index[entry_at][1], current - _index[entry_at][0]]
end
#close ⇒ Object
40 41 42 43 44 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 40
# Closes the underlying compressed stream, unless it is already closed, and
# empties the access list and the block cache when they exist.
#
# @return [Object, nil] the emptied @blocks, or nil when @blocks is unset
def close
  stream = @compressed_stream
  stream.close unless stream.closed?
  @access.clear if @access
  @blocks ? @blocks.clear : nil
end
#closed? ⇒ Boolean
36 37 38 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 36
# Reports whether the underlying compressed stream is closed.
#
# @return [Boolean] whatever @compressed_stream.closed? answers
def closed?
  stream = @compressed_stream
  stream.closed?
end
#closest_page(pos) ⇒ Object
89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 89
# Finds the index entry whose starting position is the greatest one not
# larger than +pos+, using a binary search over the first elements of the
# index entries (cached in @_index_pos).
#
# The first probe of a call reuses the last index probed by the previous
# call (@last_idx) when there is one.
#
# @param pos [Integer] position to locate
# @return [Integer] index into _index; -1 when the index is empty or +pos+
#   lies before the first entry
def closest_page(pos)
  hi = _index.size - 1
  lo = 0
  @_index_pos ||= _index.collect{|v| v.first }

  return -1 if hi < lo

  probe = nil
  probe_pos = nil
  while hi >= lo
    # Only the very first probe may start from the previous call's position.
    probe = if probe.nil? && @last_idx
              @last_idx
            else
              lo + (hi - lo) / 2
            end
    probe_pos = @_index_pos[probe]

    order = pos <=> probe_pos
    break if order == 0

    if order == -1
      hi = probe - 1
    elsif order == 1
      lo = probe + 1
    end
  end
  @last_idx = probe

  # The search may stop one entry past the page that contains pos.
  probe -= 1 if probe_pos > pos

  probe.to_i
end
#filename ⇒ Object
30 31 32 33 34 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 30
# Name associated with the compressed stream, memoized in @filename.
#
# Uses the stream's own #filename when it responds to it; otherwise a
# random number below 1000000000, rendered as a String, stands in.
#
# @return [Object] the stream's filename, or the random String
def filename
  @filename ||= if compressed_stream.respond_to?(:filename)
                  compressed_stream.filename
                else
                  rand(1000000000).to_s
                end
end
#get_block ⇒ Object
148 149 150 151 152 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 148
# Returns the part of the current block that starts at the current offset.
#
# block_offset supplies the block's offset and the offset inside it; the
# block itself is obtained through _get_block.
#
# @return [Object] block[offset..-1] for the block and offset found
def get_block
  vo, skip = block_offset
  _get_block(vo)[skip..-1]
end
#getc ⇒ Object
171 172 173 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 171
# Reads a single unit from the stream by delegating to read(1).
#
# @return [Object] whatever read(1) returns
def getc
  read 1
end
#gets ⇒ Object
175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 175
# Reads one line, including its trailing "\n" when present.
#
# Data is pulled in chunks of Misc::BLOCK_SIZE through read until a chunk
# contains a newline or read returns an empty chunk. Reading usually
# overshoots the line, so @data_offset is reset afterwards to the starting
# offset plus the length of the returned line.
#
# @return [String, nil] the line, or nil when nothing could be read
def gets
  start = @data_offset
  line = nil

  until (chunk = read(Misc::BLOCK_SIZE)).empty?
    line ||= ""
    newline_at = chunk.index("\n")
    if newline_at
      line << chunk[0..newline_at]
      break
    end
    line << chunk
  end

  return nil if line.nil?

  # Rewind to just after the line that is being returned.
  @data_offset = start + line.length
  line
end
#init ⇒ Object
85 86 87 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 85
# Forces the block index to be built by calling _index.
#
# @return [Object] whatever _index returns
def init
  _index()
end
#read(size = nil) ⇒ Object
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 154
# Reads up to +size+ units starting at the current data offset and advances
# @data_offset by the amount actually returned.
#
# Data is gathered block by block through get_block until +size+ is
# satisfied or get_block yields nil or an empty block. A +size+ of 0
# returns "" and leaves the offset untouched.
#
# @param size [Integer, nil] amount to read; nil delegates to read_all
# @return [String] the data read; "" when nothing is available
def read(size=nil)
  return read_all if size.nil?

  result = nil
  remaining = size
  while remaining > 0
    block = get_block
    break if block.nil? or block.empty?

    # Start/length slicing: a range ending at size-1 would turn into 0..-1
    # (the whole block) when the requested size is 0.
    chunk = block[0, remaining]
    @data_offset += chunk.length
    remaining -= chunk.length

    if result
      result << chunk
    else
      result = chunk
    end
  end

  result || ""
end
#read_all ⇒ Object
75 76 77 78 79 80 81 82 83 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 75
# Concatenates every block that read_block returns until it returns nil.
#
# @return [String] the joined blocks; "" when the first read_block is nil
def read_all
  buffer = ""
  until (chunk = read_block).nil?
    buffer << chunk
  end
  buffer
end
#seek(off) ⇒ Object
46 47 48 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 46
# Moves the read position by storing +off+ in @data_offset.
#
# @param off [Object] the new data offset
# @return [Object] +off+
def seek(off)
  @data_offset = off
end