Module: Bgzf
- Defined in:
- lib/rbbt/util/misc/bgzf.rb
Instance Attribute Summary collapse
-
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
-
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
-
#data_offset ⇒ Object
Returns the value of attribute data_offset.
Class Method Summary collapse
- .bgzip_cmd ⇒ Object
- .setup(compressed_stream) ⇒ Object
Instance Method Summary collapse
- #_get_block(vo) ⇒ Object
- #_index ⇒ Object
- #_purge_cache ⇒ Object
- #block_offset ⇒ Object
- #close ⇒ Object
- #closed? ⇒ Boolean
- #closest_page(pos) ⇒ Object
- #filename ⇒ Object
- #get_block ⇒ Object
- #getc ⇒ Object
- #gets ⇒ Object
- #init ⇒ Object
- #read(size = nil) ⇒ Object
- #read_all ⇒ Object
- #seek(off) ⇒ Object
Instance Attribute Details
#block_cache_size ⇒ Object
Returns the value of attribute block_cache_size.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the block_cache_size attribute.
#
# @return [Object] whatever is currently stored in @block_cache_size
def block_cache_size
  @block_cache_size
end
#compressed_stream ⇒ Object
Returns the value of attribute compressed_stream.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the compressed_stream attribute.
#
# @return [Object] whatever is currently stored in @compressed_stream
def compressed_stream
  @compressed_stream
end
#data_offset ⇒ Object
Returns the value of attribute data_offset.
6 7 8 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 6
# Reader for the data_offset attribute.
#
# @return [Object] whatever is currently stored in @data_offset
def data_offset
  @data_offset
end
Class Method Details
.bgzip_cmd ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 8
# Path of the bgzip executable, memoized in @@bgzip_cmd.
#
# bash is asked for a bgzip on the PATH (`type -p bgzip`). When it reports
# nothing, the Rbbt htslib software entry is claimed and produced, and the
# bgzip found under its bin directory is returned instead.
# NOTE(review): presumably claim/produce install HTSLIB - confirm against Rbbt.
#
# @return [Object] the path found by bash, or the Rbbt-managed bgzip path
def self.bgzip_cmd
  @@bgzip_cmd ||= begin
    on_path = `bash -c "type -p bgzip"`.strip
    if on_path.length > 0
      on_path
    else
      Rbbt.claim Rbbt.software.opt.htslib, :install, Rbbt.share.install.software.HTSLIB.find(:lib)
      Rbbt.software.opt.htslib.produce
      Rbbt.software.opt.htslib.bin.bgzip.find
    end
  end
end
.setup(compressed_stream) ⇒ Object
21 22 23 24 25 26 27 28 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 21
# Wraps a compressed stream in a Bio::BGZF::Reader extended with Bgzf.
#
# The 'bio-bgzf' library is required lazily, on the first call.
#
# @param compressed_stream [Object] the stream handed to Bio::BGZF::Reader.new
# @return [Object] the reader, with compressed_stream set and data_offset at 0
def self.setup(compressed_stream)
  require 'bio-bgzf'
  Bio::BGZF::Reader.new(compressed_stream).tap do |bgzf|
    bgzf.extend Bgzf
    bgzf.compressed_stream = compressed_stream
    bgzf.data_offset = 0
  end
end
Instance Method Details
#_get_block(vo) ⇒ Object
135 136 137 138 139 140 141 142 143 144 145 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 135
# Returns the block identified by +vo+, reading it with read_block_at on a
# cache miss and remembering it in @blocks.
#
# @access records the order in which blocks were requested, most recent last.
#
# @param vo [Object] key identifying the block; passed to read_block_at
# @return [Object] the cached or freshly read block
def _get_block(vo)
  @blocks ||= {}
  @access ||= []

  # Move vo to the most-recent end rather than appending a duplicate, so the
  # access history stays bounded by the number of distinct blocks requested
  # instead of growing with every read.
  @access.delete vo
  @access << vo

  return @blocks[vo] if @blocks.include? vo

  # Give the cache a chance to evict before a new block is stored.
  _purge_cache
  @blocks[vo] ||= read_block_at vo
end
#_index ⇒ Object
50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 50
# Builds (or loads through Persist) the block index and memoizes it.
#
# Each entry is [pos, blockdata_offset]: +pos+ is the running total of the
# lengths of the blocks read before this one, and +blockdata_offset+ is the
# value of +tell+ taken just before the block was read.
#
# Also sets @block_cache_size from the number of indexed blocks.
#
# @return [Array<Array>] the list of [pos, blockdata_offset] pairs
# @raise [RuntimeError] with a hint to uncompress the file when read_block
#   fails with a message matching /BGFZ.*expected/; any other error is
#   re-raised untouched
def _index
  @_index ||= begin
    index = Persist.persist("BGZF index" + (filename || "").sub(/.bgz$/,''), :marshal, :dir => Rbbt.var.bgzf_index) do
      entries = []
      pos = 0
      while true do
        blockdata_offset = tell
        block = begin
                  read_block
                rescue Exception => e
                  # Match against the message: `=~` on the exception object
                  # itself never matches. Every exception is re-raised, so the
                  # broad rescue swallows nothing.
                  # NOTE(review): the pattern spells "BGFZ"; confirm it matches
                  # the wording of the error raised by the reader.
                  raise "BGZF seems to be buggy so some compressed files will not decompress right. Try uncompressing #{filename}" if e.message =~ /BGFZ.*expected/
                  raise
                end
        break unless block
        entries << [pos, blockdata_offset]
        pos += block.length
      end
      entries
    end
    # Math.log(0) is -Infinity and cannot be converted with to_i, so an empty
    # index falls back to the smallest cache size.
    @block_cache_size = index.empty? ? 1 : Math.log(index.length).to_i + 1
    index
  end
end
#_purge_cache ⇒ Object
127 128 129 130 131 132 133 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 127
# Evicts one block from @blocks once the cache holds more than
# @block_cache_size entries.
#
# The victim is the cached block whose latest access is the oldest. @access
# is ordered oldest first, so taking its last element (as was done before)
# picked the newest request - the block about to be loaded, which is not
# cached yet - and nothing was ever evicted.
#
# @return [Object, nil] the evicted block, or nil when nothing was evicted
def _purge_cache
  return unless @blocks.length > @block_cache_size

  @access ||= []
  # Keep only the latest access of every block, still ordered oldest first.
  recency = @access.reverse.uniq.reverse
  victim = recency.find { |vo| @blocks.key?(vo) }
  # Fall back to the first stored key when the history knows no cached block.
  victim = @blocks.keys.first if victim.nil?

  recency.delete victim
  @access.replace recency
  @blocks.delete victim
end
#block_offset ⇒ Object
119 120 121 122 123 124 125 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 119
# Translates the current data_offset into a position inside an indexed block.
#
# Looks up the index entry chosen by closest_page and returns that entry's
# second element together with the distance between data_offset and the
# entry's first element.
#
# @return [Array] a two element array: [page, offset]
def block_offset
  logical_pos = data_offset
  entry_idx = closest_page(logical_pos)
  page = _index[entry_idx][1]
  [page, logical_pos - _index[entry_idx][0]]
end
#close ⇒ Object
40 41 42 43 44 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 40
# Closes the underlying compressed stream (when still open) and empties the
# block cache and the access history if they have been created.
#
# @return [Object, nil] the result of clearing @blocks, or nil when unset
def close
  stream = @compressed_stream
  stream.close unless stream.closed?

  if @access
    @access.clear
  end

  if @blocks
    @blocks.clear
  end
end
#closed? ⇒ Boolean
36 37 38 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 36
# Reports whether the underlying compressed stream has been closed.
#
# @return [Boolean] the answer given by @compressed_stream.closed?
def closed?
  stream = @compressed_stream
  stream.closed?
end
#closest_page(pos) ⇒ Object
88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 88
# Finds the index entry whose starting position is the largest one that does
# not exceed +pos+, using a binary search over the entries' first elements.
#
# The entry probed last is remembered in @last_idx and used as the first
# probe of the next call. The first elements of the index are cached in
# @_index_pos.
#
# @param pos [Integer] position to locate
# @return [Integer] index into _index, or -1 when the index is empty
def closest_page(pos)
  hi = _index.size - 1
  lo = 0
  @_index_pos ||= _index.map { |entry| entry.first }
  return -1 if hi < lo

  probe = nil
  probe_pos = nil
  while hi >= lo
    # Only the very first probe may start from the remembered position.
    probe = if probe.nil? && @last_idx
              @last_idx
            else
              lo + (hi - lo) / 2
            end
    probe_pos = @_index_pos[probe]

    cmp = pos <=> probe_pos
    if cmp == 0
      break
    elsif cmp == -1
      hi = probe - 1
    elsif cmp == 1
      lo = probe + 1
    end
  end

  @last_idx = probe
  # The last probe may sit one entry past the wanted one.
  probe -= 1 if probe_pos > pos
  probe.to_i
end
#filename ⇒ Object
30 31 32 33 34 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 30
# Name of the file behind the compressed stream, when the stream exposes one.
#
# A non-nil answer is memoized in @filename; a nil answer is looked up again
# on the next call.
#
# @return [Object, nil] compressed_stream.filename, or nil when the stream
#   does not respond to filename
def filename
  @filename ||= (compressed_stream.filename if compressed_stream.respond_to?(:filename))
end
#get_block ⇒ Object
147 148 149 150 151 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 147
# Returns the part of the current block that starts at the current offset.
#
# block_offset supplies the block key and the offset inside that block; the
# block itself comes from _get_block.
#
# @return [Object] the block contents from the offset to the end
def get_block
  page, start = block_offset
  _get_block(page)[start..-1]
end
#getc ⇒ Object
170 171 172 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 170
# Reads a single unit of data by delegating to read with a size of 1.
#
# @return [Object] whatever read(1) returns
def getc
  read 1
end
#gets ⇒ Object
174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 174
# Reads one line, newline included, starting at the current @data_offset.
#
# Data is pulled in chunks of Misc::BLOCK_SIZE through read until a chunk
# contains "\n" or an empty chunk comes back. read moves @data_offset past
# everything it fetched, so the offset is set again afterwards to just after
# the returned line.
#
# @return [String, nil] the line, or nil when the first chunk is empty
def gets
  start = @data_offset
  line = nil

  while true
    chunk = read(Misc::BLOCK_SIZE)
    break if chunk.empty?

    line ||= ""
    newline_at = chunk.index("\n")
    if newline_at
      line << chunk[0..newline_at]
      break
    end
    line << chunk
  end

  return nil if line.nil?

  @data_offset = start + line.length
  line
end
#init ⇒ Object
84 85 86 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 84
# Forces the block index to be evaluated by calling _index.
#
# @return [Object] the value returned by _index
def init
  _index()
end
#read(size = nil) ⇒ Object
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 153
# Reads up to +size+ units of data starting at the current @data_offset and
# advances @data_offset by the amount returned.
#
# When the current block holds less than +size+, the rest is fetched by
# reading again from the following position.
#
# @param size [Integer, nil] amount to read; nil delegates to read_all
# @return [String] the data read; "" when get_block yields nil or nothing
def read(size=nil)
  return read_all if size.nil?

  block = get_block
  return "" if block.nil? || block.empty?

  len = block.length
  if len >= size
    @data_offset += size
    # Start/length form: with size == 0 the range 0..size-1 turns into 0..-1
    # and would hand back the whole block while the offset does not move.
    block[0, size]
  else
    @data_offset += len
    block << read(size - len)
  end
end
#read_all ⇒ Object
74 75 76 77 78 79 80 81 82 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 74
# Concatenates every block that read_block still yields into one string.
#
# NOTE(review): this neither consults nor updates @data_offset; it reads from
# wherever read_block currently stands - confirm that is intended.
#
# @return [String] the accumulated block contents, "" when there are none
def read_all
  buffer = ""
  until (block = read_block).nil?
    buffer << block
  end
  buffer
end
#seek(off) ⇒ Object
46 47 48 |
# File 'lib/rbbt/util/misc/bgzf.rb', line 46
# Moves the read position by storing +off+ in @data_offset.
#
# @param off [Object] the new offset
# @return [Object] the offset that was stored
def seek(off)
  @data_offset = off
end