Module: Bio::BGZF
- Defined in:
- lib/bio-bgzf/constants.rb,
lib/bio-bgzf/writer.rb,
lib/bio-bgzf/unpack.rb,
lib/bio-bgzf/reader.rb,
lib/bio-bgzf/block.rb,
lib/bio-bgzf/pack.rb,
lib/bio-bgzf/vo.rb
Defined Under Namespace
Classes: FormatError, NotBGZFError, Reader, Writer
Constant Summary collapse
- ID1 =
31
- ID2 =
139
- CM =
8
- FLG =
4
- SI1 =
66
- SI2 =
67
- SLEN =
2
- MTIME =
0
- XFL =
0
- OS =
255
- XLEN =
6
- MAX_BYTES =
65536
Class Method Summary collapse
- .decompress_block(f) ⇒ Object
-
.pack(str, level = Zlib::BEST_COMPRESSION) ⇒ Object
Packs str into a BGZF block using the given compression level.
- .read_bgzf_block(f) ⇒ Object
-
.unpack(str) ⇒ Object
Unpacks compressed data, NOT a BGZF block.
- .vo_block_offset(vo) ⇒ Object
- .vo_data_offset(vo) ⇒ Object
Class Method Details
.decompress_block(f) ⇒ Object
42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/bio-bgzf/block.rb', line 42
#
# Reads the next block from +f+ via read_bgzf_block, inflates it with unpack
# and checks the result against the size and CRC32 that came with the block.
#
# @param f [#read] stream passed through to read_bgzf_block; when it also
#   responds to +pos+, the position before the read is used in error messages
# @return [String, nil] the decompressed data, or nil when read_bgzf_block
#   returns no block
# @raise [FormatError] if the decompressed size or the CRC32 does not match
def decompress_block(f)
  # Remember where the block starts so a size mismatch can be reported with a
  # location. The message used to interpolate an undefined local `pos`, which
  # turned the intended FormatError into a NameError.
  block_start = begin
    f.pos if f.respond_to?(:pos)
  rescue SystemCallError
    nil # position unavailable for this stream; report without it
  end

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    location = block_start.nil? ? 'unknown offset' : block_start
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{location}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end
.pack(str, level = Zlib::BEST_COMPRESSION) ⇒ Object
Packs str into a BGZF block using the given compression level.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/bio-bgzf/pack.rb', line 7
#
# Packs +str+ into a single BGZF block using the given compression level.
#
# The block is laid out as: the fixed header fields (ID1, ID2, CM, FLG,
# MTIME, XFL, OS, XLEN), one extra subfield (SI1, SI2, SLEN) carrying the
# 16-bit block size, the raw deflate data, then CRC32 and input size.
#
# @param str [String] data to compress
# @param level [Integer] compression level passed to Zlib::Deflate.new
# @return [String] the packed block
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Negative window bits request a raw deflate stream (no zlib wrapper).
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    zs.close # release the stream even when deflate raises
  end

  crc32 = Zlib.crc32(str, 0)
  # Sizes are byte counts. String#length counts characters and under-reports
  # multibyte input, which then fails the bytesize check in decompress_block.
  isize = str.bytesize
  # Total block length minus one: 12 header bytes + XLEN + cdata + 8 trailer
  # bytes, minus 1, equals cdata + 19 + XLEN.
  # NOTE(review): this is written as a 16-bit field ('v'); oversized input is
  # not rejected here. Presumably callers split input (cf. MAX_BYTES) - confirm.
  bsize = cdata.bytesize + 19 + XLEN

  [
    ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN,
    SI1, SI2, SLEN, bsize,
    cdata,
    crc32, isize
  ].pack('CCCCVCCvCCvva*VV')
end
.read_bgzf_block(f) ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/bio-bgzf/block.rb', line 9
#
# Reads one still-compressed BGZF block from +f+.
#
# The 12-byte fixed header is checked against the BGZF magic, the extra
# subfields are scanned for the "BC" (66, 67) subfield holding the block
# size, and then the compressed payload and the 8-byte trailer are read.
#
# @param f [#read, #seek] stream positioned at the start of a block; +seek+
#   is only used to skip subfields other than "BC"
# @return [Array(String, Integer, Integer), nil] compressed data, expected
#   decompressed size and expected CRC32; nil if nothing could be read (EOF)
# @raise [NotBGZFError] on a wrong magic number or a missing block size
# @raise [FormatError] on malformed subfields or a truncated block
def read_bgzf_block(f)
  # Reads exactly +count+ bytes. A short or nil read means the block is cut
  # off; report that as FormatError instead of failing later on nil.
  read_exactly = lambda do |count, what|
    chunk = f.read(count)
    if chunk.nil? || chunk.bytesize != count
      got = chunk.nil? ? 0 : chunk.bytesize
      raise FormatError, "truncated BGZF block: expected #{count} bytes of #{what}, got #{got}"
    end
    chunk
  end

  hstart = f.read(12)
  return nil if hstart.nil? # EOF?

  # The first four bytes (ID1, ID2, CM, FLG) read as one little-endian word.
  magic = hstart.unpack('V').first # nil when fewer than four bytes were read
  if magic && magic != 0x04088B1F
    raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}"
  end
  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF header: expected 12 bytes, got #{hstart.bytesize}"
  end
  gzip_extra_length = hstart.unpack('x10v').first

  len = 0
  bsize = nil
  while len < gzip_extra_length
    si1, si2, slen = read_exactly.call(4, 'extra subfield header').unpack('CCv')
    if si1 == 66 && si2 == 67
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?
      # slen is exactly 2 here, so nothing is left to skip after the payload.
      bsize = read_exactly.call(2, 'BC subfield payload').unpack('v').first
    else
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  # bsize is the total block length minus one; removing the 12-byte header,
  # the extra field and the 8-byte trailer leaves the payload length.
  cdata_size = bsize - gzip_extra_length - 19
  if cdata_size < 0
    raise FormatError, "block size #{bsize} is too small for #{gzip_extra_length} bytes of extra fields"
  end

  compressed_data = read_exactly.call(cdata_size, 'compressed data')
  crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')
  [compressed_data, input_size, crc32]
end
.unpack(str) ⇒ Object
Unpacks compressed data, NOT a BGZF block.
5 6 7 8 |
# File 'lib/bio-bgzf/unpack.rb', line 5
#
# Unpacks compressed data, NOT a BGZF block: +str+ is inflated as a raw
# deflate stream (negative window bits, i.e. no zlib or gzip wrapper).
#
# @param str [String] raw deflate data
# @return [String] the inflated data
def unpack(str)
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # The stream was previously left open; close it on success and on error.
    zs.close
  end
end
.vo_block_offset(vo) ⇒ Object
2 3 4 |
# File 'lib/bio-bgzf/vo.rb', line 2
#
# Returns +vo+ with its low 16 bits shifted out.
# NOTE(review): presumably +vo+ is a BGZF virtual offset and the result is
# the offset of the block within the compressed file - confirm with callers.
#
# @param vo [Integer]
# @return [Integer]
def vo_block_offset(vo)
  low_bits = 16
  vo >> low_bits
end
.vo_data_offset(vo) ⇒ Object
7 8 9 |
# File 'lib/bio-bgzf/vo.rb', line 7
#
# Returns the low 16 bits of +vo+.
# NOTE(review): presumably +vo+ is a BGZF virtual offset and the result is
# the offset within the decompressed block - confirm with callers.
#
# @param vo [Integer]
# @return [Integer]
def vo_data_offset(vo)
  low_16_bits = (1 << 16) - 1
  vo & low_16_bits
end