Class: Warc::Stream::Gzip

Inherits:
Warc::Stream show all
Defined in:
lib/warc/stream/gzip.rb

Constant Summary

Constants inherited from Warc::Stream

DEFAULT_OPTS

Instance Attribute Summary

Attributes inherited from Warc::Stream

#parser

Instance Method Summary collapse

Methods inherited from Warc::Stream

#close, #each, #record, #size

Constructor Details

#initialize(fh, options = {}, &block) ⇒ Gzip

Returns a new instance of Gzip.



6
7
8
9
# File 'lib/warc/stream/gzip.rb', line 6

# Builds a gzip-backed stream.
#
# Sets @ext to ".warc.gz" before delegating to the parent constructor, so the
# extension is already in place when the parent's initializer runs.
#
# @param fh the first argument, forwarded unchanged to the parent constructor
# @param options [Hash] forwarded unchanged to the parent constructor
# @param block forwarded unchanged to the parent constructor
def initialize(fh, options = {}, &block)
  @ext = ".warc.gz"
  super # implicit super forwards fh, options and the block as received
end

Instance Method Details

#read_record ⇒ Object



11
12
13
14
15
16
17
18
19
20
21
22
23
24
# File 'lib/warc/stream/gzip.rb', line 11

# Reads the next gzip member from @file_handle and returns the record parsed
# from it.
#
# The decompressing reader is handed to parser.parse and then drained to the
# end of the member. Zlib may have pulled more bytes from @file_handle than
# the member contained; the handle is moved back by that many bytes.
#
# The reader is always finished before returning so its zlib stream is
# released; finish (unlike close) leaves @file_handle open.
#
# @return [Object, nil] whatever parser.parse returned, or nil when a
#   Zlib::Error is raised (e.g. there is no more gzipped data to read)
def read_record
  gz = ::Zlib::GzipReader.new(@file_handle)
  rec = parser.parse(gz)
  loop { gz.readline } # Make sure we read the whole gzip; readline raises EOFError at its end
rescue EOFError # End of gzipped record
  # gz and rec are nil here if the line assigning them never completed.
  unused = gz && gz.unused
  # We move the cursor back if extra bytes were read
  @file_handle.pos -= unused.length if unused
  rec
rescue ::Zlib::Error # Raised when there's no more gzipped data to read
  nil
ensure
  # Release the zlib stream without closing the underlying file handle.
  gz.finish if gz && !gz.closed?
end

#write_record(record) ⇒ Object



26
27
28
29
30
31
32
33
# File 'lib/warc/stream/gzip.rb', line 26

# Appends +record+ to @file_handle as its own gzip member.
#
# The parent implementation runs first (implicit super, same argument).
# A fresh GzipWriter is then wrapped around @file_handle, the record dumps
# itself into it, and the writer is finished so the gzip footer is written.
#
# @param record an object responding to dump_to(io)
# @return [Object] the value returned by Zlib::GzipWriter#finish
def write_record(record)
  super

  ::Zlib::GzipWriter
    .new(@file_handle)
    .tap { |member| record.dump_to(member) }
    .finish # finish (not close) writes the footer
end