Class: Warc::Stream

Inherits:
Object
  • Object
show all
Includes:
Enumerable
Defined in:
lib/warc/stream.rb

Direct Known Subclasses

Gzip, Plain

Defined Under Namespace

Classes: Gzip, Plain

Constant Summary collapse

# Default options; caller-supplied options are merged over these.
DEFAULT_OPTS = {
  # Upper bound on file size (10**9); compared against byte offsets on write.
  :max_filesize => 1_000_000_000
}

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(fh, options = {}, &block) ⇒ Stream

Returns a new instance of Stream.



28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/warc/stream.rb', line 28

# Builds a stream on top of +fh+.
#
# fh      - either a ::File, which is used directly as the handle (its
#           basename becomes the stream name), or a String, which is kept as
#           the stream name while the handle comes from next_file_handle.
# options - Hash merged over DEFAULT_OPTS.
# block   - optional callable taking (name, index); only stored when +fh+ is
#           a String. The fallback produces "<name>.<index zero-padded to 6>".
#
# NOTE(review): an +fh+ that is neither a ::File nor a String matches no
# branch and leaves @file_handle nil - confirm whether that is intended.
def initialize(fh, options = {}, &block)
  @options = DEFAULT_OPTS.merge(options)
  @index = 0

  case fh
  when ::File
    @name = ::File.basename(fh)
    @file_handle = fh
  when String
    @name = fh
    @naming_proc = block || lambda do |name, index|
      suffix = sprintf('%06d', index)
      "#{name}.#{suffix}"
    end
    @file_handle = next_file_handle
  else
    @file_handle = nil
  end

  @parser = ::Warc::Parser.new
end

Instance Attribute Details

#parser ⇒ Object (readonly)

Returns the value of attribute parser.



21
22
23
# File 'lib/warc/stream.rb', line 21

# Read-only accessor for the parser held in @parser.
def parser
  @parser
end

Instance Method Details

#close ⇒ Object



67
68
69
# File 'lib/warc/stream.rb', line 67

# Closes the underlying file handle by delegating to its #close.
def close
  @file_handle.close
end

#each(offset = 0, &block) ⇒ Object



44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/warc/stream.rb', line 44

# Iterates over the records of the stream, starting at +offset+.
#
# The handle is positioned at +offset+ (absolute), then read_record is called
# repeatedly until it returns a falsy value. Each record has its #offset set
# to the handle position observed just before it was read, and is then passed
# to the block.
#
# offset - absolute position to seek to before reading (default 0).
# block  - receives each record in turn.
#
# Returns an Enumerator when called without a block (previously this path
# raised LocalJumpError on the first record); otherwise returns nil once the
# records are exhausted.
def each(offset = 0, &block)
  return enum_for(:each, offset) unless block

  @file_handle.seek(offset, ::IO::SEEK_SET)
  loop do
    # Capture the position first: it is where the upcoming record begins.
    position = @file_handle.tell
    rec = read_record
    break unless rec

    rec.offset = position
    block.call(rec)
  end
end

#read_record ⇒ Object

Raises:

  • (StandardError)


71
72
73
# File 'lib/warc/stream.rb', line 71

# Placeholder for reading the next record from the stream.
#
# This implementation always raises; each and record both call it, so a
# usable stream has to supply its own version.
#
# Raises StandardError (class unchanged for existing rescuers) with a message
# naming the receiver's class so the missing override is easy to locate.
def read_record
  raise StandardError, "#{self.class}#read_record is not implemented; subclasses must override it"
end

#record(offset = 0) ⇒ Object



62
63
64
65
# File 'lib/warc/stream.rb', line 62

# Reads a single record located at +offset+.
#
# offset - absolute position to seek the handle to first (default 0).
#
# Returns whatever read_record returns after the seek.
def record(offset = 0)
  handle = @file_handle
  handle.seek(offset, ::IO::SEEK_SET)
  read_record
end

#size ⇒ Object



83
84
85
# File 'lib/warc/stream.rb', line 83

# Reports the size of the underlying handle, as given by its #stat.
#
# Returns the value of stat.size for the current file handle.
def size
  handle_stat = @file_handle.stat
  handle_stat.size
end

#write_record(record) ⇒ Object



75
76
77
78
79
80
81
# File 'lib/warc/stream.rb', line 75

def write_record(record)
  # Go to end of file
  @file_handle.seek(0,::IO::SEEK_END)
  expected_size = record.header.content_length + @file_handle.tell
  next_file_handle if (expected_size > @options[:max_filesize])
  record.offset = @file_handle.tell
end