Class: Warc::Parser

Inherits:
Object
  • Object
show all
Defined in:
lib/warc/parser.rb

Instance Method Summary collapse

Instance Method Details

#parse(stream) ⇒ Object



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/warc/parser.rb', line 3

# Reads the next WARC record from +stream+.
#
# Lines are consumed until one equal to "WARC/1.0" (with an optional
# trailing CRLF) is found. The "Name: value" lines that follow are stored in
# the record's header, and the record content is then read from the stream.
#
# @param stream [#readline, #read] source positioned at or before a record;
#   only +readline+ and +read(length)+ are called on it
# @return [Warc::Record] the record populated with its header and content
# @raise [EOFError] propagated from +stream.readline+ when the stream ends
#   before a version line is found or before the header block is terminated
#   (this is how IO#readline and StringIO#readline signal end of input)
def parse(stream)
  # Skip everything (e.g. the blank lines separating records) up to the
  # version line that opens the next record.
  loop do
    break if stream.readline.chomp("\r\n") == "WARC/1.0"
  end

  rec = Warc::Record.new

  # Header lines run until the first line that is not "Name: value"
  # (normally the blank line ending the header block). The name capture is
  # non-greedy so the split happens at the FIRST ": " and values that
  # themselves contain ": " are kept intact.
  while (m = /^(.*?): (.*)/.match(stream.readline))
    # "." does not match "\n", so only a trailing "\r" can remain on the value.
    rec.header[m[1]] = m[2].chomp("\r")
  end

  # NOTE(review): content_length is presumably derived from the
  # Content-Length header just stored above -- confirm in the header class.
  rec.content = stream.read(rec.header.content_length)

  rec
end