Class: Warc::Record

Inherits:
Object
  • Object
show all
Defined in:
lib/warc/record.rb

Defined Under Namespace

Classes: Header, Validator

Constant Summary collapse

VERSION =
"WARC/1.0"

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(h = {}, content = nil) ⇒ Record

Returns a new instance of Record.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/warc/record.rb', line 7

# Builds a record from either a hash of header fields or a WEBrick HTTP
# response.
#
# With a Hash, the hash is handed to Header.new together with this record and
# +content+ is stored untouched. With a WEBrick::HTTPResponse, the response is
# serialized (status line, header lines, blank line, body) and that text
# replaces +content+; the WARC-Type, WARC-Target-URI, Content-Type and
# WARC-Payload-Digest header fields are filled in from the response.
#
# Any other kind of +h+ matches no branch, so @header is left unset.
#
# @param h [Hash, WEBrick::HTTPResponse] header fields, or a response to wrap
# @param content [Object, nil] initial record content
def initialize(h = {}, content = nil)
  @content = content

  case h
  when Hash
    @header = Header.new(self, h)
  when WEBrick::HTTPResponse
    @header = Header.new(self)
    @header["WARC-Type"]       = "response"
    @header["WARC-Target-URI"] = h.request_uri.to_s
    @header["Content-Type"]    = "application/http;msgtype=response"
    # The "WARC-IP-Address" field is not populated here.

    # Re-create the raw HTTP message so it can be stored as the record block.
    # NOTE(review): no line ending is appended after status_line, so it
    # presumably already ends with CRLF - confirm against WEBrick.
    line_end = "\r\n"
    http_message = String.new
    http_message << h.status_line
    h.header.each do |field, value|
      http_message << "#{field}: #{value}" + line_end
    end
    http_message << line_end + h.body

    self.content = http_message
    # Return value is discarded; presumably this records the block digest on
    # the header - TODO confirm in Header#block_digest.
    header.block_digest
    @header["WARC-Payload-Digest"] = header.compute_digest(h.body)
  end
end

Instance Attribute Details

#content ⇒ Object

Returns the value of attribute content.



5
6
7
# File 'lib/warc/record.rb', line 5

# Reader for the record's content.
#
# @return [Object, nil] the current value of the @content instance variable
def content
  @content
end

#header ⇒ Object (readonly)

Returns the value of attribute header.



6
7
8
# File 'lib/warc/record.rb', line 6

# Reader for the record's header.
#
# @return [Object, nil] the current value of the @header instance variable
#   (the constructor assigns a Header instance to it)
def header
  @header
end

#offset ⇒ Object

Returns the value of attribute offset.



5
6
7
# File 'lib/warc/record.rb', line 5

# Reader for the record's offset.
#
# NOTE(review): presumably the record's position within its WARC file; nothing
# visible here assigns it - confirm against the code that sets @offset.
#
# @return [Object, nil] the current value of the @offset instance variable
def offset
  @offset
end

Instance Method Details

#dump_to(out) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/warc/record.rb', line 40

# Writes this record to +out+ in WARC record layout: the version line, the
# header fields, a blank line, the content block, then two CRLFs.
#
# Grammar being emitted:
#
#      warc-file    = 1*warc-record
#      warc-record  = header CRLF
#                     block CRLF CRLF
#      header       = version warc-fields
#      version      = "WARC/1.0" CRLF     (the VERSION constant)
#      warc-fields  = *named-field CRLF
#      block        = *OCTET
#
# NOTE(review): this relies on header.to_s ending every field with CRLF -
# confirm in Header#to_s.
#
# @param out [#write] destination; only +write+ is called on it
# @return [Object] whatever the final +out.write+ call returns
def dump_to(out)
  crlf = "\r\n"

  out.write(VERSION + crlf)
  out.write(header.to_s)
  out.write(crlf)
  # The constructor defaults content to nil; to_s turns that into an empty
  # block instead of raising NoMethodError on nil + String.
  out.write(content.to_s + crlf * 2)
end

#to_http ⇒ Object



30
31
32
33
34
35
36
37
38
# File 'lib/warc/record.rb', line 30

# Parses the record content back into a Net::HTTPResponse.
#
# Only records whose Content-Type header is exactly
# "application/http;msgtype=response" are parsed; any other record yields nil.
#
# NOTE(review): Net::BufferedIO reads from an IO-like object. If +content+ can
# be a plain String it would need wrapping (e.g. in a StringIO) - confirm what
# callers store in content.
#
# @return [Net::HTTPResponse, nil] the parsed response with its body already
#   read, or nil when the record does not hold an HTTP response
def to_http
  return unless @header["Content-Type"] == "application/http;msgtype=response"

  socket = Net::BufferedIO.new(content)
  response = Net::HTTPResponse.read_new(socket)
  # The empty block is required by reading_body; the body is read from the
  # socket once the block returns.
  response.reading_body(socket, true) {}
  response
end