Class: Warc::Record
- Inherits: Object
- Inheritance chain: Object → Warc::Record
- Defined in:
- lib/warc/record.rb
Defined Under Namespace
Constant Summary collapse
- VERSION =
"WARC/1.0"
Instance Attribute Summary collapse
-
#content ⇒ Object
Returns the value of attribute content.
-
#header ⇒ Object
readonly
Returns the value of attribute header.
-
#offset ⇒ Object
Returns the value of attribute offset.
Instance Method Summary collapse
- #dump_to(out) ⇒ Object
-
#initialize(h = {}, content = nil) ⇒ Record
constructor
A new instance of Record.
- #to_http ⇒ Object
Constructor Details
#initialize(h = {}, content = nil) ⇒ Record
Returns a new instance of Record.
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 |
# File 'lib/warc/record.rb', line 7

# Builds a record either from a hash of WARC header fields or by capturing
# a WEBrick::HTTPResponse as a WARC "response" record.
#
# @param h [Hash, WEBrick::HTTPResponse] header fields, or the HTTP response
#   to archive
# @param content [Object, nil] the record block; when +h+ is a
#   WEBrick::HTTPResponse it is replaced by the serialized HTTP message
# @raise [ArgumentError] if +h+ is neither a Hash nor a WEBrick::HTTPResponse
def initialize(h = {}, content = nil)
  @content = content
  case h
  when Hash
    @header = Header.new(self, h)
  when WEBrick::HTTPResponse
    @header = Header.new(self)
    @header["WARC-Type"] = "response"
    @header["WARC-Target-URI"] = h.request_uri.to_s
    @header["Content-Type"] = "application/http;msgtype=response"
    # @header["WARC-IP-Address"]

    # Rebuild the raw HTTP message: status line, header lines, blank line, body.
    # NOTE(review): no CRLF is added after status_line; this relies on
    # WEBrick's status_line already ending with CRLF -- confirm.
    crlf = "\r\n"
    body = String.new
    body << h.status_line
    h.header.each { |k, v| body << "#{k}: #{v}#{crlf}" }
    body << crlf << h.body
    self.content = body

    # Return value is unused; presumably this records the block digest in the
    # header -- verify against Header#block_digest.
    header.block_digest
    # The payload digest covers only the HTTP entity body, not the whole block.
    @header["WARC-Payload-Digest"] = header.compute_digest(h.body)
  else
    # Fail fast: without a header the record can neither be dumped nor
    # converted back to HTTP.
    raise ArgumentError,
          "expected a Hash or WEBrick::HTTPResponse, got #{h.class}"
  end
end
Instance Attribute Details
#content ⇒ Object
Returns the value of attribute content.
5 6 7 |
# File 'lib/warc/record.rb', line 5

# Reader for the record's content.
#
# @return [Object] the current value of @content
def content
  @content
end
#header ⇒ Object (readonly)
Returns the value of attribute header.
6 7 8 |
# File 'lib/warc/record.rb', line 6

# Reader for the record's header (read-only attribute).
#
# @return [Object] the current value of @header
def header
  @header
end
#offset ⇒ Object
Returns the value of attribute offset.
5 6 7 |
# File 'lib/warc/record.rb', line 5

# Reader for the record's offset.
#
# @return [Object] the current value of @offset
def offset
  @offset
end
Instance Method Details
#dump_to(out) ⇒ Object
40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/warc/record.rb', line 40

# Serializes this record to +out+ using the WARC record layout:
#
#   warc-file   = 1*warc-record
#   warc-record = header CRLF
#                 block CRLF CRLF
#   header      = version CRLF
#                 warc-fields
#   version     = "WARC/1.0" CRLF
#   warc-fields = *named-field CRLF
#   block       = *OCTET
#
# A record without content (the constructor default is nil) is written with
# an empty block instead of raising.
#
# @param out [#write] destination the record is written to
# @return [Object] whatever the final +out.write+ call returns
def dump_to(out)
  crlf = "\r\n"
  out.write(VERSION + crlf)
  out.write(header.to_s)
  out.write(crlf)
  # to_s turns nil content into "" so the trailing CRLF CRLF is still emitted.
  out.write(content.to_s + crlf * 2)
end
#to_http ⇒ Object
30 31 32 33 34 35 36 37 38 |
# File 'lib/warc/record.rb', line 30

# Parses the record content back into a Net::HTTPResponse.
#
# Only records whose Content-Type header is "application/http;msgtype=response"
# are converted; the comparison ignores whitespace and letter case so that
# "application/http; msgtype=response" is accepted as well.
#
# @return [Net::HTTPResponse, nil] the parsed response with its body already
#   read, or nil when the record does not hold an HTTP response
def to_http
  media_type = @header["Content-Type"].to_s.gsub(/\s+/, "").downcase
  return nil unless media_type == "application/http;msgtype=response"

  # Net::BufferedIO reads from an IO-like object, so String content has to be
  # wrapped; anything else is passed through untouched.
  require "stringio"
  source = content.is_a?(String) ? StringIO.new(content) : content
  socket = Net::BufferedIO.new(source)
  response = Net::HTTPResponse.read_new(socket)
  # The empty block is intentional: reading_body reads the body after yielding.
  response.reading_body(socket, true) {}
  response
end