Class: Warc::Record::Header
- Inherits:
-
HeaderHash
show all
- Includes:
- ActiveModel::Validations
- Defined in:
- lib/warc/record/header.rb
Constant Summary
collapse
- NAMED_FIELDS =
Set of field names defined in the spec
[
"WARC-Type",
"WARC-Record-ID",
"WARC-Date",
"Content-Length",
"Content-Type",
"WARC-Concurrent-To",
"WARC-Block-Digest",
"WARC-Payload-Digest",
"WARC-IP-Address",
"WARC-Refers-To",
"WARC-Target-URI",
"WARC-Truncated",
"WARC-Warcinfo-ID",
"WARC-Filename", "WARC-Profile", "WARC-Identified-Payload-Type",
"WARC-Segment-Origin-ID", "WARC-Segment-Number",
"WARC-Segment-Total-Length" ]
- REQUIRED_FIELDS =
["WARC-Record-ID","Content-Length","WARC-Date","WARC-Type"]
Instance Attribute Summary collapse
Instance Method Summary
collapse
Methods inherited from HeaderHash
#[], #[]=, #delete, #each, #include?, #merge, #merge!, #replace, #to_hash
Constructor Details
#initialize(record, h = {}) ⇒ Header
Returns a new instance of Header.
38
39
40
41
|
# File 'lib/warc/record/header.rb', line 38
# Creates a header attached to the record that owns it.
#
# @param record [Object] the owning record; stored in @record and exposed via #record
# @param h [Hash] initial header fields, passed unchanged to the superclass constructor
def initialize(record, h = {})
  @record = record
  super(h)
end
|
Instance Attribute Details
#record ⇒ Object
9
10
11
|
# File 'lib/warc/record/header.rb', line 9
# Reader for the record this header was constructed with.
#
# @return [Object, nil] the value held in @record
def record
  @record
end
|
Instance Method Details
#block_digest ⇒ Object
63
64
65
|
# File 'lib/warc/record/header.rb', line 63
# Returns the "warc-block-digest" header, computing and storing it on first use.
#
# When the header is absent (nil/false) the digest is derived from the owning
# record's content via #compute_digest and written back into the header.
#
# @return [Object] the stored or freshly computed digest value
def block_digest
  existing = self["warc-block-digest"]
  return existing if existing

  self["warc-block-digest"] = compute_digest(record.content)
end
|
#compute_digest(content) ⇒ Object
67
68
69
|
# File 'lib/warc/record/header.rb', line 67
# Builds a labelled SHA-256 digest string for the given content.
#
# @param content [String] data to hash
# @return [String] "sha256:" followed by the hexadecimal SHA-256 of +content+
def compute_digest(content)
  hex = Digest::SHA256.hexdigest(content)
  "sha256:#{hex}"
end
|
#content_length ⇒ Object
43
44
45
|
# File 'lib/warc/record/header.rb', line 43
# Returns the "content-length" header as an Integer, deriving it from the
# owning record's content when the header is not set.
#
# The derived value is measured in bytes (String#bytesize) rather than
# characters, so multibyte content is not under-counted. Content that does not
# respond to #bytesize falls back to #length. If the content cannot be measured
# at all (for example the record or its content is nil), 0 is stored, matching
# the previous best-effort behaviour.
#
# @return [Integer] the stored or derived content length
def content_length
  stored = self["content-length"]
  return stored.to_i if stored

  octets =
    begin
      body = record.content
      body.respond_to?(:bytesize) ? body.bytesize : body.length
    rescue StandardError
      # Best effort: an unmeasurable body is reported as empty.
      0
    end
  (self["content-length"] = octets).to_i
end
|
#date ⇒ Object
47
48
49
|
# File 'lib/warc/record/header.rb', line 47
# Returns the "warc-date" header normalised to an ISO 8601 string.
#
# When the header is missing it is first set to the current time in UTC
# (WARC-Date is specified as a UTC timestamp). Previously the default was
# attached to the result of Time#iso8601, which is always truthy, so a
# missing header made Time.parse(nil) raise instead of falling back.
#
# @return [String] ISO 8601 timestamp parsed from the header
# @raise [ArgumentError] if the stored value cannot be parsed as a time
def date
  self["warc-date"] ||= Time.now.utc.iso8601
  Time.parse(self["warc-date"]).iso8601
end
|
#date=(d) ⇒ Object
51
52
53
|
# File 'lib/warc/record/header.rb', line 51
# Sets the "warc-date" header to the ISO 8601 form of the given time.
#
# Accepts either a parseable time string (as before) or a Time instance;
# Time.parse raises TypeError when handed a Time, so those are used directly.
#
# @param d [String, Time] the timestamp to store
# @raise [ArgumentError] if +d+ is a string that cannot be parsed as a time
def date=(d)
  stamp = d.is_a?(Time) ? d : Time.parse(d)
  self["warc-date"] = stamp.iso8601
end
|
#record_id ⇒ Object
59
60
61
|
# File 'lib/warc/record/header.rb', line 59
# Returns the "warc-record-id" header, generating and storing one if absent.
#
# A generated id has the form "<urn:uuid:...>" using the value returned by
# UUID.generate.
#
# @return [Object] the stored or freshly generated record id
def record_id
  self["warc-record-id"] ||= "<urn:uuid:#{UUID.generate}>"
end
|
#to_s ⇒ Object
75
76
77
78
79
80
81
82
83
84
85
86
|
# File 'lib/warc/record/header.rb', line 75
# Serialises the header as CRLF-terminated "Name: value" lines.
#
# The four fields listed in REQUIRED_FIELDS are written first, in a fixed
# order, using the accessor methods (#type, #record_id, #date,
# #content_length). Every other entry yielded by #each follows.
#
# Entries whose name matches a required field are skipped regardless of
# letter case, so a key stored as "WARC-Type" is not emitted a second time.
#
# @return [String] the serialised header lines
def to_s
  crlf = "\r\n"
  # Computed once instead of on every iteration.
  already_written = REQUIRED_FIELDS.map(&:downcase)

  str = String.new
  str << "WARC-Type: #{self.type}#{crlf}"
  str << "WARC-Record-ID: #{record_id}#{crlf}"
  str << "WARC-Date: #{date}#{crlf}"
  str << "Content-Length: #{content_length}#{crlf}"
  each do |k, v|
    next if already_written.include?(k.to_s.downcase)

    str << "#{k}: #{v}#{crlf}"
  end
  str
end
|
#type ⇒ Object
55
56
57
|
# File 'lib/warc/record/header.rb', line 55
# Reads the "warc-type" header field.
#
# @return [Object, nil] whatever is stored under "warc-type"
def type
  self["warc-type"]
end
|
#uri ⇒ Object
71
72
73
|
# File 'lib/warc/record/header.rb', line 71
# Reads the "warc-target-uri" header field.
#
# @return [Object, nil] whatever is stored under "warc-target-uri"
def uri
  self["warc-target-uri"]
end
|