Module: Spidr::Headers
- Included in:
- Page
- Defined in:
- lib/spidr/headers.rb
Constant Summary collapse
- RESERVED_COOKIE_NAMES =
Reserved names used within Cookie strings
Set['path', 'expires', 'domain']
Instance Method Summary collapse
-
#atom? ⇒ Boolean
Determines if the page is an Atom feed.
-
#bad_request? ⇒ Boolean
Determines if the response code is 400.
-
#code ⇒ Integer
The response code from the page.
-
#content_charset ⇒ String?
The charset included in the Content-Type.
-
#content_type ⇒ String
The Content-Type of the page.
-
#content_types ⇒ Array<String>
The content types of the page.
-
#cookie ⇒ String
(also: #raw_cookie)
The raw Cookie String sent along with the page.
-
#cookie_params ⇒ Hash{String => String}
The Cookie key -> value pairs returned with the response.
-
#cookies ⇒ Array<String>
The Cookie values sent along with the page.
-
#css? ⇒ Boolean
Determines if the page is a CSS stylesheet.
-
#directory? ⇒ Boolean
Determines if the page is a Directory Listing.
-
#had_internal_server_error? ⇒ Boolean
Determines if the response code is 500.
-
#html? ⇒ Boolean
Determines if the page is an HTML document.
-
#is_content_type?(type) ⇒ Boolean
Determines if any of the content-types of the page include a given type.
-
#is_forbidden? ⇒ Boolean
(also: #forbidden?)
Determines if the response code is 403.
-
#is_missing? ⇒ Boolean
(also: #missing?)
Determines if the response code is 404.
-
#is_ok? ⇒ Boolean
(also: #ok?)
Determines if the response code is 200.
-
#is_unauthorized? ⇒ Boolean
(also: #unauthorized?)
Determines if the response code is 401.
-
#javascript? ⇒ Boolean
Determines if the page is JavaScript.
-
#json? ⇒ Boolean
Determines if the page is JSON.
-
#ms_word? ⇒ Boolean
Determines if the page is an MS Word document.
-
#pdf? ⇒ Boolean
Determines if the page is a PDF document.
-
#plain_text? ⇒ Boolean
(also: #txt?)
Determines if the page is plain-text.
-
#rss? ⇒ Boolean
Determines if the page is an RSS feed.
-
#timedout? ⇒ Boolean
Determines if the response code is 308.
-
#xml? ⇒ Boolean
Determines if the page is an XML document.
-
#xsl? ⇒ Boolean
Determines if the page is an XML Stylesheet (XSL).
-
#zip? ⇒ Boolean
Determines if the page is a ZIP archive.
Instance Method Details
#atom? ⇒ Boolean
Determines if the page is an Atom feed.
284 285 286 |
# File 'lib/spidr/headers.rb', line 284 def atom? is_content_type?('application/atom+xml') end |
#bad_request? ⇒ Boolean
Determines if the response code is 400.
46 47 48 |
# File 'lib/spidr/headers.rb', line 46 def bad_request? code == 400 end |
#code ⇒ Integer
The response code from the page.
14 15 16 |
# File 'lib/spidr/headers.rb', line 14 def code response.code.to_i end |
#content_charset ⇒ String?
The charset included in the Content-Type.
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/spidr/headers.rb', line 126 def content_charset content_types.each do |value| if value.include?(';') value.split(';').each do |param| param.strip! if param.start_with?('charset=') return param.split('=',2).last end end end end return nil end |
#content_type ⇒ String
The Content-Type of the page.
102 103 104 |
# File 'lib/spidr/headers.rb', line 102 def content_type (response['Content-Type'] || '') end |
#content_types ⇒ Array<String>
The content types of the page.
114 115 116 |
# File 'lib/spidr/headers.rb', line 114 def content_types (headers['content-type'] || []) end |
#cookie ⇒ String Also known as: raw_cookie
The raw Cookie String sent along with the page.
326 327 328 |
# File 'lib/spidr/headers.rb', line 326 def cookie (response['Set-Cookie'] || '') end |
#cookie_params ⇒ Hash{String => String}
The Cookie key -> value pairs returned with the response.
352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 |
# File 'lib/spidr/headers.rb', line 352 def cookie_params params = {} cookies.each do |value| value.split(';').each do |param| param.strip! name, value = param.split('=',2) unless RESERVED_COOKIE_NAMES.include?(name) params[name] = (value || '') end end end return params end |
#cookies ⇒ Array<String>
The Cookie values sent along with the page.
340 341 342 |
# File 'lib/spidr/headers.rb', line 340 def cookies (headers['set-cookie'] || []) end |
#css? ⇒ Boolean
Determines if the page is a CSS stylesheet.
263 264 265 |
# File 'lib/spidr/headers.rb', line 263 def css? is_content_type?('text/css') end |
#directory? ⇒ Boolean
Determines if the page is a Directory Listing.
199 200 201 |
# File 'lib/spidr/headers.rb', line 199 def directory? is_content_type?('text/directory') end |
#had_internal_server_error? ⇒ Boolean
Determines if the response code is 500.
92 93 94 |
# File 'lib/spidr/headers.rb', line 92 def had_internal_server_error? code == 500 end |
#html? ⇒ Boolean
Determines if the page is an HTML document.
209 210 211 |
# File 'lib/spidr/headers.rb', line 209 def html? is_content_type?('text/html') end |
#is_content_type?(type) ⇒ Boolean
Determines if any of the content-types of the page include a given type.
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/spidr/headers.rb', line 160 def is_content_type?(type) if type.include?('/') # otherwise only match the first param content_types.any? do |value| value = value.split(';',2).first value == type end else # otherwise only match the sub-type content_types.any? do |value| value = value.split(';',2).first value = value.split('/',2).last value == type end end end |
#is_forbidden? ⇒ Boolean Also known as: forbidden?
Determines if the response code is 403.
68 69 70 |
# File 'lib/spidr/headers.rb', line 68 def is_forbidden? code == 403 end |
#is_missing? ⇒ Boolean Also known as: missing?
Determines if the response code is 404.
80 81 82 |
# File 'lib/spidr/headers.rb', line 80 def is_missing? code == 404 end |
#is_ok? ⇒ Boolean Also known as: ok?
Determines if the response code is 200.
24 25 26 |
# File 'lib/spidr/headers.rb', line 24 def is_ok? code == 200 end |
#is_unauthorized? ⇒ Boolean Also known as: unauthorized?
Determines if the response code is 401.
56 57 58 |
# File 'lib/spidr/headers.rb', line 56 def is_unauthorized? code == 401 end |
#javascript? ⇒ Boolean
Determines if the page is JavaScript.
240 241 242 243 |
# File 'lib/spidr/headers.rb', line 240 def javascript? is_content_type?('text/javascript') || \ is_content_type?('application/javascript') end |
#json? ⇒ Boolean
Determines if the page is JSON.
253 254 255 |
# File 'lib/spidr/headers.rb', line 253 def json? is_content_type?('application/json') end |
#ms_word? ⇒ Boolean
Determines if the page is an MS Word document.
294 295 296 |
# File 'lib/spidr/headers.rb', line 294 def ms_word? is_content_type?('application/msword') end |
#pdf? ⇒ Boolean
Determines if the page is a PDF document.
304 305 306 |
# File 'lib/spidr/headers.rb', line 304 def pdf? is_content_type?('application/pdf') end |
#plain_text? ⇒ Boolean Also known as: txt?
Determines if the page is plain-text.
185 186 187 |
# File 'lib/spidr/headers.rb', line 185 def plain_text? is_content_type?('text/plain') end |
#rss? ⇒ Boolean
Determines if the page is an RSS feed.
273 274 275 276 |
# File 'lib/spidr/headers.rb', line 273 def rss? is_content_type?('application/rss+xml') || \ is_content_type?('application/rdf+xml') end |
#timedout? ⇒ Boolean
Determines if the response code is 308.
36 37 38 |
# File 'lib/spidr/headers.rb', line 36 def timedout? code == 308 end |
#xml? ⇒ Boolean
Determines if the page is an XML document.
219 220 221 222 |
# File 'lib/spidr/headers.rb', line 219 def xml? is_content_type?('text/xml') || \ is_content_type?('application/xml') end |
#xsl? ⇒ Boolean
Determines if the page is an XML Stylesheet (XSL).
230 231 232 |
# File 'lib/spidr/headers.rb', line 230 def xsl? is_content_type?('text/xsl') end |
#zip? ⇒ Boolean
Determines if the page is a ZIP archive.
314 315 316 |
# File 'lib/spidr/headers.rb', line 314 def zip? is_content_type?('application/zip') end |