Module: Spidr::Headers

Included in:
Page
Defined in:
lib/spidr/headers.rb

Constant Summary collapse

RESERVED_COOKIE_NAMES = Set['path', 'expires', 'domain']

Instance Method Summary collapse

Instance Method Details

#atom? ⇒ Boolean

Determines if the page is an Atom feed.

Returns:

  • (Boolean)

    Specifies whether the page is an Atom feed.



223
224
225
# File 'lib/spidr/headers.rb', line 223

#
# Determines if the page is an Atom feed.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/atom+xml`.
#
def atom?
  atom_mime = 'application/atom+xml'

  is_content_type?(atom_mime)
end

#bad_request? ⇒ Boolean

Determines if the response code is 400.

Returns:

  • (Boolean)

    Specifies whether the response code is 400.



46
47
48
# File 'lib/spidr/headers.rb', line 46

#
# Determines if the response code is `400`.
#
# @return [Boolean]
#   Specifies whether the response code is `400`.
#
def bad_request?
  status = code

  status == 400
end

#code ⇒ Integer

The response code from the page.

Returns:

  • (Integer)

    Response code from the page.



14
15
16
# File 'lib/spidr/headers.rb', line 14

#
# The response code from the page.
#
# @return [Integer]
#   The response's code, converted with `to_i`.
#
def code
  raw_status = response.code

  raw_status.to_i
end

#content_type ⇒ String

The Content-Type of the page.

Returns:

  • (String)

    The Content-Type of the page.



102
103
104
# File 'lib/spidr/headers.rb', line 102

#
# The Content-Type of the page.
#
# @return [String]
#   The value of the response's `Content-Type` header, or an empty
#   String when the header is absent.
#
def content_type
  header = response['Content-Type']

  if header then header
  else ''
  end
end

#content_types ⇒ Array<String>

The content types of the page.

Returns:

  • (Array<String>)

    The values within the Content-Type header.

Since:

  • 0.2.2



114
115
116
# File 'lib/spidr/headers.rb', line 114

#
# The content types of the page.
#
# @return [Array<String>]
#   The values stored under `content-type` in the headers, or an empty
#   Array when there are none.
#
# @since 0.2.2
#
def content_types
  values = headers['content-type']

  if values then values
  else []
  end
end

#cookie ⇒ String

The raw Cookie String sent along with the page.

Returns:

  • (String)

    The raw Cookie from the response.

Since:

  • 0.2.7



265
266
267
# File 'lib/spidr/headers.rb', line 265

#
# The raw Cookie String sent along with the page.
#
# @return [String]
#   The value of the response's `Set-Cookie` header, or an empty String
#   when the header is absent.
#
# @since 0.2.7
#
def cookie
  raw_cookie = response['Set-Cookie']

  if raw_cookie then raw_cookie
  else ''
  end
end

#cookie_params ⇒ Hash{String => String}

The Cookie key -> value pairs returned with the response.

Returns:

  • (Hash{String => String})

    The cookie keys and values.

Since:

  • 0.2.2



291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
# File 'lib/spidr/headers.rb', line 291

#
# The Cookie key -> value pairs returned with the response.
#
# Every value from {#cookies} is split into `key=value` segments.
# Segments whose key is listed in `RESERVED_COOKIE_NAMES` are skipped.
# The reserved-name comparison is done on the down-cased key, because
# servers commonly send attributes capitalised (`Path=/`, `Domain=...`).
# Segments may be separated by `;` with or without trailing whitespace.
#
# @return [Hash{String => String}]
#   The cookie keys and values. A key without `=value` maps to an empty
#   String; this includes any flag attribute that is not listed in
#   `RESERVED_COOKIE_NAMES`.
#
# @since 0.2.2
#
def cookie_params
  cookies.each_with_object({}) do |cookie, params|
    cookie.split(/;\s*/).each do |key_value|
      key, value = key_value.split('=', 2)
      key = key.to_s.strip

      # an empty segment (e.g. from ";;") carries no cookie name
      next if key.empty?
      next if RESERVED_COOKIE_NAMES.include?(key.downcase)

      params[key] = (value || '')
    end
  end
end

#cookies ⇒ Array<String>

The Cookie values sent along with the page.

Returns:

  • (Array<String>)

    The Cookies from the response.

Since:

  • 0.2.2



279
280
281
# File 'lib/spidr/headers.rb', line 279

#
# The Cookie values sent along with the page.
#
# @return [Array<String>]
#   The values stored under `set-cookie` in the headers, or an empty
#   Array when there are none.
#
# @since 0.2.2
#
def cookies
  values = headers['set-cookie']

  if values then values
  else []
  end
end

#css? ⇒ Boolean

Determines if the page is a CSS stylesheet.

Returns:

  • (Boolean)

    Specifies whether the page is a CSS stylesheet.



202
203
204
# File 'lib/spidr/headers.rb', line 202

#
# Determines if the page is a CSS stylesheet.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/css`.
#
def css?
  css_mime = 'text/css'

  is_content_type?(css_mime)
end

#directory? ⇒ Boolean

Determines if the page is a Directory Listing.

Returns:

  • (Boolean)

    Specifies whether the page is a Directory Listing.

Since:

  • 0.3.0



138
139
140
# File 'lib/spidr/headers.rb', line 138

#
# Determines if the page is a Directory Listing.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/directory`.
#
# @since 0.3.0
#
def directory?
  listing_mime = 'text/directory'

  is_content_type?(listing_mime)
end

#had_internal_server_error? ⇒ Boolean

Determines if the response code is 500.

Returns:

  • (Boolean)

    Specifies whether the response code is 500.



92
93
94
# File 'lib/spidr/headers.rb', line 92

#
# Determines if the response code is `500`.
#
# @return [Boolean]
#   Specifies whether the response code is `500`.
#
def had_internal_server_error?
  status = code

  status == 500
end

#html? ⇒ Boolean

Determines if the page is an HTML document.

Returns:

  • (Boolean)

    Specifies whether the page is an HTML document.



148
149
150
# File 'lib/spidr/headers.rb', line 148

#
# Determines if the page is an HTML document.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/html`.
#
def html?
  html_mime = 'text/html'

  is_content_type?(html_mime)
end

#is_content_type?(type) ⇒ Boolean (protected)

Determines if any of the content-types of the page include a given type.

Parameters:

  • type (String)

    The content-type to test for.

Returns:

  • (Boolean)

    Specifies whether the page includes the given content-type.

Since:

  • 0.2.4



321
322
323
# File 'lib/spidr/headers.rb', line 321

#
# Determines if any of the content-types of the page include a given
# type.
#
# The comparison is case-insensitive, because media type and subtype
# names are case-insensitive: a server sending `Text/HTML` is reporting
# the same type as `text/html`.
#
# @param [String] type
#   The content-type to test for.
#
# @return [Boolean]
#   Specifies whether the page includes the given content-type.
#
# @since 0.2.4
#
def is_content_type?(type)
  # down-case the needle once instead of once per header value
  wanted = type.downcase

  content_types.any? { |content| content.downcase.include?(wanted) }
end

#is_forbidden? ⇒ Boolean Also known as: forbidden?

Determines if the response code is 403.

Returns:

  • (Boolean)

    Specifies whether the response code is 403.



68
69
70
# File 'lib/spidr/headers.rb', line 68

#
# Determines if the response code is `403`.
#
# @return [Boolean]
#   Specifies whether the response code is `403`.
#
def is_forbidden?
  status = code

  status == 403
end

#is_missing? ⇒ Boolean Also known as: missing?

Determines if the response code is 404.

Returns:

  • (Boolean)

    Specifies whether the response code is 404.



80
81
82
# File 'lib/spidr/headers.rb', line 80

#
# Determines if the response code is `404`.
#
# @return [Boolean]
#   Specifies whether the response code is `404`.
#
def is_missing?
  status = code

  status == 404
end

#is_ok? ⇒ Boolean Also known as: ok?

Determines if the response code is 200.

Returns:

  • (Boolean)

    Specifies whether the response code is 200.



24
25
26
# File 'lib/spidr/headers.rb', line 24

#
# Determines if the response code is `200`.
#
# @return [Boolean]
#   Specifies whether the response code is `200`.
#
def is_ok?
  status = code

  status == 200
end

#is_unauthorized? ⇒ Boolean Also known as: unauthorized?

Determines if the response code is 401.

Returns:

  • (Boolean)

    Specifies whether the response code is 401.



56
57
58
# File 'lib/spidr/headers.rb', line 56

#
# Determines if the response code is `401`.
#
# @return [Boolean]
#   Specifies whether the response code is `401`.
#
def is_unauthorized?
  status = code

  status == 401
end

#javascript? ⇒ Boolean

Determines if the page is JavaScript.

Returns:

  • (Boolean)

    Specifies whether the page is JavaScript.



179
180
181
182
# File 'lib/spidr/headers.rb', line 179

#
# Determines if the page is JavaScript.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/javascript` or `application/javascript`.
#
def javascript?
  %w[text/javascript application/javascript].any? do |mime|
    is_content_type?(mime)
  end
end

#json? ⇒ Boolean

Determines if the page is JSON.

Returns:

  • (Boolean)

    Specifies whether the page is JSON.

Since:

  • 0.3.0



192
193
194
# File 'lib/spidr/headers.rb', line 192

#
# Determines if the page is JSON.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/json`.
#
# @since 0.3.0
#
def json?
  json_mime = 'application/json'

  is_content_type?(json_mime)
end

#ms_word? ⇒ Boolean

Determines if the page is a MS Word document.

Returns:

  • (Boolean)

    Specifies whether the page is a MS Word document.



233
234
235
# File 'lib/spidr/headers.rb', line 233

#
# Determines if the page is a MS Word document.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/msword`.
#
def ms_word?
  word_mime = 'application/msword'

  is_content_type?(word_mime)
end

#pdf? ⇒ Boolean

Determines if the page is a PDF document.

Returns:

  • (Boolean)

    Specifies whether the page is a PDF document.



243
244
245
# File 'lib/spidr/headers.rb', line 243

#
# Determines if the page is a PDF document.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/pdf`.
#
def pdf?
  pdf_mime = 'application/pdf'

  is_content_type?(pdf_mime)
end

#plain_text? ⇒ Boolean Also known as: txt?

Determines if the page is plain-text.

Returns:

  • (Boolean)

    Specifies whether the page is plain-text.



124
125
126
# File 'lib/spidr/headers.rb', line 124

#
# Determines if the page is plain-text.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/plain`.
#
def plain_text?
  text_mime = 'text/plain'

  is_content_type?(text_mime)
end

#rss? ⇒ Boolean

Determines if the page is an RSS feed.

Returns:

  • (Boolean)

    Specifies whether the page is an RSS feed.



212
213
214
215
# File 'lib/spidr/headers.rb', line 212

#
# Determines if the page is an RSS feed.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/rss+xml` or `application/rdf+xml`.
#
def rss?
  %w[application/rss+xml application/rdf+xml].any? do |mime|
    is_content_type?(mime)
  end
end

#timedout? ⇒ Boolean

Determines if the response code is 308.

Returns:

  • (Boolean)

    Specifies whether the response code is 308.



36
37
38
# File 'lib/spidr/headers.rb', line 36

#
# Determines if the response code is `408` (Request Timeout).
#
# This method previously compared against `308`, which is the
# Permanent Redirect status and not a timeout.
#
# @return [Boolean]
#   Specifies whether the response code is `408`.
#
def timedout?
  code == 408
end

#xml? ⇒ Boolean

Determines if the page is an XML document.

Returns:

  • (Boolean)

    Specifies whether the page is an XML document.



158
159
160
161
# File 'lib/spidr/headers.rb', line 158

#
# Determines if the page is an XML document.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/xml` or `application/xml`.
#
def xml?
  %w[text/xml application/xml].any? do |mime|
    is_content_type?(mime)
  end
end

#xsl? ⇒ Boolean

Determines if the page is an XML Stylesheet (XSL).

Returns:

  • (Boolean)

    Specifies whether the page is an XML Stylesheet (XSL).



169
170
171
# File 'lib/spidr/headers.rb', line 169

#
# Determines if the page is an XML Stylesheet (XSL).
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `text/xsl`.
#
def xsl?
  xsl_mime = 'text/xsl'

  is_content_type?(xsl_mime)
end

#zip? ⇒ Boolean

Determines if the page is a ZIP archive.

Returns:

  • (Boolean)

    Specifies whether the page is a ZIP archive.



253
254
255
# File 'lib/spidr/headers.rb', line 253

#
# Determines if the page is a ZIP archive.
#
# @return [Boolean]
#   Specifies whether any Content-Type value of the page includes
#   `application/zip`.
#
def zip?
  zip_mime = 'application/zip'

  is_content_type?(zip_mime)
end