Module: Spidr::Headers

Included in:
Page
Defined in:
lib/spidr/headers.rb

Constant Summary collapse

RESERVED_COOKIE_NAMES = Set['path', 'expires', 'domain']

Instance Method Summary collapse

Instance Method Details

#atom? ⇒ Boolean

Determines if the page is an Atom feed.

Returns:

  • (Boolean)

    Specifies whether the page is an Atom feed.



284
285
286
# File 'lib/spidr/headers.rb', line 284

#
# Determines if the page is an Atom feed.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `application/atom+xml`.
#
def atom?
  atom_mime = 'application/atom+xml'

  is_content_type?(atom_mime)
end

#bad_request? ⇒ Boolean

Determines if the response code is 400.

Returns:

  • (Boolean)

    Specifies whether the response code is 400.



46
47
48
# File 'lib/spidr/headers.rb', line 46

#
# Determines if the response code is `400`.
#
# @return [Boolean]
#   `true` when {#code} equals `400`, `false` otherwise.
#
def bad_request?
  status = code

  status == 400
end

#code ⇒ Integer

The response code from the page.

Returns:

  • (Integer)

    Response code from the page.



14
15
16
# File 'lib/spidr/headers.rb', line 14

#
# The response code from the page.
#
# @return [Integer]
#   The value of `response.code` converted with `to_i`.
#
def code
  raw_status = response.code

  raw_status.to_i
end

#content_charset ⇒ String?

The charset included in the Content-Type.

Returns:

  • (String, nil)

    The charset of the content.

Since:

  • 0.4.0



126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/spidr/headers.rb', line 126

#
# The charset included in the Content-Type.
#
# Each value from {#content_types} that contains a `;` is split into
# segments, and the first segment of the form `charset=VALUE` wins.
# The parameter name is compared case-insensitively (`Charset=UTF-8`
# is accepted) and one pair of surrounding double quotes is removed
# from the value (`charset="utf-8"` yields `utf-8`).
#
# @return [String, nil]
#   The charset of the content, or `nil` when no Content-Type value
#   carries a `charset` parameter.
#
# @since 0.4.0
#
def content_charset
  content_types.each do |header_value|
    # a value without ';' has no parameters at all
    next unless header_value.include?(';')

    header_value.split(';').each do |segment|
      name, charset = segment.strip.split('=', 2)

      # segments without '=' (such as the media type) carry no value
      next unless charset
      next unless name.strip.downcase == 'charset'

      # drop the optional quoting around the parameter value
      return charset.strip.gsub(/\A"|"\z/, '')
    end
  end

  nil
end

#content_type ⇒ String

The Content-Type of the page.

Returns:

  • (String)

    The Content-Type of the page.



102
103
104
# File 'lib/spidr/headers.rb', line 102

#
# The Content-Type of the page.
#
# @return [String]
#   The `Content-Type` entry of the response, or an empty String when
#   the response has none.
#
def content_type
  header = response['Content-Type']

  header || ''
end

#content_types ⇒ Array<String>

The content types of the page.

Returns:

  • (Array<String>)

    The values within the Content-Type header.

Since:

  • 0.2.2



114
115
116
# File 'lib/spidr/headers.rb', line 114

#
# The content types of the page.
#
# @return [Array<String>]
#   The `content-type` entry of `headers`, or an empty Array when that
#   entry is missing.
#
# @since 0.2.2
#
def content_types
  values = headers['content-type']

  values || []
end

#cookie ⇒ String

The raw Cookie String sent along with the page.

Returns:

  • (String)

    The raw Cookie from the response.

Since:

  • 0.2.7



326
327
328
# File 'lib/spidr/headers.rb', line 326

#
# The raw Cookie String sent along with the page.
#
# @return [String]
#   The `Set-Cookie` entry of the response, or an empty String when
#   the response has none.
#
# @since 0.2.7
#
def cookie
  raw_cookie = response['Set-Cookie']

  raw_cookie || ''
end

#cookie_params ⇒ Hash{String => String}

The Cookie key -> value pairs returned with the response.

Returns:

  • (Hash{String => String})

    The cookie keys and values.

Since:

  • 0.2.2



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
# File 'lib/spidr/headers.rb', line 352

#
# The Cookie key -> value pairs returned with the response.
#
# Every value from {#cookies} is split on `;` and each segment on the
# first `=`. Segments whose name is listed in `RESERVED_COOKIE_NAMES`
# are skipped; the name is compared case-insensitively, so `Path=/`
# is filtered just like `path=/`. Segments with no name (for example
# the empty segment produced by `a=1;;b=2`) are skipped as well.
#
# @return [Hash{String => String}]
#   The cookie keys and values. A segment without `=` maps to an
#   empty String.
#
# @since 0.2.2
#
def cookie_params
  params = {}

  cookies.each do |cookie_string|
    cookie_string.split(';').each do |segment|
      name, value = segment.strip.split('=', 2)

      # an empty segment splits into nothing and has no name to store
      next if name.nil? || name.empty?
      # attribute names are matched regardless of case
      next if RESERVED_COOKIE_NAMES.include?(name.downcase)

      params[name] = (value || '')
    end
  end

  params
end

#cookies ⇒ Array<String>

The Cookie values sent along with the page.

Returns:

  • (Array<String>)

    The Cookies from the response.

Since:

  • 0.2.2



340
341
342
# File 'lib/spidr/headers.rb', line 340

#
# The Cookie values sent along with the page.
#
# @return [Array<String>]
#   The `set-cookie` entry of `headers`, or an empty Array when that
#   entry is missing.
#
# @since 0.2.2
#
def cookies
  values = headers['set-cookie']

  values || []
end

#css? ⇒ Boolean

Determines if the page is a CSS stylesheet.

Returns:

  • (Boolean)

    Specifies whether the page is a CSS stylesheet.



263
264
265
# File 'lib/spidr/headers.rb', line 263

#
# Determines if the page is a CSS stylesheet.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `text/css`.
#
def css?
  css_mime = 'text/css'

  is_content_type?(css_mime)
end

#directory? ⇒ Boolean

Determines if the page is a Directory Listing.

Returns:

  • (Boolean)

    Specifies whether the page is a Directory Listing.

Since:

  • 0.3.0



199
200
201
# File 'lib/spidr/headers.rb', line 199

#
# Determines if the page is a Directory Listing.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `text/directory`.
#
# @since 0.3.0
#
def directory?
  directory_mime = 'text/directory'

  is_content_type?(directory_mime)
end

#had_internal_server_error? ⇒ Boolean

Determines if the response code is 500.

Returns:

  • (Boolean)

    Specifies whether the response code is 500.



92
93
94
# File 'lib/spidr/headers.rb', line 92

#
# Determines if the response code is `500`.
#
# @return [Boolean]
#   `true` when {#code} equals `500`, `false` otherwise.
#
def had_internal_server_error?
  status = code

  status == 500
end

#html? ⇒ Boolean

Determines if the page is an HTML document.

Returns:

  • (Boolean)

    Specifies whether the page is an HTML document.



209
210
211
# File 'lib/spidr/headers.rb', line 209

#
# Determines if the page is an HTML document.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `text/html`.
#
def html?
  html_mime = 'text/html'

  is_content_type?(html_mime)
end

#is_content_type?(type) ⇒ Boolean

Determines if any of the content-types of the page include a given type.

Examples:

Match the Content-Type

page.is_content_type?('application/json')

Match the sub-type of the Content-Type

page.is_content_type?('json')

Parameters:

  • type (String)

    The content-type to test for.

Returns:

  • (Boolean)

    Specifies whether the page includes the given content-type.

Since:

  • 0.4.0



160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/spidr/headers.rb', line 160

#
# Determines if any of the content-types of the page include a given
# type.
#
# When `type` contains a `/` it is compared against the full media type
# of each value from {#content_types}; otherwise it is compared against
# the sub-type only (the part after the `/`). Parameters after the first
# `;` are ignored. The comparison is case-insensitive and tolerates
# whitespace around the media type. Empty header values never match.
#
# @param [String] type
#   The content-type to test for.
#
# @return [Boolean]
#   Specifies whether the page includes the given content-type.
#
# @example Match the Content-Type
#   page.is_content_type?('application/json')
#
# @example Match the sub-type of the Content-Type
#   page.is_content_type?('json')
#
# @since 0.4.0
#
def is_content_type?(type)
  wanted          = type.downcase
  match_full_type = wanted.include?('/')

  content_types.any? do |value|
    # drop any parameters, e.g. "; charset=utf-8"
    media_type = value.split(';', 2).first

    # an empty header value splits into nothing
    next false unless media_type

    media_type = media_type.strip.downcase
    # without a '/' in the requested type, only the sub-type is compared
    media_type = media_type.split('/', 2).last unless match_full_type

    media_type == wanted
  end
end

#is_forbidden? ⇒ Boolean (also known as: forbidden?)

Determines if the response code is 403.

Returns:

  • (Boolean)

    Specifies whether the response code is 403.



68
69
70
# File 'lib/spidr/headers.rb', line 68

#
# Determines if the response code is `403`.
#
# @return [Boolean]
#   `true` when {#code} equals `403`, `false` otherwise.
#
def is_forbidden?
  status = code

  status == 403
end

#is_missing? ⇒ Boolean (also known as: missing?)

Determines if the response code is 404.

Returns:

  • (Boolean)

    Specifies whether the response code is 404.



80
81
82
# File 'lib/spidr/headers.rb', line 80

#
# Determines if the response code is `404`.
#
# @return [Boolean]
#   `true` when {#code} equals `404`, `false` otherwise.
#
def is_missing?
  status = code

  status == 404
end

#is_ok? ⇒ Boolean (also known as: ok?)

Determines if the response code is 200.

Returns:

  • (Boolean)

    Specifies whether the response code is 200.



24
25
26
# File 'lib/spidr/headers.rb', line 24

#
# Determines if the response code is `200`.
#
# @return [Boolean]
#   `true` when {#code} equals `200`, `false` otherwise.
#
def is_ok?
  status = code

  status == 200
end

#is_unauthorized? ⇒ Boolean (also known as: unauthorized?)

Determines if the response code is 401.

Returns:

  • (Boolean)

    Specifies whether the response code is 401.



56
57
58
# File 'lib/spidr/headers.rb', line 56

#
# Determines if the response code is `401`.
#
# @return [Boolean]
#   `true` when {#code} equals `401`, `false` otherwise.
#
def is_unauthorized?
  status = code

  status == 401
end

#javascript? ⇒ Boolean

Determines if the page is JavaScript.

Returns:

  • (Boolean)

    Specifies whether the page is JavaScript.



240
241
242
243
# File 'lib/spidr/headers.rb', line 240

#
# Determines if the page is JavaScript.
#
# Checks `text/javascript` first, then `application/javascript`, and
# stops at the first type that {#is_content_type?} accepts.
#
# @return [Boolean]
#   Specifies whether the page is JavaScript.
#
def javascript?
  %w[text/javascript application/javascript].any? do |mime|
    is_content_type?(mime)
  end
end

#json? ⇒ Boolean

Determines if the page is JSON.

Returns:

  • (Boolean)

    Specifies whether the page is JSON.

Since:

  • 0.3.0



253
254
255
# File 'lib/spidr/headers.rb', line 253

#
# Determines if the page is JSON.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `application/json`.
#
# @since 0.3.0
#
def json?
  json_mime = 'application/json'

  is_content_type?(json_mime)
end

#ms_word? ⇒ Boolean

Determines if the page is an MS Word document.

Returns:

  • (Boolean)

    Specifies whether the page is an MS Word document.



294
295
296
# File 'lib/spidr/headers.rb', line 294

#
# Determines if the page is an MS Word document.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `application/msword`.
#
def ms_word?
  word_mime = 'application/msword'

  is_content_type?(word_mime)
end

#pdf? ⇒ Boolean

Determines if the page is a PDF document.

Returns:

  • (Boolean)

    Specifies whether the page is a PDF document.



304
305
306
# File 'lib/spidr/headers.rb', line 304

#
# Determines if the page is a PDF document.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `application/pdf`.
#
def pdf?
  pdf_mime = 'application/pdf'

  is_content_type?(pdf_mime)
end

#plain_text? ⇒ Boolean (also known as: txt?)

Determines if the page is plain-text.

Returns:

  • (Boolean)

    Specifies whether the page is plain-text.



185
186
187
# File 'lib/spidr/headers.rb', line 185

#
# Determines if the page is plain-text.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `text/plain`.
#
def plain_text?
  plain_mime = 'text/plain'

  is_content_type?(plain_mime)
end

#rss? ⇒ Boolean

Determines if the page is an RSS feed.

Returns:

  • (Boolean)

    Specifies whether the page is an RSS feed.



273
274
275
276
# File 'lib/spidr/headers.rb', line 273

#
# Determines if the page is an RSS feed.
#
# Checks `application/rss+xml` first, then `application/rdf+xml`, and
# stops at the first type that {#is_content_type?} accepts.
#
# @return [Boolean]
#   Specifies whether the page is an RSS feed.
#
def rss?
  %w[application/rss+xml application/rdf+xml].any? do |mime|
    is_content_type?(mime)
  end
end

#timedout? ⇒ Boolean

Determines if the response code is 308.

Returns:

  • (Boolean)

    Specifies whether the response code is 308.



36
37
38
# File 'lib/spidr/headers.rb', line 36

#
# Determines if the response code is `408` (Request Timeout).
#
# HTTP assigns `408` to a timed-out request; `308` is
# "Permanent Redirect" and does not indicate a timeout.
#
# @return [Boolean]
#   `true` when {#code} equals `408`, `false` otherwise.
#
def timedout?
  code == 408
end

#xml? ⇒ Boolean

Determines if the page is an XML document.

Returns:

  • (Boolean)

    Specifies whether the page is an XML document.



219
220
221
222
# File 'lib/spidr/headers.rb', line 219

#
# Determines if the page is an XML document.
#
# Checks `text/xml` first, then `application/xml`, and stops at the
# first type that {#is_content_type?} accepts.
#
# @return [Boolean]
#   Specifies whether the page is an XML document.
#
def xml?
  %w[text/xml application/xml].any? do |mime|
    is_content_type?(mime)
  end
end

#xsl? ⇒ Boolean

Determines if the page is an XML Stylesheet (XSL).

Returns:

  • (Boolean)

    Specifies whether the page is an XML Stylesheet (XSL).



230
231
232
# File 'lib/spidr/headers.rb', line 230

#
# Determines if the page is an XML Stylesheet (XSL).
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `text/xsl`.
#
def xsl?
  xsl_mime = 'text/xsl'

  is_content_type?(xsl_mime)
end

#zip? ⇒ Boolean

Determines if the page is a ZIP archive.

Returns:

  • (Boolean)

    Specifies whether the page is a ZIP archive.



314
315
316
# File 'lib/spidr/headers.rb', line 314

#
# Determines if the page is a ZIP archive.
#
# @return [Boolean]
#   Whatever {#is_content_type?} reports for `application/zip`.
#
def zip?
  zip_mime = 'application/zip'

  is_content_type?(zip_mime)
end