Class: Spidr::SessionCache

Inherits:
Object
  • Object
show all
Includes:
Spidr::Settings::Proxy, Spidr::Settings::Timeouts
Defined in:
lib/spidr/session_cache.rb

Overview

Stores active HTTP Sessions organized by scheme, host-name and port.

Instance Attribute Summary

Attributes included from Spidr::Settings::Timeouts

#continue_timeout, #keep_alive_timeout, #open_timeout, #read_timeout, #ssl_timeout

Instance Method Summary collapse

Methods included from Spidr::Settings::Proxy

#disable_proxy!, #proxy, #proxy=

Constructor Details

#initialize(proxy: Spidr.proxy, open_timeout: Spidr.open_timeout, ssl_timeout: Spidr.ssl_timeout, read_timeout: Spidr.read_timeout, continue_timeout: Spidr.continue_timeout, keep_alive_timeout: Spidr.keep_alive_timeout) ⇒ SessionCache

Creates a new session cache.

Parameters:

  • proxy (Hash) (defaults to: Spidr.proxy)

    Proxy options.

  • open_timeout (Integer) (defaults to: Spidr.open_timeout)

    Optional open connection timeout.

  • ssl_timeout (Integer) (defaults to: Spidr.ssl_timeout)

    Optional SSL connection timeout.

  • read_timeout (Integer) (defaults to: Spidr.read_timeout)

    Optional read timeout.

  • continue_timeout (Integer) (defaults to: Spidr.continue_timeout)

    Optional Continue timeout.

  • keep_alive_timeout (Integer) (defaults to: Spidr.keep_alive_timeout)

    Optional Keep-Alive timeout.

Since:

  • 0.6.0



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/spidr/session_cache.rb', line 42

#
# Creates a new session cache that starts out with no sessions.
#
# Every keyword falls back to the matching module-level `Spidr` setting
# when it is not given.
#
# @param [Hash] proxy
#   Proxy options.
#
# @param [Integer] open_timeout
#   Optional open connection timeout.
#
# @param [Integer] ssl_timeout
#   Optional SSL connection timeout.
#
# @param [Integer] read_timeout
#   Optional read timeout.
#
# @param [Integer] continue_timeout
#   Optional Continue timeout.
#
# @param [Integer] keep_alive_timeout
#   Optional Keep-Alive timeout.
#
# @since 0.6.0
#
def initialize(proxy:              Spidr.proxy,
               open_timeout:       Spidr.open_timeout,
               ssl_timeout:        Spidr.ssl_timeout,
               read_timeout:       Spidr.read_timeout,
               continue_timeout:   Spidr.continue_timeout,
               keep_alive_timeout: Spidr.keep_alive_timeout)
  # assigned through the #proxy= writer included from Spidr::Settings::Proxy
  self.proxy = proxy

  # assigned through the attribute writers included from
  # Spidr::Settings::Timeouts
  self.open_timeout       = open_timeout
  self.ssl_timeout        = ssl_timeout
  self.read_timeout       = read_timeout
  self.continue_timeout   = continue_timeout
  self.keep_alive_timeout = keep_alive_timeout

  # session key => session object; filled lazily by #[]
  @sessions = {}
end

Instance Method Details

#[](url) ⇒ Net::HTTP

Provides an active HTTP session for a given URL.

Parameters:

  • url (URI::HTTP, String)

    The URL which will be requested later.

Returns:

  • (Net::HTTP)

    The active HTTP session object.



89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/spidr/session_cache.rb', line 89

#
# Looks up, or lazily creates and caches, the HTTP session for a URL.
#
# @param [URI::HTTP, String] url
#   The URL which will be requested later.
#
# @return [Net::HTTP]
#   The session cached under the URL's session key.
#
# @note Certificate verification is switched off (`VERIFY_NONE`) for
#   `https` sessions, and only `https` sessions are started before they
#   are cached; other sessions are cached without calling `start`.
#
def [](url)
  uri         = URI(url)
  session_key = key_for(uri)

  @sessions[session_key] ||= begin
    http_class = Net::HTTP::Proxy(
      @proxy.host,
      @proxy.port,
      @proxy.user,
      @proxy.password
    )
    http = http_class.new(uri.host, uri.port)

    # a timeout is only applied when it is set on this cache
    http.open_timeout       = @open_timeout       if @open_timeout
    http.read_timeout       = @read_timeout       if @read_timeout
    http.continue_timeout   = @continue_timeout   if @continue_timeout
    http.keep_alive_timeout = @keep_alive_timeout if @keep_alive_timeout

    if uri.scheme == 'https'
      http.use_ssl     = true
      # NOTE(review): peer certificates are not verified
      http.verify_mode = OpenSSL::SSL::VERIFY_NONE
      http.ssl_timeout = @ssl_timeout
      http.start
    end

    # value of the begin block, i.e. what gets stored in the cache
    http
  end
end

#active?(url) ⇒ Boolean

Determines if there is an active HTTP session for a given URL.

Parameters:

  • url (URI::HTTP, String)

    The URL that represents a session.

Returns:

  • (Boolean)

    Specifies whether there is an active HTTP session.

Since:

  • 0.2.3



70
71
72
73
74
75
76
77
78
# File 'lib/spidr/session_cache.rb', line 70

#
# Checks whether the cache holds an entry for the session key of a URL.
#
# @param [URI::HTTP, String] url
#   The URL that represents a session.
#
# @return [Boolean]
#   `true` when the URL's session key is present in the cache.
#
# @since 0.2.3
#
def active?(url)
  # URI() normalizes the String/URI argument before the key is derived
  @sessions.key?(key_for(URI(url)))
end

#clear ⇒ SessionCache

Clears the session cache.

Returns:

  • (SessionCache)

    The cleared session cache.

Since:

  • 0.2.2



157
158
159
160
161
162
163
164
165
166
167
# File 'lib/spidr/session_cache.rb', line 157

#
# Finishes every cached session and then empties the cache.
#
# @return [SessionCache]
#   The session cache itself, now empty.
#
# @since 0.2.2
#
def clear
  @sessions.values.each do |http|
    begin
      http.finish
    rescue IOError
      # deliberately ignored: a session that cannot be finished is still
      # dropped from the cache below
    end
  end

  @sessions.clear
  self
end

#kill!(url) ⇒ nil

Destroys an HTTP session for the given scheme, host and port.

Parameters:

  • url (URI::HTTP, String)

    The URL of the requested session.

Returns:

  • (nil)

Since:

  • 0.2.2



132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
# File 'lib/spidr/session_cache.rb', line 132

#
# Destroys the cached HTTP session for the scheme, host and port of a URL.
#
# Does nothing when no session is cached for the URL.
#
# @param [URI::HTTP, String] url
#   The URL of the requested session.
#
# @return [nil]
#   Always `nil`, whether or not a session was cached.
#
# @since 0.2.2
#
def kill!(url)
  # normalize the url
  url = URI(url)

  # session key
  key = key_for(url)

  if (sess = @sessions[key])
    begin
      sess.finish
    rescue IOError
      # deliberately ignored: the session is dropped from the cache anyway
    end

    @sessions.delete(key)
  end

  # Hash#delete returns the removed session; return nil explicitly so the
  # result is the same on a cache hit and a cache miss
  nil
end