Module: OpenURI

Defined in:
lib/open-uri.rb

Overview

OpenURI is an easy-to-use wrapper for Net::HTTP, Net::HTTPS and Net::FTP.

Example

It is possible to open an http, https or ftp URL as though it were a file:

open("http://www.ruby-lang.org/") {|f|
  f.each_line {|line| p line}
}

The opened file has several getter methods for its meta-information, as follows, since it is extended by OpenURI::Meta.

open("http://www.ruby-lang.org/en") {|f|
  f.each_line {|line| p line}
  p f.base_uri         # <URI::HTTP:0x40e6ef2 URL:http://www.ruby-lang.org/en/>
  p f.content_type     # "text/html"
  p f.charset          # "iso-8859-1"
  p f.content_encoding # []
  p f.last_modified    # Thu Dec 05 02:45:02 UTC 2002
}

Additional header fields can be specified by an optional hash argument.

open("http://www.ruby-lang.org/en/",
  "User-Agent" => "Ruby/#{RUBY_VERSION}",
  "From" => "foo@bar.invalid",
  "Referer" => "http://www.ruby-lang.org/") {|f|
  # ...
}

The environment variables such as http_proxy, https_proxy and ftp_proxy are in effect by default. Here we disable proxy:

open("http://www.ruby-lang.org/en/", :proxy => nil) {|f|
  # ...
}

See OpenURI::OpenRead.open and Kernel#open for more on available options.

URI objects can be opened in a similar way.

uri = URI.parse("http://www.ruby-lang.org/en/")
uri.open {|f|
  # ...
}

URI objects can be read directly. The returned string is also extended by OpenURI::Meta.

str = uri.read
p str.base_uri
Author

Tanaka Akira <akr@m17n.org>

Defined Under Namespace

Modules: Meta, OpenRead

Classes: Buffer, HTTPError, HTTPRedirect

Constant Summary collapse

# Symbol-keyed options that open-uri recognizes.  OpenURI.check_options
# raises ArgumentError for any Symbol option key that is not listed here;
# it only looks at the keys.
# NOTE(review): the meaning of the individual values (true / nil / false)
# is not visible in this section -- confirm before relying on them.
Options = {
  proxy: true,
  proxy_http_basic_authentication: true,
  progress_proc: true,
  content_length_proc: true,
  http_basic_authentication: true,
  read_timeout: true,
  open_timeout: true,
  ssl_ca_cert: nil,
  ssl_verify_mode: nil,
  ftp_active_mode: false,
  redirect: true,
}

Class Method Summary collapse

Class Method Details

.check_options(options) ⇒ Object

:nodoc:



112
113
114
115
116
117
118
119
# File 'lib/open-uri.rb', line 112

# Validates the Symbol keys of an open-uri option hash.
#
# Non-Symbol keys (for example the String keys that carry header fields)
# are ignored.  A Symbol key that is missing from Options raises
# ArgumentError.  Returns the value of options.each.
def OpenURI.check_options(options) # :nodoc:
  options.each do |key, _value|
    if Symbol === key && !Options.include?(key)
      raise ArgumentError, "unrecognized option: #{key}"
    end
  end
end

.open_http(buf, target, proxy, options) ⇒ Object

:nodoc:



254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
# File 'lib/open-uri.rb', line 254

# Performs a single HTTP GET for +target+ and stores the response in +buf+.
#
# buf::     object that receives body chunks via <tt><<</tt> and exposes the
#           resulting IO through +io+.
# target::  the URI to fetch.
# proxy::   nil, or a triple of [proxy_uri, proxy_user, proxy_pass].
# options:: option hash; String keys are sent as request header fields,
#           Symbol keys (:ssl_verify_mode, :ssl_ca_cert, :read_timeout,
#           :open_timeout, :http_basic_authentication, :content_length_proc,
#           :progress_proc) tune the request as described inline below.
#
# Raises RuntimeError when the proxy URI's class is not exactly URI::HTTP,
# ArgumentError when +target+ carries userinfo, and OpenURI::HTTPError for
# any response that is neither a success nor a handled redirect.
# For 301/302/303/307 responses it throws :open_uri_redirect with the parsed
# Location URI instead of returning.
def OpenURI.open_http(buf, target, proxy, options) # :nodoc:
  if proxy
    proxy_uri, proxy_user, proxy_pass = proxy
    # Exact class comparison: subclasses of URI::HTTP are rejected as well.
    raise "Non-HTTP proxy URI: #{proxy_uri}" if proxy_uri.class != URI::HTTP
  end

  if target.userinfo
    raise ArgumentError, "userinfo not supported.  [RFC3986]"
  end

  # String-keyed options are the caller-supplied request header fields.
  header = {}
  options.each {|k, v| header[k] = v if String === k }

  require 'net/http'
  klass = Net::HTTP
  if URI::HTTP === target
    # HTTP or HTTPS
    if proxy
      if proxy_user && proxy_pass
        klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port, proxy_user, proxy_pass)
      else
        klass = Net::HTTP::Proxy(proxy_uri.hostname, proxy_uri.port)
      end
    end
    target_host = target.hostname
    target_port = target.port
    request_uri = target.request_uri
  else
    # FTP over HTTP proxy
    # The connection goes to the proxy itself and the full target URI is
    # used as the request URI.  This branch dereferences proxy_uri, so it
    # relies on the caller having supplied a proxy.
    target_host = proxy_uri.hostname
    target_port = proxy_uri.port
    request_uri = target.to_s
    if proxy_user && proxy_pass
      # Base64 ('m') encoding of "user:pass" with the line breaks removed.
      header["Proxy-Authorization"] = 'Basic ' + ["#{proxy_user}:#{proxy_pass}"].pack('m').delete("\r\n")
    end
  end

  # NOTE(review): the explicit nil third argument presumably tells Net::HTTP
  # not to pick up a proxy from the environment when none was requested --
  # confirm against the Net::HTTP.new documentation.
  http = proxy ? klass.new(target_host, target_port) : klass.new(target_host, target_port, nil)
  if target.class == URI::HTTPS
    require 'net/https'
    http.use_ssl = true
    # Peer verification is the default unless :ssl_verify_mode is given.
    http.verify_mode = options[:ssl_verify_mode] || OpenSSL::SSL::VERIFY_PEER
    store = OpenSSL::X509::Store.new
    if options[:ssl_ca_cert]
      # :ssl_ca_cert may be a single path or a list; each entry is added as
      # a directory or as a file depending on what it is on disk.
      Array(options[:ssl_ca_cert]).each do |cert|
        if File.directory? cert
          store.add_path cert
        else
          store.add_file cert
        end
      end
    else
      store.set_default_paths
    end
    http.cert_store = store
  end
  # Timeouts are applied whenever the key is present, even if its value is nil.
  if options.include? :read_timeout
    http.read_timeout = options[:read_timeout]
  end
  if options.include? :open_timeout
    http.open_timeout = options[:open_timeout]
  end

  resp = nil
  http.start {
    req = Net::HTTP::Get.new(request_uri, header)
    if options.include? :http_basic_authentication
      user, pass = options[:http_basic_authentication]
      req.basic_auth user, pass
    end
    http.request(req) {|response|
      resp = response
      # The callbacks below are only invoked for successful responses.
      if options[:content_length_proc] && Net::HTTPSuccess === resp
        if resp.key?('Content-Length')
          options[:content_length_proc].call(resp['Content-Length'].to_i)
        else
          options[:content_length_proc].call(nil)
        end
      end
      # Stream the body into buf chunk by chunk, reporting the accumulated
      # size after each chunk.
      resp.read_body {|str|
        buf << str
        if options[:progress_proc] && Net::HTTPSuccess === resp
          options[:progress_proc].call(buf.size)
        end
      }
    }
  }
  # Expose the response through the buffer's IO: rewind it, record the
  # status pair and copy every response header field as meta information.
  io = buf.io
  io.rewind
  io.status = [resp.code, resp.message]
  resp.each_name {|name| buf.io.meta_add_field2 name, resp.get_fields(name) }
  case resp
  when Net::HTTPSuccess
    # Nothing more to do; the caller reads the result from buf.
  when Net::HTTPMovedPermanently, # 301
       Net::HTTPFound, # 302
       Net::HTTPSeeOther, # 303
       Net::HTTPTemporaryRedirect # 307
    begin
      loc_uri = URI.parse(resp['location'])
    rescue URI::InvalidURIError
      raise OpenURI::HTTPError.new(io.status.join(' ') + ' (Invalid Location URI)', io)
    end
    # Hand the redirect target to whoever established the matching catch.
    throw :open_uri_redirect, loc_uri
  else
    raise OpenURI::HTTPError.new(io.status.join(' '), io)
  end
end

.open_loop(uri, options) ⇒ Object

:nodoc:



167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/open-uri.rb', line 167

# Opens +uri+ through its buffer_open method, following redirects, and
# returns the resulting IO with base_uri set to the URI finally fetched.
#
# Proxy selection comes from at most one of the :proxy and
# :proxy_http_basic_authentication options (ArgumentError if both are
# present or if the value is unusable); with neither, the URI's own
# find_proxy is consulted.
#
# A redirect is signalled by a throw of :open_uri_redirect.  Depending on
# the :redirect option it is either followed or reported through
# HTTPRedirect.  Forbidden redirections and redirection loops raise
# RuntimeError.
def OpenURI.open_loop(uri, options) # :nodoc:
  proxy_opts = [:proxy_http_basic_authentication, :proxy].select {|key| options.include? key }
  raise ArgumentError, "multiple proxy options specified" if proxy_opts.length > 1

  proxy_user = proxy_pass = nil
  case proxy_opts.first
  when :proxy_http_basic_authentication
    opt_proxy, proxy_user, proxy_pass = options.fetch(:proxy_http_basic_authentication)
    proxy_user = proxy_user.to_str
    proxy_pass = proxy_pass.to_str
    if opt_proxy == true
      raise ArgumentError.new("Invalid authenticated proxy option: #{options[:proxy_http_basic_authentication].inspect}")
    end
  when :proxy
    opt_proxy = options.fetch(:proxy)
  else
    # No proxy option at all: behave as if :proxy => true had been given.
    opt_proxy = true
  end

  # find_proxy maps a URI to nil or to [proxy_uri, user, password].
  find_proxy =
    case opt_proxy
    when true
      lambda {|u| found = u.find_proxy; found ? [found, nil, nil] : nil }
    when nil, false
      lambda {|u| nil }
    when String
      opt_proxy = URI.parse(opt_proxy)
      lambda {|u| [opt_proxy, proxy_user, proxy_pass] }
    when URI::Generic
      lambda {|u| [opt_proxy, proxy_user, proxy_pass] }
    else
      raise ArgumentError.new("Invalid proxy option: #{opt_proxy}")
    end

  visited = {}
  buf = nil
  while true
    redirect = catch(:open_uri_redirect) {
      buf = Buffer.new
      uri.buffer_open(buf, find_proxy.call(uri), options)
      nil
    }
    # No throw happened: the resource was fetched and buf holds it.
    break unless redirect

    # A relative Location: violates RFC2616 but is seen in practice;
    # resolve it against the URI that was just requested.
    redirect = uri + redirect if redirect.relative?

    unless options.fetch(:redirect, true)
      raise HTTPRedirect.new(buf.io.status.join(' '), buf.io, redirect)
    end
    unless OpenURI.redirectable?(uri, redirect)
      raise "redirection forbidden: #{uri} -> #{redirect}"
    end
    if options.include? :http_basic_authentication
      # Credentials are sent only to the URI the caller named directly,
      # never to a redirect target.  Work on a copy of the options.
      options = options.dup
      options.delete :http_basic_authentication
    end

    uri = redirect
    location = uri.to_s
    raise "HTTP redirection loop: #{uri}" if uri_seen = visited.include?(location)
    visited[location] = true
  end

  result = buf.io
  result.base_uri = uri
  result
end

.open_uri(name, *rest) ⇒ Object

:nodoc:

Raises:

  • (ArgumentError)


131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/open-uri.rb', line 131

# Opens +name+ for reading and returns the resulting IO, or, when a block
# is given, yields the IO and returns the block's value after closing it.
#
# name:: a URI::Generic instance, or anything URI.parse accepts.
# rest:: optional access mode (String or Integer), optional Integer
#        permission (ignored), and an optional option Hash, in that order.
#
# Accepted modes are nil, "r", "rb", those two followed by ":encoding",
# and File::RDONLY.  When an encoding is named in the mode string it is
# applied to the IO with set_encoding.
#
# Raises ArgumentError for leftover positional arguments, for an access
# mode other than the ones above, and for unrecognized Symbol options
# (via OpenURI.check_options).  Encoding.find is called on a named
# encoding so that an unknown name fails before any request is made.
def OpenURI.open_uri(name, *rest) # :nodoc:
  uri = URI::Generic === name ? name : URI.parse(name)
  mode, _, rest = OpenURI.scan_open_optional_arguments(*rest)
  options = rest.shift if !rest.empty? && Hash === rest.first
  raise ArgumentError.new("extra arguments") if !rest.empty?
  options ||= {}
  OpenURI.check_options(options)

  encoding = nil
  # Only a String mode can carry "r", "rb" or an ":encoding" suffix.  An
  # Integer mode must not reach the regexp match: Regexp#=~ raises
  # TypeError for it, which would make the File::RDONLY check below
  # unreachable and hide the ArgumentError meant for other Integer modes.
  if String === mode && /\Arb?(?:\Z|:([^:]+))/ =~ mode
    if $1
      encoding = $1
      Encoding.find(encoding) # validation only; the name itself is kept
    end
    mode = nil
  end

  unless mode == nil ||
         mode == 'r' || mode == 'rb' ||
         mode == File::RDONLY
    raise ArgumentError.new("invalid access mode #{mode} (#{uri.class} resource is read only.)")
  end

  io = open_loop(uri, options)
  io.set_encoding(encoding) if encoding
  if block_given?
    begin
      yield io
    ensure
      if io.respond_to? :close!
        io.close! # Tempfile
      else
        io.close if !io.closed?
      end
    end
  else
    io
  end
end

.redirectable?(uri1, uri2) ⇒ Boolean

:nodoc:

Returns:

  • (Boolean)


243
244
245
246
247
248
249
250
251
252
# File 'lib/open-uri.rb', line 243

# Decides whether a redirect from +uri1+ to +uri2+ may be followed.
#
# Allowed: both URIs use the same scheme (compared case-insensitively),
# or both schemes are one of http / ftp.
#
# Everything else is refused.  In particular:
# * http://... -> file:///etc/passwd, file:///dev/zero, etc. (CVE-2011-1521)
# * https -> http, deliberately, so that secure cookies or referers are
#   not sent over non-secure HTTP
#   (RFC 2109 4.3.1, RFC 2965 3.3, RFC 2616 15.1.3)
#
# This rule set is ad hoc; it should become extensible/configurable.
def OpenURI.redirectable?(uri1, uri2) # :nodoc:
  from_scheme = uri1.scheme
  to_scheme = uri2.scheme
  return true if from_scheme.downcase == to_scheme.downcase

  plain_scheme = /\A(?:http|ftp)\z/i
  plain_scheme =~ from_scheme && plain_scheme =~ to_scheme
end

.scan_open_optional_arguments(*rest) ⇒ Object

:nodoc:



121
122
123
124
125
126
127
128
129
# File 'lib/open-uri.rb', line 121

# Splits the leading optional open arguments off +rest+.
#
# When the first element is a String or an Integer it is taken as the
# mode, and an Integer directly after it is taken as the permission.
# Returns [mode, perm, remaining_arguments]; mode and perm are nil when
# they were not supplied.
def OpenURI.scan_open_optional_arguments(*rest) # :nodoc:
  mode = perm = nil
  case rest.first
  when String, Integer
    mode = rest.shift
    perm = rest.shift if Integer === rest.first
  end
  [mode, perm, rest]
end