Module: Gem::URI

Included in:
Generic
Defined in:
lib/rubygems/vendor/uri/lib/uri.rb,
lib/rubygems/vendor/uri/lib/uri/ws.rb,
lib/rubygems/vendor/uri/lib/uri/ftp.rb,
lib/rubygems/vendor/uri/lib/uri/wss.rb,
lib/rubygems/vendor/uri/lib/uri/file.rb,
lib/rubygems/vendor/uri/lib/uri/http.rb,
lib/rubygems/vendor/uri/lib/uri/ldap.rb,
lib/rubygems/vendor/uri/lib/uri/https.rb,
lib/rubygems/vendor/uri/lib/uri/ldaps.rb,
lib/rubygems/vendor/uri/lib/uri/common.rb,
lib/rubygems/vendor/uri/lib/uri/mailto.rb,
lib/rubygems/vendor/uri/lib/uri/generic.rb,
lib/rubygems/vendor/uri/lib/uri/version.rb,
lib/rubygems/vendor/uri/lib/uri/rfc2396_parser.rb,
lib/rubygems/vendor/uri/lib/uri/rfc3986_parser.rb

Overview

uri/common.rb

Author

Akira Yamada <akira@ruby-lang.org>

License

You can redistribute it and/or modify it under the same terms as Ruby.

See Gem::URI for general documentation

Defined Under Namespace

Modules: RFC2396_REGEXP, Util Classes: BadURIError, Error, FTP, File, Generic, HTTP, HTTPS, InvalidComponentError, InvalidURIError, LDAP, LDAPS, MailTo, RFC2396_Parser, RFC3986_Parser, WS, WSS

Constant Summary collapse

RFC2396_PARSER =
RFC2396_Parser.new
RFC3986_PARSER =
RFC3986_Parser.new
DEFAULT_PARSER =
RFC3986_PARSER
TBLENCWWWCOMP_ =

:nodoc:

{}
TBLENCURICOMP_ =
TBLENCWWWCOMP_.dup.freeze
TBLDECWWWCOMP_ =

:nodoc:

{}
VERSION_CODE =

:stopdoc:

'010002'.freeze
VERSION =
VERSION_CODE.scan(/../).collect{|n| n.to_i}.join('.').freeze

Class Method Summary collapse

Class Method Details

.const_missing(const) ⇒ Object



43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 43

# Resolves legacy constants that used to be defined directly on this module.
#
# Lookup order for +const+ (a Symbol):
# 1. +:REGEXP+ returns Gem::URI::RFC2396_REGEXP.
# 2. A key of RFC2396_PARSER.regexp returns the stored value.
# 3. A constant reachable from RFC2396_Parser returns that constant.
# Any other name is delegated to +super+.
#
# When $VERBOSE is set, every successful legacy lookup emits an
# obsolescence warning attributed to the caller (uplevel: 1).
def self.const_missing(const)
  if const == :REGEXP
    warn "Gem::URI::REGEXP is obsolete. Use Gem::URI::RFC2396_REGEXP explicitly.", uplevel: 1 if $VERBOSE
    Gem::URI::RFC2396_REGEXP
  elsif value = RFC2396_PARSER.regexp[const]
    warn "Gem::URI::#{const} is obsolete. Use Gem::URI::RFC2396_PARSER.regexp[#{const.inspect}] explicitly.", uplevel: 1 if $VERBOSE
    value
  # const_get raises NameError for an unknown name, which would skip the
  # `super` fallback below; probe with const_defined? first.
  elsif RFC2396_Parser.const_defined?(const) && (value = RFC2396_Parser.const_get(const))
    warn "Gem::URI::#{const} is obsolete. Use Gem::URI::RFC2396_Parser::#{const} explicitly.", uplevel: 1 if $VERBOSE
    value
  else
    super
  end
end

.decode_uri_component(str, enc = Encoding::UTF_8) ⇒ Object

Like Gem::URI.decode_www_form_component, except that '+' is preserved.



402
403
404
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 402

# Decodes percent-escapes in +str+ by delegating to _decode_uri_component
# with a pattern that matches only "%XX" sequences (so '+' is not matched).
#
# str:: string to decode.
# enc:: encoding passed through to the helper (defaults to UTF-8).
def self.decode_uri_component(str, enc = Encoding::UTF_8)
  percent_escape = /%\h\h/
  _decode_uri_component(percent_escape, str, enc)
end

.decode_www_form(str, enc = Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false) ⇒ Object

Returns name/value pairs derived from the given string str, which must be an ASCII string.

The method may be used to decode the body of Net::HTTPResponse object res for which res['Content-Type'] is 'application/x-www-form-urlencoded'.

The returned data is an array of 2-element subarrays; each subarray is a name/value pair (both are strings). Each returned string has encoding enc, and has had invalid characters removed via String#scrub.

A simple example:

Gem::URI.decode_www_form('foo=0&bar=1&baz')
# => [["foo", "0"], ["bar", "1"], ["baz", ""]]

The returned strings have certain conversions, similar to those performed in Gem::URI.decode_www_form_component:

Gem::URI.decode_www_form('f%23o=%2F&b-r=%24&b+z=%40')
# => [["f#o", "/"], ["b-r", "$"], ["b z", "@"]]

The given string may contain consecutive separators:

Gem::URI.decode_www_form('foo=0&&bar=1&&baz=2')
# => [["foo", "0"], ["", ""], ["bar", "1"], ["", ""], ["baz", "2"]]

A different separator may be specified:

Gem::URI.decode_www_form('foo=0--bar=1--baz', separator: '--')
# => [["foo", "0"], ["bar", "1"], ["baz", ""]]

Raises:

  • (ArgumentError)


577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 577

# Splits +str+ on +separator+ into [name, value] pairs and decodes each part.
#
# str::           ASCII-only form data; anything else raises ArgumentError.
# enc::           encoding (name or Encoding) applied to the returned strings.
# separator::     field separator, '&' by default.
# use__charset_:: when true, the first "_charset_" field whose value
#                 get_encoding recognises replaces +enc+.
# isindex::       when true, a first field without '=' is treated as a value
#                 with an empty name.
#
# Returns an array of two-element arrays of strings.
def self.decode_www_form(str, enc=Encoding::UTF_8, separator: '&', use__charset_: false, isindex: false)
  unless str.ascii_only?
    raise ArgumentError, "the input of #{self.name}.#{__method__} must be ASCII only string"
  end

  pairs = []
  return pairs if str.empty?

  enc = Encoding.find(enc)
  str.b.each_line(separator) do |field|
    field.chomp!(separator)
    name, eq, value = field.partition('=')

    # The isindex rule is consulted for the first field only.
    if isindex
      name, value = +'', name if eq.empty?
      isindex = false
    end

    # Honour at most one _charset_ override.
    if use__charset_ && name == '_charset_' && (detected = get_encoding(value))
      enc = detected
      use__charset_ = false
    end

    name.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
    if value
      value.gsub!(/\+|%\h\h/, TBLDECWWWCOMP_)
    else
      value = +''
    end

    pairs << [name, value]
  end

  # Tag every decoded string with the final encoding and repair invalid bytes.
  pairs.each do |pair|
    pair.each { |part| part.force_encoding(enc).scrub! }
  end
  pairs
end

.decode_www_form_component(str, enc = Encoding::UTF_8) ⇒ Object

Returns a string decoded from the given URL-encoded string str.

The given string is first encoded as Encoding::ASCII_8BIT (using String#b), then decoded (as below), and finally force-encoded to the given encoding enc.

The returned string:

  • Preserves:

    • Characters '*', '.', '-', and '_'.

    • Character in ranges 'a'..'z', 'A'..'Z', and '0'..'9'.

    Example:

    Gem::URI.decode_www_form_component('*.-_azAZ09')
    # => "*.-_azAZ09"
    
  • Converts:

    • Character '+' to character ' '.

    • Each “percent notation” to an ASCII character.

    Example:

    Gem::URI.decode_www_form_component('Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A')
    # => "Here are some punctuation characters: ,;?:"
    

Related: Gem::URI.decode_uri_component (preserves '+').



391
392
393
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 391

# Decodes +str+ by delegating to _decode_uri_component with a pattern that
# matches both '+' and "%XX" sequences.
#
# str:: string to decode.
# enc:: encoding passed through to the helper (defaults to UTF-8).
def self.decode_www_form_component(str, enc = Encoding::UTF_8)
  plus_or_percent_escape = /\+|%\h\h/
  _decode_uri_component(plus_or_percent_escape, str, enc)
end

.encode_uri_component(str, enc = nil) ⇒ Object

Like Gem::URI.encode_www_form_component, except that ' ' (space) is encoded as '%20' (instead of '+').



397
398
399
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 397

# Encodes +str+ by delegating to _encode_uri_component with the
# TBLENCURICOMP_ replacement table.
#
# str:: string to encode.
# enc:: optional encoding passed through to the helper.
def self.encode_uri_component(str, enc = nil)
  needs_escape = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(needs_escape, TBLENCURICOMP_, str, enc)
end

.encode_www_form(enum, enc = nil) ⇒ Object

Returns a URL-encoded string derived from the given Enumerable enum.

The result is suitable for use as form data for an HTTP request whose Content-Type is 'application/x-www-form-urlencoded'.

The returned string consists of the elements of enum, each converted to one or more URL-encoded strings, and all joined with character '&'.

Simple examples:

Gem::URI.encode_www_form([['foo', 0], ['bar', 1], ['baz', 2]])
# => "foo=0&bar=1&baz=2"
Gem::URI.encode_www_form({foo: 0, bar: 1, baz: 2})
# => "foo=0&bar=1&baz=2"

The returned string is formed using method Gem::URI.encode_www_form_component, which converts certain characters:

Gem::URI.encode_www_form('f#o': '/', 'b-r': '$', 'b z': '@')
# => "f%23o=%2F&b-r=%24&b+z=%40"

When enum is Array-like, each element ele is converted to a field:

  • If ele is an array of two or more elements, the field is formed from its first two elements (and any additional elements are ignored):

    name = Gem::URI.encode_www_form_component(ele[0], enc)
    value = Gem::URI.encode_www_form_component(ele[1], enc)
    "#{name}=#{value}"
    

    Examples:

    Gem::URI.encode_www_form([%w[foo bar], %w[baz bat bah]])
    # => "foo=bar&baz=bat"
    Gem::URI.encode_www_form([['foo', 0], ['bar', :baz, 'bat']])
    # => "foo=0&bar=baz"
    
  • If ele is an array of one element, the field is formed from ele[0]:

    Gem::URI.encode_www_form_component(ele[0])
    

    Example:

    Gem::URI.encode_www_form([['foo'], [:bar], [0]])
    # => "foo&bar&0"
    
  • Otherwise the field is formed from ele:

    Gem::URI.encode_www_form_component(ele)
    

    Example:

    Gem::URI.encode_www_form(['foo', :bar, 0])
    # => "foo&bar&0"
    

The elements of an Array-like enum may be a mixture:

Gem::URI.encode_www_form([['foo', 0], ['bar', 1, 2], ['baz'], :bat])
# => "foo=0&bar=1&baz&bat"

When enum is Hash-like, each key/value pair is converted to one or more fields:

  • If value is Array-convertible, each element ele in value is paired with key to form a field:

    name = Gem::URI.encode_www_form_component(key, enc)
    value = Gem::URI.encode_www_form_component(ele, enc)
    "#{name}=#{value}"
    

    Example:

    Gem::URI.encode_www_form({foo: [:bar, 1], baz: [:bat, :bam, 2]})
    # => "foo=bar&foo=1&baz=bat&baz=bam&baz=2"
    
  • Otherwise, key and value are paired to form a field:

    name = Gem::URI.encode_www_form_component(key, enc)
    value = Gem::URI.encode_www_form_component(value, enc)
    "#{name}=#{value}"
    

    Example:

    Gem::URI.encode_www_form({foo: 0, bar: 1, baz: 2})
    # => "foo=0&bar=1&baz=2"
    

The elements of a Hash-like enum may be a mixture:

Gem::URI.encode_www_form({foo: [0, 1], bar: 2})
# => "foo=0&foo=1&bar=2"


524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 524

# Builds an application/x-www-form-urlencoded string from +enum+.
#
# enum:: enumerable yielding either [name, value] pairs or bare names.
# enc::  optional encoding forwarded to encode_www_form_component.
#
# For each element (destructured as +k+, +v+):
# * +v+ nil              -> the encoded name only.
# * +v+ array-convertible -> one field per element, all joined with '&';
#   a nil element yields the encoded name only.
# * otherwise            -> "name=value".
#
# Returns the fields joined with '&'.
def self.encode_www_form(enum, enc=nil)
  enum.map do |k,v|
    if v.nil?
      encode_www_form_component(k, enc)
    elsif v.respond_to?(:to_ary)
      v.to_ary.map do |w|
        str = encode_www_form_component(k, enc)
        unless w.nil?
          str << '='
          str << encode_www_form_component(w, enc)
        end
        # Return the field explicitly: without this the block evaluates to
        # nil for a nil element and the name-only field is lost.
        str
      end.join('&')
    else
      str = encode_www_form_component(k, enc)
      str << '='
      str << encode_www_form_component(v, enc)
    end
  end.join('&')
end

.encode_www_form_component(str, enc = nil) ⇒ Object

Returns a URL-encoded string derived from the given string str.

The returned string:

  • Preserves:

    • Characters '*', '.', '-', and '_'.

    • Character in ranges 'a'..'z', 'A'..'Z', and '0'..'9'.

    Example:

    Gem::URI.encode_www_form_component('*.-_azAZ09')
    # => "*.-_azAZ09"
    
  • Converts:

    • Character ' ' to character '+'.

    • Any other character to “percent notation”; the percent notation for character c is '%%%X' % c.ord.

    Example:

    Gem::URI.encode_www_form_component('Here are some punctuation characters: ,;?:')
    # => "Here+are+some+punctuation+characters%3A+%2C%3B%3F%3A"
    

Encoding:

  • If str has encoding Encoding::ASCII_8BIT, argument enc is ignored.

  • Otherwise str is converted first to Encoding::UTF_8 (with suitable character replacements), and then to encoding enc.

In either case, the returned string has forced encoding Encoding::US_ASCII.

Related: Gem::URI.encode_uri_component (encodes ' ' as '%20').



358
359
360
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 358

# Encodes +str+ by delegating to _encode_uri_component with the
# TBLENCWWWCOMP_ replacement table.
#
# str:: string to encode.
# enc:: optional encoding passed through to the helper.
def self.encode_www_form_component(str, enc = nil)
  needs_escape = /[^*\-.0-9A-Z_a-z]/
  _encode_uri_component(needs_escape, TBLENCWWWCOMP_, str, enc)
end

.extract(str, schemes = nil, &block) ⇒ Object

Synopsis

Gem::URI::extract(str[, schemes][,&blk])

Args

str

String to extract URIs from.

schemes

Limit Gem::URI matching to specific schemes.

Description

Extracts URIs from a string. If a block is given, yields each matched URI and returns nil; otherwise returns an array of the matches.

Usage

require "rubygems/vendor/uri/lib/uri"

Gem::URI.extract("text here http://foo.example.org/bla and here mailto:test@example.com and here also.")
# => ["http://foo.example.org/bla", "mailto:test@example.com"]


262
263
264
265
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 262

# Obsolete wrapper around DEFAULT_PARSER.extract.
#
# Emits an obsolescence warning (attributed to the caller) when $VERBOSE is
# set, then forwards +str+, +schemes+ and the optional block unchanged.
def self.extract(str, schemes = nil, &block) # :nodoc:
  if $VERBOSE
    warn "Gem::URI.extract is obsolete", uplevel: 1
  end

  DEFAULT_PARSER.extract(str, schemes, &block)
end

.for(scheme, *arguments, default: Generic) ⇒ Object

Returns a new object constructed from the given scheme, arguments, and default:

  • The new object is an instance of Gem::URI.scheme_list[scheme.upcase].

  • The object is initialized by calling the class initializer using scheme and arguments. See Gem::URI::Generic.new.

Examples:

values = ['john.doe', 'www.example.com', '123', nil, '/forum/questions/', nil, 'tag=networking&order=newest', 'top']
Gem::URI.for('https', *values)
# => #<Gem::URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
Gem::URI.for('foo', *values, default: Gem::URI::HTTP)
# => #<Gem::URI::HTTP foo://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>


146
147
148
149
150
151
152
153
154
155
156
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 146

# Instantiates the class associated with +scheme+.
#
# The upper-cased scheme is looked up first in INITIAL_SCHEMES, then (when
# it is shaped like a constant name) among the constants defined directly
# on Schemes; +default+ is used when neither lookup yields a class.
#
# scheme::    scheme name; converted with to_s and upcase for the lookup.
# arguments:: forwarded, after +scheme+, to the chosen class's +new+.
# default::   fallback class (Generic unless overridden).
def self.for(scheme, *arguments, default: Generic)
  const_name = scheme.to_s.upcase

  klass = INITIAL_SCHEMES[const_name]
  # The regexp guard keeps const_defined? from being called with a string
  # that is not a valid constant name.
  if !klass && /\A[A-Z]\w*\z/.match?(const_name) && Schemes.const_defined?(const_name, false)
    klass = Schemes.const_get(const_name, false)
  end

  (klass || default).new(scheme, *arguments)
end

.join(*str) ⇒ Object

Merges the given Gem::URI strings str per RFC 2396.

Each string in str is converted to an RFC3986 Gem::URI before being merged.

Examples:

Gem::URI.join("http://example.com/","main.rbx")
# => #<Gem::URI::HTTP http://example.com/main.rbx>

Gem::URI.join('http://example.com', 'foo')
# => #<Gem::URI::HTTP http://example.com/foo>

Gem::URI.join('http://example.com', '/foo', '/bar')
# => #<Gem::URI::HTTP http://example.com/bar>

Gem::URI.join('http://example.com', '/foo', 'bar')
# => #<Gem::URI::HTTP http://example.com/bar>

Gem::URI.join('http://example.com', '/foo/', 'bar')
# => #<Gem::URI::HTTP http://example.com/foo/bar>


234
235
236
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 234

# Forwards every argument in +str+ to DEFAULT_PARSER.join and returns its
# result.
def self.join(*str)
  parser = DEFAULT_PARSER
  parser.join(*str)
end

.parse(uri) ⇒ Object

Returns a new Gem::URI object constructed from the given string uri:

Gem::URI.parse('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
# => #<Gem::URI::HTTPS https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>
Gem::URI.parse('http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
# => #<Gem::URI::HTTP http://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top>

It’s recommended to first ::escape string uri if it may contain invalid Gem::URI characters.



207
208
209
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 207

# Hands +uri+ to DEFAULT_PARSER.parse and returns whatever it produces.
def self.parse(uri)
  parser = DEFAULT_PARSER
  parser.parse(uri)
end

.parser=(parser = RFC3986_PARSER) ⇒ Object



25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 25

# Installs the class of +parser+ as the Parser constant of this module and
# refreshes the legacy constants derived from it.
#
# parser:: parser instance whose class becomes Parser
#          (RFC3986_PARSER when omitted).
def self.parser=(parser = RFC3986_PARSER)
  # Drop any previously installed Parser before defining the new one.
  remove_const(:Parser) if defined?(::Gem::URI::Parser)
  const_set("Parser", parser.class)

  # REGEXP and PATTERN are always cleared, but only re-defined when the
  # RFC 2396 parser class was selected.
  remove_const(:REGEXP) if defined?(::Gem::URI::REGEXP)
  remove_const(:PATTERN) if defined?(::Gem::URI::PATTERN)
  if Parser == RFC2396_Parser
    const_set("REGEXP", Gem::URI::RFC2396_REGEXP)
    const_set("PATTERN", Gem::URI::RFC2396_REGEXP::PATTERN)
  end

  # Mirror each entry of a freshly built parser's regexp table as a constant
  # here (note: a new instance is used, not the +parser+ argument itself),
  # replacing any same-named constant defined directly on this module.
  Parser.new.regexp.each_pair do |sym, str|
    remove_const(sym) if const_defined?(sym, false)
    const_set(sym, str)
  end
end

.regexp(schemes = nil) ⇒ Object

Synopsis

Gem::URI::regexp([match_schemes])

Args

match_schemes

Array of schemes. If given, the resulting regexp matches only URIs whose scheme is one of the match_schemes.

Description

Returns a Regexp object which matches Gem::URI-like strings. The Regexp object returned by this method includes an arbitrary number of capture groups (parentheses). Never rely on their number.

Usage

require 'rubygems/vendor/uri/lib/uri'

# extract first Gem::URI from html_string
html_string.slice(Gem::URI.regexp)

# remove ftp URIs
html_string.sub(Gem::URI.regexp(['ftp']), '')

# You should not rely on the number of parentheses
html_string.scan(Gem::URI.regexp) do |*matches|
  p $&
end


299
300
301
302
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 299

# Obsolete wrapper around DEFAULT_PARSER.make_regexp.
#
# Emits an obsolescence warning (attributed to the caller) when $VERBOSE is
# set, then returns the result of make_regexp for +schemes+.
def self.regexp(schemes = nil)# :nodoc:
  if $VERBOSE
    warn "Gem::URI.regexp is obsolete", uplevel: 1
  end

  DEFAULT_PARSER.make_regexp(schemes)
end

.register_scheme(scheme, klass) ⇒ Object

Registers the given klass as the class to be instantiated when parsing a Gem::URI with the given scheme:

Gem::URI.register_scheme('MS_SEARCH', Gem::URI::Generic) # => Gem::URI::Generic
Gem::URI.scheme_list['MS_SEARCH']                   # => Gem::URI::Generic

Note that after calling String#upcase on scheme, it must be a valid constant name.



102
103
104
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 102

# Defines a constant on Schemes, named after the upper-cased +scheme+, whose
# value is +klass+. Returns the result of const_set.
def self.register_scheme(scheme, klass)
  const_name = scheme.to_s.upcase
  Schemes.const_set(const_name, klass)
end

.scheme_list ⇒ Object

Returns a hash of the defined schemes:

Gem::URI.scheme_list
# =>
{"MAILTO"=>Gem::URI::MailTo,
 "LDAPS"=>Gem::URI::LDAPS,
 "WS"=>Gem::URI::WS,
 "HTTP"=>Gem::URI::HTTP,
 "HTTPS"=>Gem::URI::HTTPS,
 "LDAP"=>Gem::URI::LDAP,
 "FILE"=>Gem::URI::File,
 "FTP"=>Gem::URI::FTP}

Related: Gem::URI.register_scheme.



120
121
122
123
124
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 120

# Builds a hash that maps the upper-cased name of every constant on Schemes
# to that constant's value.
def self.scheme_list
  Schemes.constants.each_with_object({}) do |name, list|
    list[name.to_s.upcase] = Schemes.const_get(name)
  end
end

.split(uri) ⇒ Object

Returns a 9-element array representing the parts of the Gem::URI formed from the string uri; each array element is a string or nil:

names = %w[scheme userinfo host port registry path opaque query fragment]
values = Gem::URI.split('https://john.doe@www.example.com:123/forum/questions/?tag=networking&order=newest#top')
names.zip(values)
# =>
[["scheme", "https"],
 ["userinfo", "john.doe"],
 ["host", "www.example.com"],
 ["port", "123"],
 ["registry", nil],
 ["path", "/forum/questions/"],
 ["opaque", nil],
 ["query", "tag=networking&order=newest"],
 ["fragment", "top"]]


193
194
195
# File 'lib/rubygems/vendor/uri/lib/uri/common.rb', line 193

# Hands +uri+ to DEFAULT_PARSER.split and returns whatever it produces.
def self.split(uri)
  parser = DEFAULT_PARSER
  parser.split(uri)
end