Class: URI::RFC2396_Parser

Inherits:
Object
  • Object
show all
Includes:
RFC2396_REGEXP
Defined in:
lib/uri/rfc2396_parser.rb

Overview

Class that parses Strings into URIs.

It holds a Hash of patterns and a Hash of Regexps that are used to match and validate URIs.

Constant Summary collapse

@@to_s =
Kernel.instance_method(:to_s)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ RFC2396_Parser

Synopsis

URI::Parser.new([opts])

Args

The constructor accepts a Hash of options for the parser. The keys are the pattern names of URI components and the values are pattern strings. From these the constructor generates the set of Regexps used for parsing URIs.

You can use the following keys:

* :ESCAPED (URI::PATTERN::ESCAPED by default)
* :UNRESERVED (URI::PATTERN::UNRESERVED by default)
* :DOMLABEL (URI::PATTERN::DOMLABEL by default)
* :TOPLABEL (URI::PATTERN::TOPLABEL by default)
* :HOSTNAME (URI::PATTERN::HOSTNAME by default)

Examples

p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false


100
101
102
103
104
105
106
107
108
# File 'lib/uri/rfc2396_parser.rb', line 100

# Builds the parser's two lookup tables from +opts+.
#
# +opts+ is passed to initialize_pattern; the Hash it returns is stored in
# @pattern and then passed to initialize_regexp, whose result is stored in
# @regexp. Each table has every one of its values frozen and is then frozen
# itself.
def initialize(opts = {})
  @pattern = initialize_pattern(opts).tap { |table| table.each_value(&:freeze) }.freeze
  @regexp = initialize_regexp(@pattern).tap { |table| table.each_value(&:freeze) }.freeze
end

Instance Attribute Details

#pattern ⇒ Object (readonly)

The Hash of patterns.

see also URI::Parser.initialize_pattern



113
114
115
# File 'lib/uri/rfc2396_parser.rb', line 113

# Returns @pattern, the Hash of patterns that #initialize obtains from
# initialize_pattern and freezes (both the Hash and each of its values).
def pattern
  @pattern
end

#regexp ⇒ Object (readonly)

The Hash of Regexp

see also URI::Parser.initialize_regexp



118
119
120
# File 'lib/uri/rfc2396_parser.rb', line 118

# Returns @regexp, the Hash of Regexps that #initialize obtains from
# initialize_regexp and freezes (both the Hash and each of its values).
def regexp
  @regexp
end

Instance Method Details

#escape(str, unsafe = @regexp[:UNSAFE]) ⇒ Object

:call-seq:

escape( str )
escape( str, unsafe )

Args

str

String to make safe

unsafe

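Regexp matching the text to escape, or a String listing the unsafe characters (it is quoted and wrapped in a character class). Defaults to self.regexp[:UNSAFE].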

Description

Constructs a safe String from str by replacing every unsafe character with its percent-encoded form: each byte of the character becomes %XX, in uppercase hexadecimal.



300
301
302
303
304
305
306
307
308
309
310
311
312
313
# File 'lib/uri/rfc2396_parser.rb', line 300

# Percent-encodes every part of +str+ that +unsafe+ matches.
#
# str::    String to escape.
# unsafe:: Regexp selecting the text to encode. Anything that is not a Regexp
#          is quoted and wrapped in a character class, so a String acts as a
#          list of literal characters. Defaults to @regexp[:UNSAFE].
#
# Every byte of a match is rendered as "%XX" with uppercase hex digits.
# Returns a new String tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  # perhaps unsafe is a String object
  unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false) unless unsafe.kind_of?(Regexp)

  encoded = str.gsub(unsafe) do |chunk|
    chunk.each_byte.map { |byte| sprintf('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end

#extract(str, schemes = nil) ⇒ Object

:call-seq:

extract( str )
extract( str, schemes )
extract( str, schemes ) {|item| block }

Args

str

String to search

schemes

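Schemes that a URI must start with in order to be extracted; they are passed to make_regexp. When omitted, the default self.regexp[:ABS_URI_REF] is used.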

Description

Extracts the URIs found in str. If no block is given, it returns the matches as an Array; otherwise it calls the block once for each match and returns nil.

see also URI::Parser.make_regexp



262
263
264
265
266
267
268
269
270
271
# File 'lib/uri/rfc2396_parser.rb', line 262

# Scans +str+ for URIs using the Regexp returned by make_regexp(schemes).
#
# With a block, yields the full text of each match and returns nil.
# Without a block, returns an Array of the matched Strings.
def extract(str, schemes = nil)
  matcher = make_regexp(schemes)

  unless block_given?
    found = []
    # $& is the whole match, whatever capture groups the regexp has.
    str.scan(matcher) { found << $& }
    return found
  end

  str.scan(matcher) { yield $& }
  nil
end

#inspect ⇒ Object



336
337
338
# File 'lib/uri/rfc2396_parser.rb', line 336

# Returns the result of Kernel#to_s for this parser: the unbound method held
# in @@to_s is bound to self and invoked.
def inspect
  kernel_to_s = @@to_s.bind(self)
  kernel_to_s.call
end

#join(*uris) ⇒ Object

Args

uris

an Array of Strings

Description

Attempts to parse and merge a set of URIs



236
237
238
239
# File 'lib/uri/rfc2396_parser.rb', line 236

# Merges +uris+ from left to right into a single URI.
#
# The first element is passed through convert_to_uri; every following
# element is then folded in by calling #merge on the running result.
def join(*uris)
  head, *tail = uris
  tail.inject(convert_to_uri(head)) { |base, ref| base.merge(ref) }
end

#make_regexp(schemes = nil) ⇒ Object

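Returns self.regexp[:ABS_URI_REF] by default. If schemes is provided, it instead returns a Regexp that matches self.pattern[:X_ABS_URI] only where one of the given schemes (combined with Regexp.union) is followed by a colon.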



275
276
277
278
279
280
281
# File 'lib/uri/rfc2396_parser.rb', line 275

# Returns the Regexp used to find URIs.
#
# Without +schemes+ this is @regexp[:ABS_URI_REF]. With +schemes+ it is an
# extended-mode Regexp made of a lookahead for any of the schemes followed by
# ":" and then @pattern[:X_ABS_URI].
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  allowed_schemes = Regexp.union(*schemes)
  /(?=#{allowed_schemes}:)#{@pattern[:X_ABS_URI]}/x
end

#parse(uri) ⇒ Object

Args

uri

String

Description

parses uri and constructs either matching URI scheme object (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic

Usage

p = URI::Parser.new
p.parse("ldap://ldap.example.com/dc=example?user=john")
#=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>


210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/uri/rfc2396_parser.rb', line 210

# Parses +uri+ and returns a URI object built from its components.
#
# The components come from #split. When the scheme (upcased) is a key of
# URI.scheme_list, the class registered there is instantiated; otherwise
# Generic is used. This parser is passed as the last constructor argument.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = split(uri)

  components = [scheme, userinfo, host, port,
                registry, path, opaque, query, fragment, self]

  registered = scheme && URI.scheme_list.include?(scheme.upcase)
  uri_class = registered ? URI.scheme_list[scheme.upcase] : Generic
  uri_class.new(*components)
end

#split(uri) ⇒ Object

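Splits uri against self.regexp and returns an Array of its components: [scheme, userinfo, host, port, registry, path, opaque, query, fragment].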



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
# File 'lib/uri/rfc2396_parser.rb', line 121

def split(uri)
  case uri
  when ''
    # null uri

  when @regexp[:ABS_URI]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = $~[1..-1]

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric

    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

    if !scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    if !opaque && (!path && (!host && !registry))
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end

  when @regexp[:REL_URI]
    scheme = nil
    opaque = nil

    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = $~[1..-1]
    if rel_segment && abs_path
      path = rel_segment + abs_path
    elsif rel_segment
      path = rel_segment
    elsif abs_path
      path = abs_path
    end

    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]

    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]

  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path = '' if !path && !opaque # (see RFC2396 Section 5.2)
  ret = [
    scheme,
    userinfo, host, port,         # X
    registry,                     # X
    path,                         # Y
    opaque,                       # Y
    query,
    fragment
  ]
  return ret
end

#unescape(str, escaped = @regexp[:ESCAPED]) ⇒ Object

:call-seq:

unescape( str )
unescape( str, escaped )

Args

str

String to remove escapes from

escaped

Regexp matching the escape sequences to decode. Defaults to self.regexp[:ESCAPED].

Description

Removes escapes from str



331
332
333
# File 'lib/uri/rfc2396_parser.rb', line 331

# Decodes the escape sequences in +str+.
#
# str::     String to remove escapes from.
# escaped:: Regexp matching one escape sequence. Defaults to
#           @regexp[:ESCAPED].
#
# For every match, the two characters after the first one are read as a
# hexadecimal number and replaced by the byte with that value. Returns a new
# String with the same encoding as +str+; +str+ itself is not modified.
def unescape(str, escaped = @regexp[:ESCAPED])
  enc = str.encoding
  # Tag each decoded byte with str's encoding before gsub appends it. A raw
  # binary byte >= 0x80 cannot be appended to text that already holds
  # non-ASCII characters and would raise Encoding::CompatibilityError.
  str.gsub(escaped) { [$&[1, 2].hex].pack('C').force_encoding(enc) }.force_encoding(enc)
end