Class: URI::RFC2396_Parser

Inherits:
Object
  • Object
show all
Includes:
RFC2396_REGEXP
Defined in:
lib/uri/rfc2396_parser.rb

Overview

Class that parses Strings into URIs.

It contains a Hash of patterns and a Hash of Regexps that are used to match and validate URIs.

Constant Summary collapse

@@to_s =
Kernel.instance_method(:to_s)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ RFC2396_Parser

Synopsis

URI::Parser.new([opts])

Args

The constructor accepts a hash of options for the parser. Keys of the options are pattern names of URI components and values of the options are pattern strings. The constructor generates a set of regexps for parsing URIs.

You can use the following keys:

* :ESCAPED (URI::PATTERN::ESCAPED in default)
* :UNRESERVED (URI::PATTERN::UNRESERVED in default)
* :DOMLABEL (URI::PATTERN::DOMLABEL in default)
* :TOPLABEL (URI::PATTERN::TOPLABEL in default)
* :HOSTNAME (URI::PATTERN::HOSTNAME in default)

Examples

p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false


99
100
101
102
103
104
105
106
107
# File 'lib/uri/rfc2396_parser.rb', line 99

# Builds the parser's two lookup tables and makes them read-only.
#
# opts - Hash of overrides handed to initialize_pattern (pattern name =>
#        pattern string).
#
# @pattern receives the result of initialize_pattern(opts) and @regexp the
# result of initialize_regexp(@pattern). Every value in each table is
# frozen, and then the table itself is frozen.
def initialize(opts = {})
  patterns = initialize_pattern(opts)
  patterns.each_value { |source| source.freeze }
  @pattern = patterns.freeze

  regexps = initialize_regexp(@pattern)
  regexps.each_value { |compiled| compiled.freeze }
  @regexp = regexps.freeze
end

Instance Attribute Details

#patternObject (readonly)

The Hash of patterns.

see also URI::Parser.initialize_pattern



112
113
114
# File 'lib/uri/rfc2396_parser.rb', line 112

# Returns the Hash of patterns stored in @pattern.
# #initialize freezes both this Hash and each of its values, so callers
# cannot modify what is returned.
def pattern
  @pattern
end

#regexpObject (readonly)

The Hash of Regexp

see also URI::Parser.initialize_regexp



117
118
119
# File 'lib/uri/rfc2396_parser.rb', line 117

# Returns the Hash of Regexp objects stored in @regexp.
# #initialize freezes both this Hash and each of its values, so callers
# cannot modify what is returned.
def regexp
  @regexp
end

Instance Method Details

#escape(str, unsafe = @regexp[:UNSAFE]) ⇒ Object

:call-seq:

escape( str )
escape( str, unsafe )

Args

str

String to make safe

unsafe

Regexp to apply. Defaults to self.regexp[:UNSAFE]. A String may also be given; it is treated as a set of unsafe characters.

Description

Constructs a safe String from str by replacing each unsafe character with the percent-encoded form (%XX) of its bytes.



299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/uri/rfc2396_parser.rb', line 299

# Percent-encodes every part of +str+ that matches +unsafe+.
#
# str    - String to make safe.
# unsafe - Regexp selecting the text to encode (defaults to
#          @regexp[:UNSAFE]). Anything that is not a Regexp is quoted and
#          wrapped in a character class, so each of its characters is
#          treated as unsafe.
#
# Each byte of a match is written as "%" followed by two upper-case hex
# digits. Returns a new String whose encoding is forced to US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  matcher =
    if unsafe.kind_of?(Regexp)
      unsafe
    else
      # perhaps unsafe is a String object
      Regexp.new("[#{Regexp.quote(unsafe)}]", false)
    end

  encoded = str.gsub(matcher) do |match|
    match.each_byte.map { |byte| sprintf('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end

#extract(str, schemes = nil) ⇒ Object

:call-seq:

extract( str )
extract( str, schemes )
extract( str, schemes ) {|item| block }

Args

str

String to search

schemes

Patterns to apply to str

Description

Extracts URIs from str. If no block is given, returns an Array of the matched URI Strings; otherwise calls the block once for each match and returns nil.

see also URI::Parser.make_regexp



261
262
263
264
265
266
267
268
269
270
# File 'lib/uri/rfc2396_parser.rb', line 261

# Scans +str+ for text matching make_regexp(schemes).
#
# str     - String to search.
# schemes - passed through unchanged to make_regexp (nil by default).
#
# Without a block, returns an Array holding the full text of every match.
# With a block, yields the full text of each match and returns nil.
def extract(str, schemes = nil)
  uri_re = make_regexp(schemes)

  unless block_given?
    found = []
    # Use the whole match, not the capture groups that scan would yield.
    str.scan(uri_re) { found << Regexp.last_match(0) }
    return found
  end

  str.scan(uri_re) { yield Regexp.last_match(0) }
  nil
end

#inspectObject



335
336
337
# File 'lib/uri/rfc2396_parser.rb', line 335

# Returns the plain Kernel#to_s text for this object by binding the
# unbound method kept in @@to_s (Kernel.instance_method(:to_s)) to self.
# NOTE(review): presumably this keeps the large @pattern/@regexp tables
# out of the inspect output - confirm.
def inspect
  kernel_to_s = @@to_s.bind(self)
  kernel_to_s.call
end

#join(*uris) ⇒ Object

Args

uris

an Array of Strings

Description

Attempts to parse and merge a set of URIs



235
236
237
238
# File 'lib/uri/rfc2396_parser.rb', line 235

# Merges a sequence of URI references, left to right.
#
# uris - the references to combine. The first one is passed through
#        convert_to_uri; each remaining one is handed to #merge on the
#        result accumulated so far.
#
# Returns the value of the last merge (or the converted first element when
# only one argument is given).
def join(*uris)
  head, *rest = uris
  rest.inject(convert_to_uri(head)) { |base, ref| base.merge(ref) }
end

#make_regexp(schemes = nil) ⇒ Object

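Returns self.regexp[:ABS_URI_REF] when schemes is not provided. Otherwise returns a Regexp, built from Regexp.union(*schemes) and self.pattern[:X_ABS_URI], that matches only absolute URIs whose scheme is one of schemes.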



274
275
276
277
278
279
280
# File 'lib/uri/rfc2396_parser.rb', line 274

# Returns the Regexp used to find URIs in text.
#
# schemes - nil/false to get the stock @regexp[:ABS_URI_REF]; otherwise
#           the value(s) are splatted into Regexp.union to restrict which
#           schemes are accepted.
#
# With schemes, the result is an extended-mode Regexp: a lookahead
# requiring one of the schemes followed by ":", then @pattern[:X_ABS_URI].
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  allowed_schemes = Regexp.union(*schemes)
  /(?=#{allowed_schemes}:)#{@pattern[:X_ABS_URI]}/x
end

#parse(uri) ⇒ Object

Args

uri

String

Description

parses uri and constructs either matching URI scheme object (FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic

Usage

p = URI::Parser.new
p.parse("ldap://ldap.example.com/dc=example?user=john")
#=> #<URI::LDAP:0x00000000b9e7e8 URL:ldap://ldap.example.com/dc=example?user=john>


209
210
211
212
213
214
215
216
217
218
219
220
221
222
# File 'lib/uri/rfc2396_parser.rb', line 209

# Splits +uri+ with #split and builds a URI object from the components.
#
# uri - the String to parse.
#
# When the scheme (upper-cased) is a key of URI.scheme_list, the class
# registered under that key is instantiated; otherwise Generic is used.
# The constructor receives the nine components followed by this parser.
# Errors raised by #split propagate to the caller.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = self.split(uri)
  ctor_args = [scheme, userinfo, host, port,
               registry, path, opaque, query,
               fragment, self]

  registered = scheme && URI.scheme_list.include?(scheme.upcase)
  uri_class = registered ? URI.scheme_list[scheme.upcase] : Generic
  uri_class.new(*ctor_args)
end

#split(uri) ⇒ Object

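Splits uri into its components by matching it against self.regexp and returns them as an Array: [scheme, userinfo, host, port, registry, path, opaque, query, fragment]. Raises URI::InvalidURIError if uri does not match.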



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/uri/rfc2396_parser.rb', line 120

# Breaks +uri+ into its nine components.
#
# uri - the text to split. It is compared against '' and then matched
#       against @regexp[:ABS_URI] and @regexp[:REL_URI], in that order.
#
# Returns [scheme, userinfo, host, port, registry, path, opaque, query,
# fragment]; components that are absent are nil, except that path is ''
# when neither a path nor an opaque part was found.
#
# Raises InvalidURIError when an absolute URI has no scheme, when it has
# none of opaque/path/host/registry, or when +uri+ matches neither Regexp.
#
# Grammar being matched (RFC 2396):
#   URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
#   absoluteURI   = scheme ":" ( hier_part | opaque_part )
#   hier_part     = ( net_path | abs_path ) [ "?" query ]
#   opaque_part   = uric_no_slash *uric
#   relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
#   net_path      = "//" authority [ abs_path ]
#   abs_path      = "/"  path_segments
#   rel_path      = rel_segment [ abs_path ]
#   authority     = server | reg_name
#   server        = [ [ userinfo "@" ] hostport ]
def split(uri)
  scheme = userinfo = host = port = registry = nil
  path = opaque = query = fragment = nil

  case uri
  when ''
    # null uri: every component stays nil (path is defaulted below)

  when @regexp[:ABS_URI]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    unless scheme
      raise InvalidURIError,
        "bad URI(absolute but no scheme): #{uri}"
    end
    unless opaque || path || host || registry
      raise InvalidURIError,
        "bad URI(absolute but no path): #{uri}"
    end

  when @regexp[:REL_URI]
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # rel_path = rel_segment [ abs_path ]; use whichever parts are present.
    path =
      if rel_segment && abs_path
        rel_segment + abs_path
      else
        rel_segment || abs_path
      end

  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path = '' unless path || opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port,
    registry,
    path,
    opaque,
    query,
    fragment
  ]
end

#unescape(str, escaped = @regexp[:ESCAPED]) ⇒ Object

:call-seq:

unescape( str )
unescape( str, escaped )

Args

str

String to remove escapes from

escaped

Regexp matching the escape sequences to decode. Defaults to self.regexp[:ESCAPED]

Description

Removes escapes from str



330
331
332
# File 'lib/uri/rfc2396_parser.rb', line 330

# Replaces every percent-escape in +str+ with the byte it encodes.
#
# str     - String to remove escapes from.
# escaped - Regexp matching one escape sequence (defaults to
#           @regexp[:ESCAPED]). The two characters following the first
#           character of each match are read as a hexadecimal byte value.
#
# Returns a new String; +str+ itself is not modified.
#
# Each decoded byte is tagged with the encoding of +str+ before gsub
# splices it in. pack('C') yields an ASCII-8BIT string, and splicing a
# high byte in that encoding next to literal non-ASCII text would raise
# Encoding::CompatibilityError. A US-ASCII +str+ (which is what #escape
# returns) is decoded as UTF-8, because decoded bytes may be >= 0x80 and
# cannot be valid US-ASCII.
def unescape(str, escaped = @regexp[:ESCAPED])
  enc = str.encoding
  enc = Encoding::UTF_8 if enc == Encoding::US_ASCII
  str.gsub(escaped) { [$&[1, 2].hex].pack('C').force_encoding(enc) }
end