Class: URI::RFC2396_Parser

Inherits:
Object
  • Object
show all
Includes:
RFC2396_REGEXP
Defined in:
lib/uri/rfc2396_parser.rb

Overview

Class that parses Strings into URIs.

It holds a Hash of pattern strings and a Hash of Regexps that are used to match and validate URI components.

Constant Summary collapse

# Unbound Kernel#to_s; #inspect binds it to the parser instance.
@@to_s =
Kernel.instance_method(:to_s)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ RFC2396_Parser

Synopsis

URI::Parser.new([opts])

Args

The constructor accepts a Hash of options for the parser. Each key is the name of a URI component pattern and each value is the pattern string to use for it. From these the constructor generates the set of Regexps used for parsing URIs.

You can use the following keys:

* :ESCAPED (URI::PATTERN::ESCAPED by default)
* :UNRESERVED (URI::PATTERN::UNRESERVED by default)
* :DOMLABEL (URI::PATTERN::DOMLABEL by default)
* :TOPLABEL (URI::PATTERN::TOPLABEL by default)
* :HOSTNAME (URI::PATTERN::HOSTNAME by default)

Examples

p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false


99
100
101
102
103
104
105
106
107
# File 'lib/uri/rfc2396_parser.rb', line 99

# Builds the parser's two lookup tables from +opts+.
#
# +opts+ is passed to +initialize_pattern+; the resulting pattern table is
# then passed to +initialize_regexp+. Each table, and every value stored in
# it, is frozen before the constructor returns.
def initialize(opts = {})
  @pattern = initialize_pattern(opts).tap do |table|
    table.each_value { |value| value.freeze }
    table.freeze
  end

  @regexp = initialize_regexp(@pattern).tap do |table|
    table.each_value { |value| value.freeze }
    table.freeze
  end
end

Instance Attribute Details

#pattern ⇒ Object (readonly)

The Hash of patterns.

See also URI::Parser.initialize_pattern.



112
113
114
# File 'lib/uri/rfc2396_parser.rb', line 112

# Reader for the pattern table assigned (and frozen) in #initialize.
#
# Returns whatever is stored in +@pattern+, without copying it.
def pattern
  return @pattern
end

#regexp ⇒ Object (readonly)

The Hash of Regexp.

See also URI::Parser.initialize_regexp.



117
118
119
# File 'lib/uri/rfc2396_parser.rb', line 117

# Reader for the Regexp table assigned (and frozen) in #initialize.
#
# Returns whatever is stored in +@regexp+, without copying it.
def regexp
  return @regexp
end

Instance Method Details

#escape(str, unsafe = @regexp[:UNSAFE]) ⇒ Object

:call-seq:

escape( str )
escape( str, unsafe )

Args

str

String to make safe

unsafe

Regexp matching the characters to escape, or a String listing those characters. Defaults to self.regexp[:UNSAFE].

Description

Constructs a safe String from str by replacing each unsafe character with the percent-encoded form (%XX) of each of its bytes.



287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/uri/rfc2396_parser.rb', line 287

# Percent-encodes every part of +str+ matched by +unsafe+.
#
# str::    String to make safe. It is not modified; a new String is returned.
# unsafe:: Regexp selecting the text to escape. Any other object is treated
#          as a String of literal characters and turned into a character
#          class. Defaults to <tt>@regexp[:UNSAFE]</tt>.
#
# Each matched span is replaced by <tt>%XX</tt> (uppercase hex) for every one
# of its bytes, so a multibyte character becomes several escapes. The result
# is tagged as US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  unless unsafe.kind_of?(Regexp)
    # perhaps unsafe is String object: quote it so that characters such as
    # "]" or "^" are taken literally inside the character class.
    unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false)
  end

  # Build each replacement without appending to a string literal, so the
  # method also works when string literals are frozen.
  encoded = str.gsub(unsafe) do |match|
    match.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  encoded.force_encoding(Encoding::US_ASCII)
end

#extract(str, schemes = nil) ⇒ Object

:call-seq:

extract( str )
extract( str, schemes )
extract( str, schemes ) {|item| block }

Args

str

String to search

schemes

Patterns to apply to str

Description

Extracts URIs from str. If no block is given, returns an Array of the matched URI Strings; otherwise yields each match to the block and returns nil.

See also URI::Parser.make_regexp.



249
250
251
252
253
254
255
256
257
258
# File 'lib/uri/rfc2396_parser.rb', line 249

# Scans +str+ for URIs using the Regexp returned by make_regexp(schemes).
#
# Without a block, returns an Array holding the full text of every match.
# With a block, yields the full text of each match and returns nil.
def extract(str, schemes = nil)
  uri_regexp = make_regexp(schemes)

  unless block_given?
    found = []
    # The whole match is collected rather than scan's capture groups.
    str.scan(uri_regexp) { found << Regexp.last_match(0) }
    return found
  end

  str.scan(uri_regexp) { yield Regexp.last_match(0) }
  nil
end

#inspect ⇒ Object



325
326
327
# File 'lib/uri/rfc2396_parser.rb', line 325

# Returns the result of the unbound method held in @@to_s
# (Kernel.instance_method(:to_s)) invoked on this object.
def inspect
  kernel_to_s = @@to_s
  kernel_to_s.bind_call(self)
end

#join(*uris) ⇒ Object

Args

uris

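one or more URIs passed as separate arguments; the first is converted with convert_to_uri and each following one is merged onto the result in order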

Description

Attempts to parse and merge a set of URIs.



223
224
225
226
# File 'lib/uri/rfc2396_parser.rb', line 223

# Merges the given URIs from left to right.
#
# The first argument is converted with +convert_to_uri+; every following
# argument is then merged onto the running result with +merge+. With a single
# argument the converted value is returned as is.
def join(*uris)
  head, *tail = uris
  tail.inject(convert_to_uri(head)) { |merged, reference| merged.merge(reference) }
end

#make_regexp(schemes = nil) ⇒ Object

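Returns self.regexp[:ABS_URI_REF] when schemes is not given. Otherwise returns a Regexp that matches self.pattern[:X_ABS_URI] only where the text starts with one of schemes (combined with Regexp.union) followed by a colon.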



262
263
264
265
266
267
268
# File 'lib/uri/rfc2396_parser.rb', line 262

# Returns the Regexp used to find URIs.
#
# With no +schemes+ (nil or false) this is <tt>@regexp[:ABS_URI_REF]</tt>.
# Otherwise a new extended-mode Regexp is built from
# <tt>@pattern[:X_ABS_URI]</tt>, prefixed with a lookahead that requires one
# of +schemes+ followed by ":".
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  scheme_alternation = Regexp.union(*schemes)
  /(?=#{scheme_alternation}:)#{@pattern[:X_ABS_URI]}/x
end

#parse(uri) ⇒ Object

Args

uri

String

Description

Parses uri and constructs either matching URI scheme object (File, FTP, HTTP, HTTPS, LDAP, LDAPS, or MailTo) or URI::Generic.

Usage

p = URI::Parser.new
p.parse("ldap://ldap.example.com/dc=example?user=john")
#=> #<URI::LDAP ldap://ldap.example.com/dc=example?user=john>


209
210
211
# File 'lib/uri/rfc2396_parser.rb', line 209

# Parses +uri+ into a URI object.
#
# The String is first broken into components with #split; those components,
# followed by this parser, are then passed to URI.for, whose return value is
# returned unchanged.
def parse(uri)
  components = split(uri)
  URI.for(*components, self)
end

#split(uri) ⇒ Object

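Splits uri using self.regexp and returns an Array of its components: [scheme, userinfo, host, port, registry, path, opaque, query, fragment]. Raises InvalidURIError if uri matches neither the absolute nor the relative URI pattern.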



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/uri/rfc2396_parser.rb', line 120

# Splits +uri+ into its nine components.
#
# Returns <tt>[scheme, userinfo, host, port, registry, path, opaque, query,
# fragment]</tt>. Components that are absent are nil, except that +path+
# becomes '' whenever neither a path nor an opaque part was found.
#
# Raises InvalidURIError when +uri+ is neither '' nor matched by
# <tt>@regexp[:ABS_URI]</tt> or <tt>@regexp[:REL_URI]</tt>, or when an
# absolute match has no scheme or has none of opaque, path, host or registry.
def split(uri)
  case uri
  when ''
    # Null URI: every component stays nil; path is defaulted below.

  when @regexp[:ABS_URI]
    # URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]
    #
    # absoluteURI   = scheme ":" ( hier_part | opaque_part )
    # hier_part     = ( net_path | abs_path ) [ "?" query ]
    # opaque_part   = uric_no_slash *uric
    #
    # abs_path      = "/"  path_segments
    # net_path      = "//" authority [ abs_path ]
    #
    # authority     = server | reg_name
    # server        = [ [ userinfo "@" ] hostport ]
    scheme, opaque, userinfo, host, port,
      registry, path, query, fragment = Regexp.last_match.captures

    raise InvalidURIError, "bad URI(absolute but no scheme): #{uri}" unless scheme
    unless opaque || path || host || registry
      raise InvalidURIError, "bad URI(absolute but no path): #{uri}"
    end

  when @regexp[:REL_URI]
    # relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]
    #
    # net_path      = "//" authority [ abs_path ]
    # abs_path      = "/"  path_segments
    # rel_path      = rel_segment [ abs_path ]
    #
    # scheme and opaque are never assigned on this branch, so they stay nil.
    userinfo, host, port, registry,
      rel_segment, abs_path, query, fragment = Regexp.last_match.captures

    # Both parts present: concatenate. Otherwise take whichever one exists
    # (nil when neither does).
    path = if rel_segment && abs_path
             rel_segment + abs_path
           else
             rel_segment || abs_path
           end

  else
    raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path ||= '' unless opaque # (see RFC2396 Section 5.2)

  [
    scheme,
    userinfo, host, port,
    registry,
    path,
    opaque,
    query,
    fragment
  ]
end

#unescape(str, escaped = @regexp[:ESCAPED]) ⇒ Object

:call-seq:

unescape( str )
unescape( str, escaped )

Args

str

String to remove escapes from

escaped

Regexp matching the escape sequences to decode. Defaults to self.regexp[:ESCAPED].

Description

Removes escapes from str.



318
319
320
321
322
# File 'lib/uri/rfc2396_parser.rb', line 318

# Decodes the escape sequences in +str+ that are matched by +escaped+.
#
# str::     String to remove escapes from; a new String is returned.
# escaped:: Regexp selecting the escape sequences. Defaults to
#           <tt>@regexp[:ESCAPED]</tt>.
#
# For every match, the two characters after its first character are read as
# a hex pair and replaced by the corresponding single byte. Each decoded byte
# is tagged with the encoding of +str+, or with UTF-8 when +str+ is US-ASCII.
def unescape(str, escaped = @regexp[:ESCAPED])
  source_encoding = str.encoding
  target_encoding = source_encoding == Encoding::US_ASCII ? Encoding::UTF_8 : source_encoding

  str.gsub(escaped) do |sequence|
    hex_pair = sequence[1, 2]
    [hex_pair].pack('H2').force_encoding(target_encoding)
  end
end