Class: URI::Parser

Inherits:
Object show all
Includes:
REGEXP
Defined in:
lib/extensions/uri/uri/common.rb

Overview

REGEXP

Constant Summary collapse

@@to_s =
Kernel.instance_method(:to_s)

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ Parser

Synopsis

URI::Parser.new([opts])

Args

The constructor accepts a hash of options for the parser. The keys are pattern names of URI components and the values are pattern strings. The constructor generates a set of regexps for parsing URIs.

You can use the following keys:

* <tt>:ESCAPED</tt> (URI::PATTERN::ESCAPED by default)
* <tt>:UNRESERVED</tt> (URI::PATTERN::UNRESERVED by default)
* <tt>:DOMLABEL</tt> (URI::PATTERN::DOMLABEL by default)
* <tt>:TOPLABEL</tt> (URI::PATTERN::TOPLABEL by default)
* <tt>:HOSTNAME</tt> (URI::PATTERN::HOSTNAME by default)

Examples

p = URI::Parser.new(:ESCAPED => "(?:%[a-fA-F0-9]{2}|%u[a-fA-F0-9]{4})")
u = p.parse("http://example.jp/%uABCD") #=> #<URI::HTTP:0xb78cf4f8 URL:http://example.jp/%uABCD>
URI.parse(u.to_s) #=> raises URI::InvalidURIError

s = "http://example.com/ABCD"
u1 = p.parse(s) #=> #<URI::HTTP:0xb78c3220 URL:http://example.com/ABCD>
u2 = URI.parse(s) #=> #<URI::HTTP:0xb78b6d54 URL:http://example.com/ABCD>
u1 == u2 #=> true
u1.eql?(u2) #=> false


89
90
91
92
93
94
95
96
97
# File 'lib/extensions/uri/uri/common.rb', line 89

# Builds the parser's two lookup tables from +opts+.
#
# +opts+ is handed to initialize_pattern and the result is stored in
# @pattern; that table is then handed to initialize_regexp and the result is
# stored in @regexp. Each table has all of its values frozen and is then
# frozen itself, and @pattern is fully frozen before @regexp is derived
# from it.
def initialize(opts = {})
  # Freezes every value of a table, then the table itself.
  freeze_table = lambda do |table|
    table.each_value(&:freeze)
    table.freeze
  end

  @pattern = initialize_pattern(opts)
  freeze_table.call(@pattern)

  @regexp = initialize_regexp(@pattern)
  freeze_table.call(@regexp)
end

Instance Attribute Details

#pattern ⇒ Object (readonly)

Returns the value of attribute pattern.



98
99
100
# File 'lib/extensions/uri/uri/common.rb', line 98

# Read-only accessor for the parser's pattern table.
#
# Returns the object stored in @pattern. The constructor fills it from
# initialize_pattern(opts) and freezes both the table and each of its values,
# so the returned object cannot be modified in place.
def pattern
  @pattern
end

#regexp ⇒ Object (readonly)

Returns the value of attribute regexp.



98
99
100
# File 'lib/extensions/uri/uri/common.rb', line 98

# Read-only accessor for the parser's regexp table.
#
# Returns the object stored in @regexp. The constructor fills it from
# initialize_regexp(@pattern) and freezes both the table and each of its
# values. Other methods of this class look up entries such as :ABS_URI,
# :REL_URI, :ABS_URI_REF, :UNSAFE and :ESCAPED in it.
def regexp
  @regexp
end

Instance Method Details

#escape(str, unsafe = @regexp[:UNSAFE]) ⇒ Object



214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/extensions/uri/uri/common.rb', line 214

# Percent-encodes every part of +str+ that +unsafe+ matches.
#
# +str+::    the String to escape.
# +unsafe+:: a Regexp selecting the text to encode (defaults to
#            <tt>@regexp[:UNSAFE]</tt>). Anything that is not a Regexp is
#            treated as a list of characters: it is quoted and wrapped in a
#            character class.
#
# Every byte of every match is replaced by <tt>%XX</tt> (two upper-case hex
# digits). Returns a new String whose encoding is forced to US-ASCII.
def escape(str, unsafe = @regexp[:UNSAFE])
  # A non-Regexp +unsafe+ (perhaps a String) names the characters to escape.
  unsafe = Regexp.new("[#{Regexp.quote(unsafe)}]", false) unless unsafe.is_a?(Regexp)

  encoded = str.gsub(unsafe) do |match|
    match.each_byte.map { |byte| format('%%%02X', byte) }.join
  end
  encoded.force_encoding("US-ASCII")
end

#extract(str, schemes = nil, &block) ⇒ Object



195
196
197
198
199
200
201
202
203
204
# File 'lib/extensions/uri/uri/common.rb', line 195

# Finds every substring of +str+ matched by <tt>make_regexp(schemes)</tt>.
#
# +str+::     the String to scan.
# +schemes+:: passed through unchanged to make_regexp.
#
# With a block, each whole match is yielded in turn and +nil+ is returned.
# Without a block, the whole matches are returned as an Array (empty when
# nothing matches).
def extract(str, schemes = nil, &block)
  uri_re = make_regexp(schemes)

  unless block_given?
    found = []
    # The whole match is collected explicitly because the regexp may contain
    # capture groups, in which case a block-less scan would return groups.
    str.scan(uri_re) { found << Regexp.last_match(0) }
    return found
  end

  str.scan(uri_re) { yield Regexp.last_match(0) }
  nil
end

#inspect ⇒ Object



234
235
236
# File 'lib/extensions/uri/uri/common.rb', line 234

# Returns the result of calling the method object held in @@to_s
# (Kernel's instance method +to_s+) on this parser, rather than the default
# +inspect+ output.
# NOTE(review): presumably this keeps the @pattern and @regexp tables out of
# the inspect string -- confirm.
def inspect
  plain_to_s = @@to_s.bind(self)
  plain_to_s.call
end

#join(*str) ⇒ Object



187
188
189
190
191
192
193
# File 'lib/extensions/uri/uri/common.rb', line 187

# Resolves a chain of URI references against each other.
#
# The first argument is parsed with #parse; every following argument is then
# merged into the running result, left to right, via +merge+. Returns the
# final merged object (the parsed first argument when only one is given).
def join(*str)
  base = parse(str[0])
  str[1..-1].inject(base) { |resolved, reference| resolved.merge(reference) }
end

#make_regexp(schemes = nil) ⇒ Object



206
207
208
209
210
211
212
# File 'lib/extensions/uri/uri/common.rb', line 206

# Returns a Regexp that matches absolute URIs.
#
# +schemes+:: when nil or false, the prebuilt <tt>@regexp[:ABS_URI_REF]</tt>
#             is returned as is. Otherwise its elements are combined with
#             Regexp.union and the result only matches at positions where
#             one of them is followed by ":".
#
# The scheme-restricted regexp is built from <tt>@pattern[:X_ABS_URI]</tt>
# and compiled in extended (+x+) mode.
def make_regexp(schemes = nil)
  return @regexp[:ABS_URI_REF] unless schemes

  scheme_choice = Regexp.union(*schemes)
  /(?=#{scheme_choice}:)#{@pattern[:X_ABS_URI]}/x
end

#parse(uri) ⇒ Object



172
173
174
175
176
177
178
179
180
181
182
183
184
185
# File 'lib/extensions/uri/uri/common.rb', line 172

# Splits +uri+ with #split and wraps the components in a URI object.
#
# When the scheme is present and its upper-cased form is a key of
# URI.scheme_list, the class registered under that key is instantiated;
# otherwise Generic is used. The constructor receives the nine components in
# the order returned by #split, followed by this parser.
#
# Any error raised by #split propagates to the caller.
def parse(uri)
  scheme, userinfo, host, port,
    registry, path, opaque, query, fragment = split(uri)

  components = [scheme, userinfo, host, port,
                registry, path, opaque, query, fragment]

  registered = scheme && URI.scheme_list.include?(scheme.upcase)
  uri_class = registered ? URI.scheme_list[scheme.upcase] : Generic

  uri_class.new(*components, self)
end

#split(uri) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/extensions/uri/uri/common.rb', line 100

def split(uri)
  case uri
  when ''
	# null uri

  when @regexp[:ABS_URI]
	scheme, opaque, userinfo, host, port, 
	  registry, path, query, fragment = $~[1..-1]

	# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

	# absoluteURI   = scheme ":" ( hier_part | opaque_part )
	# hier_part     = ( net_path | abs_path ) [ "?" query ]
	# opaque_part   = uric_no_slash *uric

	# abs_path      = "/"  path_segments
	# net_path      = "//" authority [ abs_path ]

	# authority     = server | reg_name
	# server        = [ [ userinfo "@" ] hostport ]

	if !scheme
	  raise InvalidURIError, 
 "bad URI(absolute but no scheme): #{uri}"
	end
	if !opaque && (!path && (!host && !registry))
	  raise InvalidURIError,
 "bad URI(absolute but no path): #{uri}" 
	end

  when @regexp[:REL_URI]
	scheme = nil
	opaque = nil

	userinfo, host, port, registry, 
	  rel_segment, abs_path, query, fragment = $~[1..-1]
	if rel_segment && abs_path
	  path = rel_segment + abs_path
	elsif rel_segment
	  path = rel_segment
	elsif abs_path
	  path = abs_path
	end

	# URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ]

	# relativeURI   = ( net_path | abs_path | rel_path ) [ "?" query ]

	# net_path      = "//" authority [ abs_path ]
	# abs_path      = "/"  path_segments
	# rel_path      = rel_segment [ abs_path ]

	# authority     = server | reg_name
	# server        = [ [ userinfo "@" ] hostport ]

  else
	raise InvalidURIError, "bad URI(is not URI?): #{uri}"
  end

  path = '' if !path && !opaque # (see RFC2396 Section 5.2)
  ret = [
	scheme, 
	userinfo, host, port,         # X
	registry,                     # X
	path,                         # Y
	opaque,                       # Y
	query,
	fragment
  ]
  return ret
end

#unescape(str, escaped = @regexp[:ESCAPED]) ⇒ Object



229
230
231
# File 'lib/extensions/uri/uri/common.rb', line 229

# Decodes percent-escapes in +str+.
#
# +str+::     the String to unescape.
# +escaped+:: Regexp matching one escape sequence (defaults to
#             <tt>@regexp[:ESCAPED]</tt>). The two characters after the first
#             character of each match are read as a hexadecimal byte value.
#
# Returns a new String in the same encoding as +str+, with every match
# replaced by the single byte it denotes.
def unescape(str, escaped = @regexp[:ESCAPED])
  enc = str.encoding
  decoded = str.gsub(escaped) do |sequence|
    # pack('C') yields an ASCII-8BIT string. Tagging it with the encoding of
    # +str+ keeps gsub from raising Encoding::CompatibilityError when +str+
    # already contains non-ASCII characters next to a high-byte escape.
    [sequence[1, 2].hex].pack('C').force_encoding(enc)
  end
  # Re-tag the result so its validity is re-evaluated over the joined bytes.
  decoded.force_encoding(enc)
end