Module: CGI::Util

Included in:
CGI, CGI
Defined in:
lib/cgi/core.rb,
lib/cgi/util.rb,
lib/cgi/util.rb,
ext/cgi/escape/escape.c

Constant Summary collapse

TABLE_FOR_ESCAPE_HTML__ =

The set of special characters and their escaped values

{
  "'" => ''',
  '&' => '&',
  '"' => '"',
  '<' => '&lt;',
  '>' => '&gt;',
}
@@accept_charset =

:nodoc:

Encoding::UTF_8

Instance Method Summary collapse

Instance Method Details

#escape(string) ⇒ Object

URL-encode a string into application/x-www-form-urlencoded. Space characters (+“ ”+) are encoded with plus signs (+“+”+)

url_encoded_string = CGI.escape("'Stop!' said Fred")
   # => "%27Stop%21%27+said+Fred"


14
15
16
17
18
19
20
21
22
# File 'lib/cgi/util.rb', line 14

def escape(string)
  encoding = string.encoding
  buffer = string.b
  buffer.gsub!(/([^ a-zA-Z0-9_.\-~]+)/) do |m|
    '%' + m.unpack('H2' * m.bytesize).join('%').upcase
  end
  buffer.tr!(' ', '+')
  buffer.force_encoding(encoding)
end

#escapeElement(string, *elements) ⇒ Object Also known as: escape_element

Escape only the tags of certain HTML elements in string.

Takes an element or elements or array of elements. Each element is specified by the name of the element, without angle brackets. This matches both the start and the end tag of that element. The attribute list of the open tag will also be escaped (for instance, the double-quotes surrounding attribute values).

print CGI.escapeElement('<BR><A HREF="url"></A>', "A", "IMG")
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"

print CGI.escapeElement('<BR><A HREF="url"></A>', ["A", "IMG"])
  # "<BR>&lt;A HREF=&quot;url&quot;&gt;&lt;/A&gt"


184
185
186
187
188
189
190
191
192
193
# File 'lib/cgi/util.rb', line 184

def escapeElement(string, *elements)
  elements = elements[0] if elements[0].kind_of?(Array)
  unless elements.empty?
    string.gsub(/<\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?>/i) do
      CGI.escapeHTML($&)
    end
  else
    string
  end
end

#escapeHTML(string) ⇒ Object Also known as: escape_html, h

Escape special characters in HTML, namely ‘&"<>

CGI.escapeHTML('Usage: foo "bar" <baz>')
   # => "Usage: foo &quot;bar&quot; &lt;baz&gt;"


77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/cgi/util.rb', line 77

def escapeHTML(string)
  enc = string.encoding
  unless enc.ascii_compatible?
    if enc.dummy?
      origenc = enc
      enc = Encoding::Converter.asciicompat_encoding(enc)
      string = enc ? string.encode(enc) : string.b
    end
    table = Hash[TABLE_FOR_ESCAPE_HTML__.map {|pair|pair.map {|s|s.encode(enc)}}]
    string = string.gsub(/#{"['&\"<>]".encode(enc)}/, table)
    string.encode!(origenc) if origenc
    string
  else
    string = string.b
    string.gsub!(/['&\"<>]/, TABLE_FOR_ESCAPE_HTML__)
    string.force_encoding(enc)
  end
end

#escapeURIComponent(string) ⇒ Object Also known as: escape_uri_component

URL-encode a string following RFC 3986 Space characters (+“ ”+) are encoded with (+“%20”+)

url_encoded_string = CGI.escapeURIComponent("'Stop!' said Fred")
   # => "%27Stop%21%27%20said%20Fred"


41
42
43
44
45
46
47
48
# File 'lib/cgi/util.rb', line 41

def escapeURIComponent(string)
  encoding = string.encoding
  buffer = string.b
  buffer.gsub!(/([^a-zA-Z0-9_.\-~]+)/) do |m|
    '%' + m.unpack('H2' * m.bytesize).join('%').upcase
  end
  buffer.force_encoding(encoding)
end

#pretty(string, shift = " ") ⇒ Object

Prettify (indent) an HTML string.

string is the HTML string to indent. shift is the indentation unit to use; it defaults to two spaces.

print CGI.pretty("<HTML><BODY></BODY></HTML>")
  # <HTML>
  #   <BODY>
  #   </BODY>
  # </HTML>

print CGI.pretty("<HTML><BODY></BODY></HTML>", "\t")
  # <HTML>
  #         <BODY>
  #         </BODY>
  # </HTML>


246
247
248
249
250
251
252
253
254
255
# File 'lib/cgi/util.rb', line 246

def pretty(string, shift = "  ")
  lines = string.gsub(/(?!\A)<.*?>/m, "\n\\0").gsub(/<.*?>(?!\n)/m, "\\0\n")
  end_pos = 0
  while end_pos = lines.index(/^<\/(\w+)/, end_pos)
    element = $1.dup
    start_pos = lines.rindex(/^\s*<#{element}/i, end_pos)
    lines[start_pos ... end_pos] = "__" + lines[start_pos ... end_pos].gsub(/\n(?!\z)/, "\n" + shift) + "__"
  end
  lines.gsub(/^((?:#{Regexp::quote(shift)})*)__(?=<\/?\w)/, '\1')
end

#rfc1123_date(time) ⇒ Object

Format a Time object as a String using the format specified by RFC 1123.

CGI.rfc1123_date(Time.now)
  # Sat, 01 Jan 2000 00:00:00 GMT


225
226
227
# File 'lib/cgi/util.rb', line 225

def rfc1123_date(time)
  time.getgm.strftime("%a, %d %b %Y %T GMT")
end

#unescape(string, encoding = @@accept_charset) ⇒ Object

URL-decode an application/x-www-form-urlencoded string with encoding(optional).

string = CGI.unescape("%27Stop%21%27+said+Fred")
   # => "'Stop!' said Fred"


27
28
29
30
31
32
33
34
35
# File 'lib/cgi/util.rb', line 27

def unescape(string, encoding = @@accept_charset)
  str = string.tr('+', ' ')
  str = str.b
  str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
    [m.delete('%')].pack('H*')
  end
  str.force_encoding(encoding)
  str.valid_encoding? ? str : str.force_encoding(string.encoding)
end

#unescapeElement(string, *elements) ⇒ Object Also known as: unescape_element

Undo escaping such as that done by CGI.escapeElement()

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), "A", "IMG")
  # "&lt;BR&gt;<A HREF="url"></A>"

print CGI.unescapeElement(
        CGI.escapeHTML('<BR><A HREF="url"></A>'), ["A", "IMG"])
  # "&lt;BR&gt;<A HREF="url"></A>"


204
205
206
207
208
209
210
211
212
213
# File 'lib/cgi/util.rb', line 204

def unescapeElement(string, *elements)
  elements = elements[0] if elements[0].kind_of?(Array)
  unless elements.empty?
    string.gsub(/&lt;\/?(?:#{elements.join("|")})(?!\w)(?:.|\n)*?&gt;/i) do
      unescapeHTML($&)
    end
  else
    string
  end
end

#unescapeHTML(string) ⇒ Object Also known as: unescape_html

Unescape a string that has been HTML-escaped

CGI.unescapeHTML("Usage: foo &quot;bar&quot; &lt;baz&gt;")
   # => "Usage: foo \"bar\" <baz>"


107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/cgi/util.rb', line 107

def unescapeHTML(string)
  enc = string.encoding
  unless enc.ascii_compatible?
    if enc.dummy?
      origenc = enc
      enc = Encoding::Converter.asciicompat_encoding(enc)
      string = enc ? string.encode(enc) : string.b
    end
    string = string.gsub(Regexp.new('&(apos|amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do
      case $1.encode(Encoding::US_ASCII)
      when 'apos'                then "'".encode(enc)
      when 'amp'                 then '&'.encode(enc)
      when 'quot'                then '"'.encode(enc)
      when 'gt'                  then '>'.encode(enc)
      when 'lt'                  then '<'.encode(enc)
      when /\A#0*(\d+)\z/        then $1.to_i.chr(enc)
      when /\A#x([0-9a-f]+)\z/i  then $1.hex.chr(enc)
      end
    end
    string.encode!(origenc) if origenc
    return string
  end
  return string unless string.include? '&'
  charlimit = case enc
              when Encoding::UTF_8; 0x10ffff
              when Encoding::ISO_8859_1; 256
              else 128
              end
  string = string.b
  string.gsub!(/&(apos|amp|quot|gt|lt|\#[0-9]+|\#[xX][0-9A-Fa-f]+);/) do
    match = $1.dup
    case match
    when 'apos'                then "'"
    when 'amp'                 then '&'
    when 'quot'                then '"'
    when 'gt'                  then '>'
    when 'lt'                  then '<'
    when /\A#0*(\d+)\z/
      n = $1.to_i
      if n < charlimit
        n.chr(enc)
      else
        "&##{$1};"
      end
    when /\A#x([0-9a-f]+)\z/i
      n = $1.hex
      if n < charlimit
        n.chr(enc)
      else
        "&#x#{$1};"
      end
    else
      "&#{match};"
    end
  end
  string.force_encoding enc
end

#unescapeURIComponent(string, encoding = @@accept_charset) ⇒ Object Also known as: unescape_uri_component

URL-decode a string following RFC 3986 with encoding(optional).

string = CGI.unescapeURIComponent("%27Stop%21%27+said%20Fred")
   # => "'Stop!'+said Fred"


54
55
56
57
58
59
60
61
# File 'lib/cgi/util.rb', line 54

def unescapeURIComponent(string, encoding = @@accept_charset)
  str = string.b
  str.gsub!(/((?:%[0-9a-fA-F]{2})+)/) do |m|
    [m.delete('%')].pack('H*')
  end
  str.force_encoding(encoding)
  str.valid_encoding? ? str : str.force_encoding(string.encoding)
end