Method: CGI.unescapeHTML
- Defined in:
- lib/cgi/util.rb
.unescapeHTML(string) ⇒ Object
Unescape a string that has been HTML-escaped
CGI::unescapeHTML("Usage: foo "bar" <baz>")
# => "Usage: foo \"bar\" <baz>"
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/cgi/util.rb', line 41 def CGI::unescapeHTML(string) enc = string.encoding if [Encoding::UTF_16BE, Encoding::UTF_16LE, Encoding::UTF_32BE, Encoding::UTF_32LE].include?(enc) return string.gsub(Regexp.new('&(amp|quot|gt|lt|#[0-9]+|#x[0-9A-Fa-f]+);'.encode(enc))) do case $1.encode("US-ASCII") when 'amp' then '&'.encode(enc) when 'quot' then '"'.encode(enc) when 'gt' then '>'.encode(enc) when 'lt' then '<'.encode(enc) when /\A#0*(\d+)\z/ then $1.to_i.chr(enc) when /\A#x([0-9a-f]+)\z/i then $1.hex.chr(enc) end end end asciicompat = Encoding.compatible?(string, "a") string.gsub(/&(amp|quot|gt|lt|\#[0-9]+|\#x[0-9A-Fa-f]+);/) do match = $1.dup case match when 'amp' then '&' when 'quot' then '"' when 'gt' then '>' when 'lt' then '<' when /\A#0*(\d+)\z/ n = $1.to_i if enc == Encoding::UTF_8 or enc == Encoding::ISO_8859_1 && n < 256 or asciicompat && n < 128 n.chr(enc) else "&##{$1};" end when /\A#x([0-9a-f]+)\z/i n = $1.hex if enc == Encoding::UTF_8 or enc == Encoding::ISO_8859_1 && n < 256 or asciicompat && n < 128 n.chr(enc) else "&#x#{$1};" end else "&#{match};" end end end |