Class: UTF8Parser
- Inherits: StringScanner
- Inheritance chain: Object -> StringScanner -> UTF8Parser
- Defined in:
- lib/rdf_objects/parsers.rb
Constant Summary collapse
- STRING =
/(([\x0-\x1f]|[\\\/bfnrt]|\\u[0-9a-fA-F]{4}|[\x20-\xff])*)/nx
- UNPARSED =
Object.new
- UNESCAPE_MAP =
Hash.new { |h, k| h[k] = k.chr }
- UTF16toUTF8 =
Iconv.new('utf-8', 'utf-16be')
Instance Method Summary collapse
- #initialize(str) ⇒ UTF8Parser (constructor): A new instance of UTF8Parser.
- #parse_string ⇒ Object
Constructor Details
#initialize(str) ⇒ UTF8Parser
Returns a new instance of UTF8Parser.
30 31 32 33 |
# File 'lib/rdf_objects/parsers.rb', line 30
#
# Sets up the scanner over +str+ and keeps a reference to the same
# string in @string.
#
# @param str [String] the text to scan
def initialize(str)
  # Bare `super` forwards `str` unchanged to the parent initializer.
  super
  @string = str
end
Instance Method Details
#parse_string ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/rdf_objects/parsers.rb', line 34
#
# Scans STRING at the current scan position and returns the matched text
# with its backslash escapes resolved.
#
# Each escape is looked up in UNESCAPE_MAP by the character following the
# backslash. When the lookup yields nil/false the escape is treated as a
# run of one or more \uXXXX sequences: their hex digits are turned into
# UTF-16BE bytes and converted with UTF16toUTF8.
# NOTE(review): presumably UNESCAPE_MAP maps 'u' to nil so that \u escapes
# reach that branch -- confirm against the full constant definition.
#
# @return [String] the unescaped string ('' when the match is empty),
#   tagged as UTF-8 where String#force_encoding is available
# @return [Object] UNPARSED when STRING does not match
# @raise [StandardError] when the UTF-16 -> UTF-8 conversion fails
def parse_string
  return UNPARSED unless scan(STRING)

  raw = self[1]
  return '' if raw.empty?

  string = raw.gsub(%r((?:\\[\\bfnrt"/]|(?:\\u(?:[A-Fa-f\d]{4}))+|\\[\x20-\xff]))n) do |c|
    # c is the whole escape; c[1] is the character after the backslash.
    unescaped = UNESCAPE_MAP[c[1]]
    if unescaped
      unescaped
    else
      # \uXXXX (possibly several in a row, e.g. a surrogate pair).
      # Collect the hex digits and pack them into raw bytes. Appending
      # Integers to a '' literal instead would, on Ruby >= 1.9, append
      # codepoints in the literal's encoding and turn every byte >= 0x80
      # into two bytes, corrupting the UTF-16BE input.
      bytes = [c.scan(/[A-Fa-f\d]{4}/).join].pack('H*')
      UTF16toUTF8.iconv(bytes)
    end
  end
  string.force_encoding(Encoding::UTF_8) if string.respond_to?(:force_encoding)
  string
rescue Iconv::Failure => e
  raise StandardError, "Caught #{e.class}: #{e}"
end