Module: OkJson

Extended by:
OkJson
Included in:
OkJson
Defined in:
lib/vendor/okjson.rb

Overview

Defined Under Namespace

Classes: ParserError

Constant Summary collapse

Utagx =

1000 0000

0x80
Utag2 =

1100 0000

0xc0
Utag3 =

1110 0000

0xe0
Utag4 =

1111 0000

0xf0
Utag5 =

1111 1000

0xF8
Umaskx =

0011 1111

0x3f
Umask2 =

0001 1111

0x1f
Umask3 =

0000 1111

0x0f
Umask4 =

0000 0111

0x07
Uchar1max =
(1 << 7) - 1
Uchar2max =
(1 << 11) - 1
Uchar3max =
(1 << 16) - 1
Ucharerr =

unicode “replacement char”

0xFFFD
Usurrself =
0x10000
Usurr1 =
0xd800
Usurr2 =
0xdc00
Usurr3 =
0xe000
Umax =
0x10ffff
Spc =
Unesc =
{ 'b' => "\b", 'f' => "\f", 'n' => "\n", 'r' => "\r", 't' => "\t" }.freeze
Hex =
'0123456789abcdef'.freeze

Instance Method Summary collapse

Instance Method Details

#abbrev(s) ⇒ Object



221
222
223
224
225
226
227
# File 'lib/vendor/okjson.rb', line 221

# Returns a short, backtick-quoted excerpt of s for use in error messages.
#
# At most the first 10 characters of s are kept, the excerpt is cut at the
# first backtick it contains (so the surrounding quotes stay unambiguous),
# and '...' is appended whenever the excerpt is shorter than s.
#
# @param s [String] text to abbreviate
# @return [String] the excerpt wrapped in backticks
def abbrev(s)
  t = s[0, 10]
  # String#index gives the *position* of the backtick. The former t['`']
  # returned the matched substring, which made t[0, p] raise TypeError.
  cut = t.index('`')
  t = t[0, cut] if cut
  t += '...' if t.length < s.length
  "`#{t}`"
end

#arrenc(a) ⇒ Object



348
349
350
# File 'lib/vendor/okjson.rb', line 348

# Encodes the array a as a JSON array: every element is passed through
# encode and the results are comma-separated inside square brackets.
#
# @param a [Array] values to encode
# @return [String] JSON array text
def arrenc(a)
  items = a.map { |element| encode(element) }
  "[#{items.join(',')}]"
end

#arrparse(ts) ⇒ Object

Parses an “array” in the sense of RFC 4627. Returns the parsed value and any trailing tokens.



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/vendor/okjson.rb', line 114

# Parses an "array" in the sense of RFC 4627.
#
# @param ts [Array<Array>] token list whose first token is '['
# @return [Array] two elements: the parsed Ruby array and the tokens that
#   follow the closing ']'
# @raise [OkJson::ParserError] if the token list ends before the array is
#   closed (previously this surfaced as NoMethodError on nil), or if eat /
#   valparse reject a token
def arrparse(ts)
  ts = eat('[', ts)
  arr = []

  raise OkJson::ParserError, 'unexpected end of input in array' if ts.empty?
  return arr, ts[1..-1] if ts[0][0] == ']'

  # Token order accepted: value (',' value)* ']'
  loop do
    v, ts = valparse(ts)
    arr << v

    # Guard before peeking: truncated input such as "[1" leaves no token here.
    raise OkJson::ParserError, 'unexpected end of input in array' if ts.empty?
    return arr, ts[1..-1] if ts[0][0] == ']'

    ts = eat(',', ts)
  end
end

#decode(s) ⇒ Object

Decodes a json document in string s and returns the corresponding ruby value. String s must be valid UTF-8. If you have a string in some other encoding, convert it first.

String values in the resulting structure will be UTF-8.



41
42
43
44
45
46
# File 'lib/vendor/okjson.rb', line 41

# Decodes the JSON document in string s into the corresponding Ruby value.
#
# The text is tokenized with lex and parsed with textparse; any token left
# over after the top-level value is an error.
#
# @param s [String] JSON text
# @return [Object] the decoded value
# @raise [OkJson::ParserError] with 'trailing garbage' if tokens remain
def decode(s)
  value, leftover = textparse(lex(s))
  return value if leftover.empty?

  raise OkJson::ParserError, 'trailing garbage'
end

#eat(typ, ts) ⇒ Object



135
136
137
138
139
140
# File 'lib/vendor/okjson.rb', line 135

# Consumes the first token of ts, which must be of type typ.
#
# @param typ [String, Symbol] expected token type (first element of a token)
# @param ts [Array<Array>] token list
# @return [Array<Array>] the tokens after the consumed one
# @raise [OkJson::ParserError] if the first token has another type, or if
#   ts is empty (truncated input used to raise NoMethodError on nil here)
def eat(typ, ts)
  head = ts[0]
  if head.nil? || head[0] != typ
    raise OkJson::ParserError, "expected #{typ} (got #{head.inspect})"
  end
  ts[1..-1]
end

#encode(x) ⇒ Object

Encodes x into a json text. It may contain only Array, Hash, String, Symbol, Numeric, true, false, nil. (Note: a Symbol is encoded as the JSON string of its name.) Strings contained in x must be valid UTF-8. Values that cannot be represented, such as NaN, Infinity, and Proc, are encoded as null, in accordance with ECMA-262, 5th ed.



330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/vendor/okjson.rb', line 330

# Encodes x as JSON text by dispatching on its type.
#
# Hash, Array, String and Numeric go to objenc, arrenc, strenc and numenc;
# a Symbol is encoded as the string of its name; true and false map to
# their literals; nil and every other object become 'null'.
#
# @param x [Object] value to encode
# @return [String] JSON text
def encode(x)
  return 'true' if true.equal?(x)
  return 'false' if false.equal?(x)

  case x
  when Hash    then objenc(x)
  when Array   then arrenc(x)
  when String  then strenc(x)
  when Symbol  then strenc(x.to_s)
  when Numeric then numenc(x)
  else 'null' # nil and anything unrepresentable
  end
end

#falsetok(s) ⇒ Object



198
199
200
# File 'lib/vendor/okjson.rb', line 198

# Recognizes the literal `false` at the start of s.
#
# @param s [String] remaining input
# @return [Array, nil] the token [:val, 'false', false], or nil when s does
#   not start with 'false'. nil (not false) is returned so that callers
#   testing the token type with nil? see "no token".
def falsetok(s)
  [:val, 'false', false] if s[0, 5] == 'false'
end

#hexdec4(s) ⇒ Object



291
292
293
294
# File 'lib/vendor/okjson.rb', line 291

# Decodes exactly four hex digits into an integer.
#
# Each character is converted with nibble and the four results are packed
# most-significant first.
#
# @param s [String] four hex digits
# @return [Integer] the decoded 16-bit value
# @raise [OkJson::ParserError] with 'short' unless s has length 4
def hexdec4(s)
  raise OkJson::ParserError, 'short' unless s.length == 4

  s.each_char.reduce(0) { |acc, digit| (acc << 4) | nibble(digit) }
end

#hexenc4(t, u) ⇒ Object



391
392
393
394
395
396
# File 'lib/vendor/okjson.rb', line 391

# Writes the low 16 bits of u to t as four lowercase hex digits,
# most-significant nibble first.
#
# @param t [#putc] output stream (strenc passes a StringIO)
# @param u [Integer] value to write
def hexenc4(t, u)
  [12, 8, 4].each do |shift|
    t.putc(Hex[(u >> shift) & 0xf])
  end
  t.putc(Hex[u & 0xf])
end

#lex(s) ⇒ Object

Scans s and returns a list of json tokens, excluding white space (as defined in RFC 4627).



144
145
146
147
148
149
150
151
152
153
154
155
# File 'lib/vendor/okjson.rb', line 144

# Scans s and returns its JSON tokens, dropping :space tokens.
#
# @param s [String] JSON text
# @return [Array<Array>] tokens of the form [type, lexeme, value]
# @raise [OkJson::ParserError] if tok finds no token at the current position
def lex(s)
  ts = []
  until s.empty?
    typ, lexeme, val = tok(s)
    # Treat any falsy type as "no token": a tokenizer that answers false
    # instead of nil would otherwise slip past a nil? check and crash below
    # on lexeme.length.
    unless typ
      raise OkJson::ParserError, "invalid character at #{s[0, 10].inspect}"
    end
    ts << [typ, lexeme, val] unless typ == :space
    s = s[lexeme.length..-1]
  end
  ts
end

#nibble(c) ⇒ Object



313
314
315
316
317
318
319
320
321
# File 'lib/vendor/okjson.rb', line 313

# Converts a single hex digit character to its numeric value.
#
# @param c [String] one character, 0-9, a-f or A-F
# @return [Integer] value in 0..15
# @raise [OkJson::ParserError] if c is not a hex digit. The letter ranges
#   are a-f / A-F; the former a-z / A-Z ranges accepted g..z and returned
#   values above 15.
def nibble(c)
  if c.between?('0', '9')
    c.ord - '0'.ord
  elsif c.between?('a', 'f')
    c.ord - 'a'.ord + 10
  elsif c.between?('A', 'F')
    c.ord - 'A'.ord + 10
  else
    raise OkJson::ParserError, "invalid hex code #{c}"
  end
end

#nulltok(s) ⇒ Object



190
191
192
# File 'lib/vendor/okjson.rb', line 190

# Recognizes the literal `null` at the start of s.
#
# @param s [String] remaining input
# @return [Array, nil] the token [:val, 'null', nil], or nil when s does
#   not start with 'null'. nil (not false) is returned so that callers
#   testing the token type with nil? see "no token".
def nulltok(s)
  [:val, 'null', nil] if s[0, 4] == 'null'
end

#numenc(x) ⇒ Object



398
399
400
401
402
403
404
405
# File 'lib/vendor/okjson.rb', line 398

# Encodes a number as JSON text.
#
# NaN and infinite values have no JSON representation and become 'null';
# everything else is rendered with to_s.
#
# @param x [Numeric] number to encode
# @return [String] JSON number text or 'null'
def numenc(x)
  # Probe for the predicates instead of rescuing the NoMethodError that
  # numeric types without nan? would raise.
  return 'null' if x.respond_to?(:nan?) && x.nan?
  return 'null' if x.respond_to?(:infinite?) && x.infinite?

  x.to_s
end

#numtok(s) ⇒ Object



202
203
204
205
206
207
208
209
210
211
212
213
# File 'lib/vendor/okjson.rb', line 202

# Recognizes a JSON number at the start of s.
#
# The value is a Float when the lexeme has a fraction part, otherwise an
# Integer; an exponent without a fraction is applied with integer
# arithmetic (so a negative exponent yields a Rational, per Integer#**).
#
# @param s [String] remaining input
# @return [Array, nil] [:val, lexeme, value], or nil if s does not start
#   with a number
def numtok(s)
  # \A anchors the match at the start, so a non-number is rejected without
  # scanning the rest of the input for a later match.
  m = /\A-?([1-9][0-9]+|[0-9])([.][0-9]+)?([eE][+-]?[0-9]+)?/.match(s)
  return unless m

  lexeme = m[0]
  if m[2]
    [:val, lexeme, Float(lexeme)]
  elsif m[3]
    # m[1] holds the digits only; re-apply the leading '-' that sits
    # outside the capture group, otherwise "-1e2" would decode as 100.
    mantissa = Integer(m[1])
    mantissa = -mantissa if lexeme.start_with?('-')
    [:val, lexeme, mantissa * (10**Integer(m[3][1..-1]))]
  else
    [:val, lexeme, Integer(lexeme)]
  end
end

#objenc(x) ⇒ Object



344
345
346
# File 'lib/vendor/okjson.rb', line 344

# Encodes the hash x as a JSON object.
#
# JSON member names must be strings, so every key is converted with to_s
# and written through strenc; String and Symbol keys encode as before,
# while other keys (e.g. Integer) are now quoted instead of being emitted
# as bare, invalid member names such as {1:2}. Values go through encode.
#
# @param x [Hash] mapping to encode
# @return [String] JSON object text
def objenc(x)
  members = x.map { |k, v| strenc(k.to_s) + ':' + encode(v) }
  '{' + members.join(',') + '}'
end

#objparse(ts) ⇒ Object

Parses an “object” in the sense of RFC 4627. Returns the parsed value and any trailing tokens.



80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
# File 'lib/vendor/okjson.rb', line 80

# Parses an "object" in the sense of RFC 4627.
#
# @param ts [Array<Array>] token list whose first token is '{'
# @return [Array] two elements: the parsed Hash and the tokens that follow
#   the closing '}'
# @raise [OkJson::ParserError] if the token list ends before the object is
#   closed (previously this surfaced as NoMethodError on nil), or if eat /
#   pairparse reject a token
def objparse(ts)
  ts = eat('{', ts)
  obj = {}

  raise OkJson::ParserError, 'unexpected end of input in object' if ts.empty?
  return obj, ts[1..-1] if ts[0][0] == '}'

  # Token order accepted: member (',' member)* '}'
  loop do
    k, v, ts = pairparse(ts)
    obj[k] = v

    # Guard before peeking: truncated input such as '{"a":1' leaves no token.
    raise OkJson::ParserError, 'unexpected end of input in object' if ts.empty?
    return obj, ts[1..-1] if ts[0][0] == '}'

    ts = eat(',', ts)
  end
end

#pairparse(ts) ⇒ Object

Parses a “member” in the sense of RFC 4627. Returns the parsed value and any trailing tokens.



103
104
105
106
107
108
109
110
# File 'lib/vendor/okjson.rb', line 103

# Parses a "member" (key ':' value) in the sense of RFC 4627.
#
# @param ts [Array<Array>] token list positioned at the member's key
# @return [Array] three elements: key, value and the remaining tokens
# @raise [OkJson::ParserError] if the first token is not a :str token, or
#   if eat / valparse reject what follows
def pairparse(ts)
  kind, _lexeme, key = ts[0]
  raise OkJson::ParserError, "unexpected #{key.inspect}" unless kind == :str

  after_colon = eat(':', ts[1..-1])
  value, remaining = valparse(after_colon)
  [key, value, remaining]
end

#strenc(s) ⇒ Object



352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
# File 'lib/vendor/okjson.rb', line 352

# Encodes the string s as a JSON string literal.
#
# '"', '\\' and the control characters \b \f \n \r \t get their short
# escapes, bytes from Spc through '~' are copied, and everything else is
# decoded with uchardec and written as \uXXXX (a surrogate pair from
# unsubst for code points of 0x10000 and above). The output is pure ASCII.
#
# @param s [String] text to encode
# @return [String] the quoted JSON string literal
def strenc(s)
  # uchardec inspects s[i].ord as a UTF-8 *byte*, so scan a binary copy:
  # indexing a UTF-8 string yields whole characters, which made every
  # non-ASCII character come out as \ufffd.
  bytes = s.dup.force_encoding(Encoding::BINARY)
  t = StringIO.new
  t.putc('"')
  r = 0
  while r < bytes.length
    c = bytes[r]
    case c
    when '"'  then t.print('\\"')
    when '\\' then t.print('\\\\')
    when "\b" then t.print('\\b')
    when "\f" then t.print('\\f')
    when "\n" then t.print('\\n')
    when "\r" then t.print('\\r')
    when "\t" then t.print('\\t')
    else
      if Spc <= c && c <= '~'
        t.putc(c)
      else
        u, size = uchardec(bytes, r)
        r += size - 1 # we add one more at the bottom of the loop
        if u < 0x10000
          t.print('\\u')
          hexenc4(t, u)
        else
          u1, u2 = unsubst(u)
          t.print('\\u')
          hexenc4(t, u1)
          t.print('\\u')
          hexenc4(t, u2)
        end
      end
    end
    r += 1
  end
  t.putc('"')
  t.string
end

#strtok(s) ⇒ Object



215
216
217
218
219
# File 'lib/vendor/okjson.rb', line 215

# Recognizes a JSON string literal at the start of s.
#
# @param s [String] remaining input, expected to begin with '"'
# @return [Array] [:str, lexeme, value] where value is unquote(lexeme)
# @raise [OkJson::ParserError] if no well-formed literal starts at the
#   beginning of s
def strtok(s)
  # \A pins the match to the start of s. Unanchored, a malformed literal
  # was skipped and a later well-formed one matched, so the returned lexeme
  # did not correspond to the text the caller then consumed.
  m = /\A"([^"\\]|\\["\/\\bfnrt]|\\u[0-9a-fA-F]{4})*"/.match(s)
  raise OkJson::ParserError, "invalid string literal at #{abbrev(s)}" unless m
  [:str, m[0], unquote(m[0])]
end

#subst(u1, u2) ⇒ Object



296
297
298
299
300
301
# File 'lib/vendor/okjson.rb', line 296

# Combines a UTF-16 surrogate pair into the code point it represents.
#
# @param u1 [Integer] high surrogate, expected in Usurr1...Usurr2
# @param u2 [Integer] low surrogate, expected in Usurr2...Usurr3
# @return [Integer] the combined code point, or Ucharerr if either value is
#   outside its surrogate range
def subst(u1, u2)
  return Ucharerr unless Usurr1 <= u1 && u1 < Usurr2 && Usurr2 <= u2 && u2 < Usurr3

  # Parenthesized on purpose: `+` binds tighter than `|`, so the unbracketed
  # form OR-ed the high bits with (low + Usurrself) and lost the carry
  # whenever the high surrogate had bit 6 set (e.g. U+20000 became U+10000).
  (((u1 - Usurr1) << 10) | (u2 - Usurr2)) + Usurrself
end

#surrogate?(u) ⇒ Boolean

Returns:

  • (Boolean)


309
310
311
# File 'lib/vendor/okjson.rb', line 309

# Tells whether u lies in the surrogate range Usurr1...Usurr3
# (lower bound inclusive, upper bound exclusive).
#
# @param u [Integer] code point to test
# @return [Boolean]
def surrogate?(u)
  below_range = Usurr1 > u
  !below_range && u < Usurr3
end

#textparse(ts) ⇒ Object

Parses a “json text” in the sense of RFC 4627. Returns the parsed value and any trailing tokens. Note: objects and arrays are dispatched directly; any other token is handed to valparse, so atomic values are accepted as well.



52
53
54
55
56
57
58
59
60
61
# File 'lib/vendor/okjson.rb', line 52

# Parses a "json text": the top-level value of a document.
#
# Objects and arrays are dispatched to objparse / arrparse; any other
# leading token is handed to valparse.
#
# @param ts [Array<Array>] token list
# @return [Array] two elements: the parsed value and the remaining tokens
# @raise [OkJson::ParserError] with 'empty' if ts has no tokens
def textparse(ts)
  # The former `ts.length < 0` could never be true, so empty input fell
  # through and was reported as an unexpected nil token instead.
  raise OkJson::ParserError, 'empty' if ts.empty?

  case ts[0][0]
  when '{' then objparse(ts)
  when '[' then arrparse(ts)
  else valparse(ts)
  end
end

#tok(s) ⇒ Object

Scans the first token in s and returns a 3-element list, or nil if no such token exists.

The first list element is one of ‘{’, ‘}’, ‘:’, ‘,’, ‘[’, ‘]’, :val, :str, and :space.

The second element is the lexeme.

The third element is the value of the token for :val and :str, otherwise it is the lexeme.



170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/vendor/okjson.rb', line 170

# Scans the first token in s.
#
# Punctuation yields [char, lexeme, lexeme]; Spc, tab, newline and carriage
# return yield [:space, lexeme, lexeme]; 'n', 't', 'f' and '"' are handed
# to nulltok, truetok, falsetok and strtok; anything else goes to numtok.
#
# @param s [String] remaining input
# @return [Array, nil] whatever the selected tokenizer returns
def tok(s)
  lead = s[0]

  punct = ['{', '}', ':', ',', '[', ']'].find { |mark| mark == lead }
  return [punct, s[0, 1], s[0, 1]] if punct

  case lead
  when 'n' then nulltok(s)
  when 't' then truetok(s)
  when 'f' then falsetok(s)
  when '"' then strtok(s)
  else
    if [Spc, "\t", "\n", "\r"].include?(lead)
      [:space, s[0, 1], s[0, 1]]
    else
      numtok(s)
    end
  end
end

#truetok(s) ⇒ Object



194
195
196
# File 'lib/vendor/okjson.rb', line 194

# Recognizes the literal `true` at the start of s.
#
# @param s [String] remaining input
# @return [Array, nil] the token [:val, 'true', true], or nil when s does
#   not start with 'true'. nil (not false) is returned so that callers
#   testing the token type with nil? see "no token".
def truetok(s)
  [:val, 'true', true] if s[0, 4] == 'true'
end

#uchardec(s, i) ⇒ Object

Decodes unicode character u from UTF-8 bytes in string s at position i. Returns u and the number of bytes read.



410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'lib/vendor/okjson.rb', line 410

# Decodes one UTF-8 sequence from s starting at index i.
#
# Each s[k].ord is interpreted as a byte value.
# NOTE(review): this presumably expects s to be indexable byte-by-byte
# (e.g. binary-encoded) -- confirm against callers.
#
# @param s [String] source bytes
# @param i [Integer] index of the lead byte
# @return [Array] [code point, bytes consumed]; [Ucharerr, 1] for a missing,
#   truncated, malformed or overlong sequence
def uchardec(s, i)
  invalid = [Ucharerr, 1]
  available = s.length - i
  return invalid if available < 1

  lead = s[i].ord
  return [lead, 1] if lead < Utagx   # plain 7-bit byte
  return invalid if lead < Utag2     # continuation byte in lead position

  # One row per multi-byte form, shortest first:
  # [exclusive lead-byte limit, lead payload mask, largest value that the
  #  next shorter form could already express (overlong check)]
  forms = [
    [Utag3, Umask2, Uchar1max],
    [Utag4, Umask3, Uchar2max],
    [Utag5, Umask4, Uchar3max]
  ]

  tail = 0 # payload bits gathered from the continuation bytes so far
  forms.each_with_index do |(limit, mask, overlong_max), k|
    size = k + 2
    return invalid if available < size

    byte = s[i + size - 1].ord
    return invalid if byte < Utagx || byte >= Utag2 # not a continuation byte

    tail = (tail << 6) | (byte & Umaskx)
    next unless lead < limit

    u = ((lead & mask) << (6 * (size - 1))) | tail
    return u <= overlong_max ? invalid : [u, size]
  end

  invalid # lead byte of Utag5 or above
end

#ucharenc(a, i, u) ⇒ Object

Encodes unicode character u as UTF-8 bytes in string a at position i. Returns the number of bytes written.



464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
# File 'lib/vendor/okjson.rb', line 464

# Writes code point u into a, starting at index i, as a UTF-8 sequence of
# one to four single-byte strings (Integer#chr), one per index.
#
# @param a [String] destination buffer, modified in place
# @param i [Integer] index of the first byte to write
# @param u [Integer] code point
# @return [Integer] number of bytes written
def ucharenc(a, i, u)
  sequence =
    if u <= Uchar1max
      [u & 0xff]
    elsif u <= Uchar2max
      [Utag2 | ((u >> 6) & 0xff),
       Utagx | (u & Umaskx)]
    elsif u <= Uchar3max
      [Utag3 | ((u >> 12) & 0xff),
       Utagx | ((u >> 6) & Umaskx),
       Utagx | (u & Umaskx)]
    else
      [Utag4 | ((u >> 18) & 0xff),
       Utagx | ((u >> 12) & Umaskx),
       Utagx | ((u >> 6) & Umaskx),
       Utagx | (u & Umaskx)]
    end

  sequence.each_with_index { |byte, offset| a[i + offset] = byte.chr }
  sequence.length
end

#unquote(q) ⇒ Object

Converts a quoted json string literal q into a UTF-8-encoded string. The rules are different than for Ruby, so we cannot use eval. Unquote will raise OkJson::ParserError if q contains control characters.



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/vendor/okjson.rb', line 232

# Converts a quoted JSON string literal q into a UTF-8-encoded string.
#
# The literal is processed byte by byte: ucharenc emits single bytes, so
# both the source and the output buffer are kept binary while scanning and
# the result is tagged as UTF-8 at the end. Working on the character string
# directly produced binary-encoded results for \u escapes and raised
# Encoding::CompatibilityError when raw non-ASCII text and \u escapes met.
#
# A \uXXXX high surrogate is combined with a directly following \uXXXX low
# surrogate; a surrogate without a valid partner becomes Ucharerr.
#
# @param q [String] string literal including its surrounding quotes
# @return [String] the unescaped text
# @raise [OkJson::ParserError] on a dangling backslash, an unknown escape,
#   an unescaped '"' or a character below Spc
def unquote(q)
  q = q[1...-1]
  src = q.dup.force_encoding(Encoding::BINARY)
  a = src.dup # allocate a big enough string
  r = 0
  w = 0
  while r < src.length
    c = src[r]
    if c == '\\'
      r += 1
      if r >= src.length
        raise OkJson::ParserError, "string literal ends with a \"\\\": \"#{q}\""
      end

      case src[r]
      when '"', '\\', '/', "'"
        a[w] = src[r]
        r += 1
        w += 1
      when 'b', 'f', 'n', 'r', 't'
        a[w] = Unesc[src[r]]
        r += 1
        w += 1
      when 'u'
        r += 1
        uchar = begin
          hexdec4(src[r, 4])
        rescue RuntimeError => e
          raise OkJson::ParserError, "invalid escape sequence \\u#{src[r, 4]}: #{e}"
        end
        r += 4
        if surrogate?(uchar)
          # Only look for the partner when the next six bytes really are
          # another \uXXXX escape; the digits used to be read unconditionally.
          low = hexdec4(src[r + 2, 4]) if src.length >= r + 6 && src[r, 2] == '\\u'
          uchar = low ? subst(uchar, low) : Ucharerr
          # A valid pair; consume the second escape too.
          r += 6 if uchar != Ucharerr
        end
        w += ucharenc(a, w, uchar)
      else
        raise OkJson::ParserError, "invalid escape char #{src[r]} in \"#{q}\""
      end
    elsif c == '"' || c < Spc
      raise OkJson::ParserError, "invalid character in string literal \"#{q}\""
    else
      # Copy anything else byte-for-byte.
      # Valid UTF-8 will remain valid UTF-8.
      # Invalid UTF-8 will remain invalid UTF-8.
      a[w] = c
      r += 1
      w += 1
    end
  end
  a[0, w].force_encoding(Encoding::UTF_8)
end

#unsubst(u) ⇒ Object



303
304
305
306
307
# File 'lib/vendor/okjson.rb', line 303

# Splits code point u into its UTF-16 surrogate pair.
#
# @param u [Integer] code point, expected in Usurrself..Umax
# @return [Array<Integer>] [high surrogate, low surrogate], or
#   [Ucharerr, Ucharerr] when u is outside that range or surrogate?(u)
def unsubst(u)
  encodable = u >= Usurrself && u <= Umax && !surrogate?(u)
  return [Ucharerr, Ucharerr] unless encodable

  offset = u - Usurrself
  high = Usurr1 + ((offset >> 10) & 0x3ff)
  low = Usurr2 + (offset & 0x3ff)
  [high, low]
end

#valparse(ts) ⇒ Object

Parses a “value” in the sense of RFC 4627. Returns the parsed value and any trailing tokens.



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/vendor/okjson.rb', line 65

# Parses a "value" in the sense of RFC 4627.
#
# @param ts [Array<Array>] token list
# @return [Array] two elements: the parsed value and the remaining tokens
# @raise [OkJson::ParserError] with 'empty' if ts has no tokens, or with
#   'unexpected ...' if the first token cannot start a value
def valparse(ts)
  # The former `ts.length < 0` could never be true, so running out of
  # tokens was reported as "unexpected nil" instead of 'empty'.
  raise OkJson::ParserError, 'empty' if ts.empty?

  typ, _, val = ts[0]
  case typ
  when '{' then objparse(ts)
  when '[' then arrparse(ts)
  when :val, :str then [val, ts[1..-1]]
  else
    raise OkJson::ParserError, "unexpected #{val.inspect}"
  end
end