Class: Tml::Tokenizers::XMessage

Inherits:
Object
  • Object
show all
Defined in:
lib/tml/tokenizers/x_message.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, opts = {}) ⇒ XMessage

Returns a new instance of XMessage.



69
70
71
72
73
74
75
76
77
# File 'lib/tml/tokenizers/x_message.rb', line 69

# Builds a tokenizer for +text+ and parses it immediately.
#
# @param text [String, nil] the message to parse; nil is treated as empty
# @param opts [Hash, nil] tokenizer options; nil is replaced by an empty hash
def initialize(text, opts = {})
  # Input and configuration.
  @label   = text
  @options = opts || {}
  @len     = text ? text.length : 0

  # Cursor state used by next_char / revert / update_last.
  @pos  = 0
  @last = nil

  # Parse result; tokenize fills it in (or leaves it nil on failure).
  @tree = nil
  tokenize
end

Instance Attribute Details

#label ⇒ Object

Returns the value of attribute label.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# The message text this tokenizer was constructed with (assigned from the
# +text+ argument in #initialize).
def label
  @label
end

#last ⇒ Object

Returns the value of attribute last.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# The character recorded by #update_last: the one just before the cursor
# position at the time of the call, or nil when the cursor was at 0.
# #escaped? compares it with a backslash.
def last
  @last
end

#len ⇒ Object

Returns the value of attribute len.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# Length of the label as computed in #initialize; 0 when the label is nil.
def len
  @len
end

#options ⇒ Object

Returns the value of attribute options.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# Options given to the constructor; an empty hash when nil was passed.
def options
  @options
end

#pos ⇒ Object

Returns the value of attribute pos.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# Zero-based cursor into the label: the index of the character the next
# #next_char call will return. Starts at 0, is incremented by #next_char
# and decremented by #revert.
def pos
  @pos
end

#tree ⇒ Object

Returns the value of attribute tree.



54
55
56
# File 'lib/tml/tokenizers/x_message.rb', line 54

# The parsed expression assigned by #tokenize: an array of fragment hashes,
# or nil when parsing raised an error.
def tree
  @tree
end

Instance Method Details

#choice(language, token, token_object) ⇒ Object



362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
# File 'lib/tml/tokenizers/x_message.rb', line 362

# Resolves which style key of a choice element applies to +token_object+.
#
# Looks up the language context named by the token's first context key,
# asks it for the rule matching +token_object+ and translates that rule's
# keyword through #rule_key.
#
# @param language [Object] must respond to +context_by_keyword+
# @param token [Object, nil] must respond to +context_keys+
# @param token_object [Object] value handed to +find_matching_rule+
# @return [Object, nil] the mapped rule key, or nil when the token, its
#   context key, the context or a matching rule is missing
def choice(language, token, token_object)
  context_key = token && token.context_keys.first
  context = context_key && language.context_by_keyword(context_key)
  matched = context && context.find_matching_rule(token_object)

  matched ? rule_key(context_key, matched.keyword) : nil
end

#choice?(type) ⇒ Boolean

Returns:

  • (Boolean)


392
393
394
# File 'lib/tml/tokenizers/x_message.rb', line 392

# Tells whether +type+ is the 'choice' format type.
#
# @param type [Object] format type of a parsed element
# @return [Boolean] result of comparing +type+ with the string 'choice'
def choice?(type)
  type == 'choice'
end

#collection_format_style(result, c, argument_index, format_type) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/tml/tokenizers/x_message.rb', line 110

# Parses the remainder of a collection format element (the 'list' type in
# the processor table of #traverse_format_element): an optional sub-type
# followed by '|'-separated styles, and appends one element hash to +result+.
#
# The appended hash has the keys :index, :type, :subtype and :styles, where
# :styles is an array of {key:, items:} hashes. A style is a key followed by
# '#' or '<' and its text (parsed by #traverse_text); a '<' is kept as the
# last character of the key. A key directly followed by '|' or '}' gets an
# empty :items list.
#
# @param result [Array<Hash>] receives the parsed element
# @param c [String] the character that ended the format type; ',' announces
#   a sub-type, otherwise the sub-type defaults to 'text'
# @param argument_index [Object] index of the argument this element formats
# @param format_type [String] the format type that was parsed
# @return [nil]
# @raise [RuntimeError] when the string ends prematurely, or when an escaped
#   '}' is met where a style is expected
def collection_format_style(result, c, argument_index, format_type)
  # register the format element
  styles = []
  subtype = 'text' # default

  if c == ','
    # we have a sub-type
    subtype = ''
    c = next_char
    while c && !',}'.index(c)
      subtype += c
      c = next_char
      unless c
        raise "expected ',' or '}', but found end of string"
      end
    end
  end

  result << {index: argument_index, type: format_type, subtype: subtype, styles: styles}

  return if c == '}'

  # parse format style
  while c
    c = next_char
    unless c
      raise "expected '}', '|' or format style value, but found end of string"
    end

    if c == '}'
      return unless escaped?

      # An escaped '}' can only get here after the revert below. Reverting
      # and re-reading it would repeat forever while appending empty styles,
      # so fail the parse instead of hanging.
      raise "unexpected escaped '}' in format style at position #{@pos}"
    elsif c == '|'
      next
    end

    style_key = ''
    while c && !'#<|}'.index(c)
      style_key += c
      c = next_char
      unless c
        raise "expected '#', '<' or '|', but found end of string"
      end
    end

    # '<' is part of the key itself (e.g. a comparison style), '#' is not.
    style_key += c if c == '<'

    items = []
    styles << {key: style_key, items: items}

    if '#<'.index(c)
      traverse_text(items)
    elsif '|}'.index(c)
      # we found a key without value e.g. {0,param,possessive} and {0,param,prefix#.|possessive}
      # put the delimiter back so the top of the loop handles it
      revert
    end
  end
end

#compile(language, exp, buffer, params) ⇒ Object



400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/tml/tokenizers/x_message.rb', line 400

# Renders a parsed expression into +buffer+, recursing into nested styles.
#
# For every element of +exp+:
# * with :styles and a choice type - renders the style whose key equals the
#   key returned by #choice, if any;
# * with :styles and a map type - renders the style whose key equals the
#   token value, if any;
# * with :styles and a decoration type - wraps the first style (if any) in
#   the token's open and close tags;
# * with :styles otherwise - renders the first style, if any;
# * without :styles and a data type - appends the token value;
# * anything else - appends the element's :value.
#
# An element whose style list has no matching (or no first) style
# contributes nothing instead of failing on nil.
#
# @param language [Object] passed through to #choice and #get_token_value
# @param exp [Array<Hash>] parsed elements, e.g. the tokenizer tree
# @param buffer [Array] output fragments; appended to in place
# @param params [Object] token values passed to #get_token_object
# @return [Array] +buffer+
def compile(language, exp, buffer, params)
  exp.each do |el|
    token = token_by_type(el[:type], el)
    token_object = get_token_object(params, token)
    token_value = get_token_value(token_object, token, language)
    styles = el[:styles]

    if styles
      if choice?(el[:type])
        key = choice(language, token, token_object)
        selected = styles.find { |candidate| candidate[:key] == key }
        compile(language, selected[:items], buffer, params) if selected
      elsif map?(el[:type])
        selected = styles.find { |candidate| candidate[:key] == token_value }
        compile(language, selected[:items], buffer, params) if selected
      elsif decoration?(el[:type])
        first_style = styles.first
        buffer << token.open_tag(token_object)
        compile(language, first_style[:items], buffer, params) if first_style
        buffer << token.close_tag
      else
        first_style = styles.first
        compile(language, first_style[:items], buffer, params) if first_style
      end
    elsif data?(el[:type])
      buffer << token_value
    else
      buffer << el[:value]
    end
  end

  buffer
end

#data?(type) ⇒ Boolean

Returns:

  • (Boolean)


382
383
384
385
# File 'lib/tml/tokenizers/x_message.rb', line 382

# Tells whether +type+ is one of the data format types ('param', 'number').
#
# @param type [String, nil] format type of a parsed element
# @return [Boolean] false for a nil/false +type+
def data?(type)
  type ? %w(param number).include?(type) : false
end

#decoration?(type) ⇒ Boolean

Returns:

  • (Boolean)


387
388
389
390
# File 'lib/tml/tokenizers/x_message.rb', line 387

# Tells whether +type+ is listed in Tml.config.xmessage_decoration_tokens.
# The type is converted to a symbol before the lookup.
#
# @param type [String, Symbol, nil] format type of a parsed element
# @return [Boolean] false for a nil/false +type+
def decoration?(type)
  if type
    Tml.config.xmessage_decoration_tokens.include?(type.to_sym)
  else
    false
  end
end

#default_format_style(result, c, argument_index, format_type) ⇒ Object



195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
# File 'lib/tml/tokenizers/x_message.rb', line 195

# Parses the '|'-separated styles of a format element whose type has no
# dedicated processor (the :default entry in #traverse_format_element) and
# appends one element hash to +result+.
#
# The appended hash has the keys :index, :type and :styles, where :styles is
# an array of {key:, items:} hashes. A style is a key followed by '#', '<'
# or '+' and its text (parsed by #traverse_text); '<' and '+' are kept as
# the last character of the key. A key directly followed by '|' or '}' gets
# an empty :items list.
#
# @param result [Array<Hash>] receives the parsed element
# @param c [String] the character that ended the format type; only used to
#   enter the parsing loop
# @param argument_index [Object] index of the argument this element formats
# @param format_type [String] the format type that was parsed
# @return [nil]
# @raise [RuntimeError] when the string ends prematurely, or when an escaped
#   '}' is met where a style is expected
def default_format_style(result, c, argument_index, format_type)
  # register the format element
  styles = []
  result << {index: argument_index, type: format_type, styles: styles}

  # parse format style
  while c
    c = next_char
    unless c
      raise "expected '}', '|' or format style value, but found end of string"
    end

    if c == '}'
      return unless escaped?

      # An escaped '}' can only get here after the revert below. Reverting
      # and re-reading it would repeat forever while appending empty styles,
      # so fail the parse instead of hanging.
      raise "unexpected escaped '}' in format style at position #{@pos}"
    elsif c == '|'
      next
    end

    style_key = ''
    while c && !'#<+|}'.index(c)
      style_key += c
      c = next_char
      unless c
        raise "expected '#', '<', '+' or '|', but found end of string"
      end
    end

    # '<' and '+' are part of the key itself, '#' is only a separator.
    style_key += c if c == '<' || c == '+'

    items = []
    styles << {key: style_key, items: items}

    if '#<+'.index(c)
      traverse_text(items)
    elsif '|}'.index(c)
      # we found a key without value e.g. {0,param,possessive} and {0,param,prefix#.|possessive}
      # put the delimiter back so the top of the loop handles it
      revert
    end
  end
end

#escaped? ⇒ Boolean

Returns:

  • (Boolean)


102
103
104
# File 'lib/tml/tokenizers/x_message.rb', line 102

# Tells whether the character recorded in @last (the one preceding the most
# recently read character) is a backslash.
#
# @return [Boolean] always true or false; false when @last is nil
def escaped?
  @last == '\\'
end

#extract_tokens(tree, tokens) ⇒ Object



461
462
463
464
465
466
467
468
469
470
471
# File 'lib/tml/tokenizers/x_message.rb', line 461

# Walks +tree+ depth-first and appends the token built for every fragment
# (via #token_by_type) to +tokens+. Descends into a fragment's :items when
# present, otherwise into its :styles.
#
# @param tree [Array<Hash>] fragments or styles to scan
# @param tokens [Array] collector, appended to in place
def extract_tokens(tree, tokens)
  tree.each do |fragment|
    found = token_by_type(fragment[:type], fragment)
    tokens << found if found

    children = fragment[:items] || fragment[:styles]
    extract_tokens(children, tokens) if children
  end
end

#get_token_object(token_values, token) ⇒ Object



473
474
475
476
# File 'lib/tml/tokenizers/x_message.rb', line 473

# Asks +token+ for its object among +token_values+.
#
# @param token_values [Object] values handed to +token.token_object+
# @param token [Object, nil] must respond to +token_object+
# @return [Object, nil] nil when there is no token
def get_token_object(token_values, token)
  token ? token.token_object(token_values) : nil
end

#get_token_value(token_object, token, language) ⇒ Object



478
479
480
481
# File 'lib/tml/tokenizers/x_message.rb', line 478

# Asks +token+ for the display value of +token_object+ in +language+.
#
# @param token_object [Object, nil] object resolved by #get_token_object
# @param token [Object, nil] must respond to +token_value+
# @param language [Object] passed through to +token.token_value+
# @return [Object, nil] nil when either the object or the token is missing
def get_token_value(token_object, token, language)
  if token_object && token
    token.token_value(token_object, language)
  else
    nil
  end
end

#map?(type) ⇒ Boolean

Returns:

  • (Boolean)


396
397
398
# File 'lib/tml/tokenizers/x_message.rb', line 396

# Tells whether +type+ is the 'map' format type.
#
# @param type [Object] format type of a parsed element
# @return [Boolean] result of comparing +type+ with the string 'map'
def map?(type)
  type == 'map'
end

#next_char ⇒ Object



83
84
85
86
87
88
# File 'lib/tml/tokenizers/x_message.rb', line 83

# Reads the character at the cursor and advances the cursor by one.
# Before advancing, @last is refreshed through #update_last so that it
# holds the character preceding the one returned.
#
# @return [String, nil] the character read, or nil when the label is empty
#   or the cursor is already at its end
def next_char
  exhausted = @len == 0 || @pos >= @len
  return nil if exhausted

  update_last
  current = @label[@pos]
  @pos += 1
  current
end

#no_format_style(result, c, argument_index, format_type) ⇒ Object



106
107
108
# File 'lib/tml/tokenizers/x_message.rb', line 106

# Processor for format types that must not be followed by a format style
# ('possessive' and 'salutation' in #traverse_format_element); always fails.
#
# @param result [Array] unused
# @param c [String] unused
# @param argument_index [Object] unused
# @param format_type [String] the offending format type, quoted in the error
# @raise [RuntimeError] always
def no_format_style(result, c, argument_index, format_type)
  message = "no format style allowed for format type '" + format_type + "'"
  raise RuntimeError, message
end

#optional_style_format_types ⇒ Object



56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/tml/tokenizers/x_message.rb', line 56

# Format types that may appear without a format style, e.g. {0,number}.
# The lookup table is built on first use and memoized.
#
# @return [Hash{String => true}] format type name => true
def optional_style_format_types
  @optional_style_format_types ||=
    %w(text date time number name list possessive salutation).each_with_object({}) do |format_type, lookup|
      lookup[format_type] = true
    end
end

#peek_char ⇒ Object



90
91
92
93
# File 'lib/tml/tokenizers/x_message.rb', line 90

# Returns the character at the cursor without moving the cursor.
#
# @return [String, nil] nil when the label is empty or the cursor is past
#   the last character
def peek_char
  @len == 0 ? nil : @label[@pos]
end

#revert ⇒ Object



95
96
97
98
99
100
# File 'lib/tml/tokenizers/x_message.rb', line 95

# Moves the cursor back by one character and refreshes @last through
# #update_last. Does nothing when the cursor is already at 0.
#
# @return [String, nil] the refreshed @last, or nil when nothing was done
def revert
  return unless @pos > 0

  @pos -= 1
  update_last
end

#rule_key(context_key, rule_key) ⇒ Object



357
358
359
360
# File 'lib/tml/tokenizers/x_message.rb', line 357

# Translates a context rule keyword into the key used by message styles,
# using Tml.config.xmessage_rule_key_mapping. Both lookups use symbols.
#
# @param context_key [String, Symbol] name of the language context
# @param rule_key [String, Symbol] keyword of the matched rule
# @return [Object] the mapped key, or +rule_key+ itself when the context or
#   the keyword has no mapping
def rule_key(context_key, rule_key)
  context_mapping = Tml.config.xmessage_rule_key_mapping[context_key.to_sym]
  return rule_key unless context_mapping

  context_mapping[rule_key.to_sym] || rule_key
end

#substitute(language, tokens = {}, options = {}) ⇒ Object



483
484
485
486
# File 'lib/tml/tokenizers/x_message.rb', line 483

# Renders the parsed message with the given token values.
#
# @param language [Object] passed through to #compile
# @param tokens [Object] token values passed to #compile as its params
# @param options [Hash] accepted but not used by this method
# @return [String, nil] the rendered text, or the original label when the
#   message could not be parsed (no tree)
def substitute(language, tokens = {}, options = {})
  if tree
    fragments = compile(language, tree, [], tokens)
    fragments.join('')
  else
    @label
  end
end

#text_format_style(result, c, argument_index, format_type) ⇒ Object



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
# File 'lib/tml/tokenizers/x_message.rb', line 172

# Reads a free-text format style up to the next '}' and appends
# {index:, type:, value:} to +result+. Used for the date, time, number and
# suffix types in #traverse_format_element. The closing '}' is consumed;
# escaping is not considered here.
#
# @param result [Array<Hash>] receives the parsed element
# @param c [String] ignored; reading restarts from the cursor
# @param argument_index [Object] index of the argument this element formats
# @param format_type [String] the format type that was parsed
# @return [nil]
# @raise [RuntimeError] when the string ends before the closing '}'
def text_format_style(result, c, argument_index, format_type)
  style_text = ''
  ch = next_char
  raise "expected format style or '}', but found end of string" unless ch

  until ch == '}'
    style_text += ch
    ch = next_char
    raise "expected '}', but found end of string" unless ch
  end

  result << {index: argument_index, type: format_type, value: style_text}
  nil
end

#token_by_type(type, data) ⇒ Object



447
448
449
450
451
452
453
454
455
456
457
458
459
# File 'lib/tml/tokenizers/x_message.rb', line 447

# Builds the token object that corresponds to a parsed element.
# The type predicates are tried in this order: decoration, data, choice, map.
#
# @param type [String, nil] format type of the element
# @param data [Hash] the parsed element, handed to the token constructor
# @return [Object, nil] a Tml::Tokens::XMessage token created with the
#   label and +data+, or nil for any other type (e.g. plain text)
def token_by_type(type, data)
  token_class =
    if decoration?(type)
      Tml::Tokens::XMessage::Decoration
    elsif data?(type)
      Tml::Tokens::XMessage::Data
    elsif choice?(type)
      Tml::Tokens::XMessage::Choice
    elsif map?(type)
      Tml::Tokens::XMessage::Map
    end

  token_class && token_class.new(label, data)
end

#tokenize ⇒ Object



347
348
349
350
351
352
353
354
355
# File 'lib/tml/tokenizers/x_message.rb', line 347

# Parses the label into @tree via #traverse_text.
#
# Parse errors are reported with +pp+ and leave @tree as nil, which
# #substitute treats as "return the label untouched". Only StandardError is
# rescued, so Interrupt, SystemExit and similar still propagate.
#
# @return [Array<Hash>, nil] the parsed tree, or nil when parsing failed
def tokenize
  result = []
  traverse_text(result)
  @tree = result
rescue StandardError => ex
  pp ex
  # Interpolation keeps the report itself from failing on a nil label.
  pp "Failed to parse the expression: #{@label}"
  @tree = nil
end

#tokens ⇒ Object



439
440
441
442
443
444
445
# File 'lib/tml/tokenizers/x_message.rb', line 439

# Lists the distinct tokens used by the message, memoized after first use.
# Tokens are considered duplicates when their class name and +full_name+
# are equal. A message that failed to parse (nil tree) has no tokens.
#
# @return [Array] unique tokens in order of first appearance
def tokens
  @tokens ||= begin
    collected = []
    # tree is nil when tokenize failed; there is nothing to walk then
    extract_tokens(tree, collected) if tree
    collected.uniq { |t| [t.class.name, t.full_name] }
  end
end

#traverse_format_element(result) ⇒ Object



238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
# File 'lib/tml/tokenizers/x_message.rb', line 238

# Parses one format element; the cursor is just past its opening '{'.
#
# Grammar handled here: an optional argument index (digits, or a keyword
# starting with ':'), optionally followed by ',' and a format type,
# optionally followed by ',' and a format style. The style is delegated to
# the processor registered for the format type.
#
# Appends to +result+:
# * {type: 'param', index:} for an element without format type;
# * {type: <format type>, index:} for a type listed in
#   #optional_style_format_types that has no style;
# * whatever the style processor appends otherwise.
#
# The index is stored as the String that was read; it stays -1 when the
# element does not start with a digit or ':'.
# NOTE(review): numeric indexes are not converted to Integer - confirm that
# token classes expect a String here.
#
# @param result [Array<Hash>] receives the parsed element
# @raise [RuntimeError] on premature end of string, a non-numeric argument
#   index, a format type that requires a style but has none, or an
#   unexpected character after the format type
def traverse_format_element(result)
  argument_index = -1
  format_type = nil
  c = next_char

  unless c
    raise 'expected place holder index, but found end of string'
  end

  if c.match(/[\d:]/)
    # process argument index
    is_keyword = c == ':'
    index = ''
    while c && !',}'.index(c)
      index += c
      c = next_char
      unless c
        raise "expected ',' or '}', but found end of string"
      end
    end

    # Anchored so that the whole index has to be digits, not just a part.
    if !is_keyword && !index.match(/\A\d+\z/)
      raise "argument index must be numeric: #{index}"
    end

    argument_index = index
  end

  if c != '}'
    # process format type
    format_type = ''
    c = next_char
    unless c
      raise 'expected format type, but found end of string'
    end

    while c && !',}'.index(c) && !escaped?
      format_type += c
      c = next_char
      unless c
        raise "expected ',' or '}', but found end of string"
      end
    end
  end

  if c == '}' && !escaped?
    if format_type && optional_style_format_types[format_type]
      # we found {0,number} or {0,possessive} or {0,salutation}, which are valid expressions
      result << {type: format_type, index: argument_index}
    else
      if format_type
        # we found something like {0,<type>}, which is invalid.
        raise "expected format style for format type '#{format_type}'"
      end

      # push param format element
      result << {type: 'param', index: argument_index}
    end
  elsif c == ','
    processors = {
        list: 'collection_format_style',
        date: 'text_format_style',
        time: 'text_format_style',
        number: 'text_format_style',
        suffix: 'text_format_style',
        possessive: 'no_format_style',
        salutation: 'no_format_style',
        default: 'default_format_style'
    }
    # Types without a dedicated processor use the default style parser.
    processor = (processors[format_type.to_sym] || processors[:default])
    send(processor, result, c, argument_index, format_type)
  else
    raise "expected ',' or '}', but found '#{c}' at position #{@pos}"
  end
end

#traverse_text(result) ⇒ Object



314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
# File 'lib/tml/tokenizers/x_message.rb', line 314

# Reads literal text from the cursor and appends the parsed fragments to
# +result+: plain runs as {type: 'trans', value:} and format elements via
# #traverse_format_element.
#
# A "'" toggles quoted mode (the quote itself stays in the text). Outside
# quoted mode, an unescaped '{' starts a format element, and an unescaped
# '|' or '}' ends the text: that delimiter is pushed back with #revert so
# the caller can read it. Everything else is copied to the text as is.
#
# @param result [Array<Hash>] collector, appended to in place
# @return [Array<Hash>] +result+
def traverse_text(result)
  quoted = false
  text = ''

  while (ch = next_char)
    quoted = !quoted if ch == "'"
    # special characters only count outside quotes and when not escaped
    active = !quoted && !escaped?

    if active && ch == '{'
      unless text.empty?
        result << {type: 'trans', value: text}
        text = ''
      end
      traverse_format_element(result)
    elsif active && (ch == '|' || ch == '}')
      revert
      break
    else
      text += ch
    end
  end

  result << {type: 'trans', value: text} unless text.empty?
  result
end

#update_last ⇒ Object



79
80
81
# File 'lib/tml/tokenizers/x_message.rb', line 79

# Records in @last the character just before the cursor, or nil when the
# cursor is at the start of the label.
#
# @return [String, nil] the new value of @last
def update_last
  if @pos > 0
    @last = @label[@pos - 1]
  else
    @last = nil
  end
end