Class: Glaemscribe::API::ResolveVirtualsPostProcessorOperator

Inherits:
PostProcessorOperator show all
Defined in:
lib/api/post_processor/resolve_virtuals.rb

Instance Attribute Summary

Attributes inherited from PrePostProcessorOperator

#finalized_glaeml_element, #glaeml_element

Instance Method Summary collapse

Methods inherited from PrePostProcessorOperator

#eval_arg, #finalize_glaeml_element, #initialize

Constructor Details

This class inherits a constructor from Glaemscribe::API::PrePostProcessorOperator

Instance Method Details

#apply(tokens, charset) ⇒ Object



98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/api/post_processor/resolve_virtuals.rb', line 98

# Runs the complete virtual-resolution pipeline over a token list.
#
# The tokens are first passed through apply_sequences and apply_swaps. The
# outcome is then walked twice with apply_loop: once front to back
# (reversed = false) and once back to front (reversed = true). Trigger states
# are reset before each walk. Replacements are written to a copy, so both
# walks read the same, unmodified token list.
#
# @param tokens  the token list to process (handed to apply_sequences)
# @param charset the charset handed to every helper
# @return the copy of the processed token list that apply_loop wrote into
def apply(tokens, charset)
  # Pre-processing: sequence expansion first, then swaps.
  prepared = apply_swaps(charset, apply_sequences(charset, tokens))

  # Work on a copy so that the two walks below do not interfere with each other.
  resolved = prepared.clone

  # Front-to-back walk (left-to-right virtuals, presumably diacritics).
  reset_trigger_states(charset)
  prepared.each_index do |position|
    apply_loop(charset, prepared, resolved, false, prepared[position], position)
  end

  # Back-to-front walk (right-to-left virtuals, presumably ligatures).
  reset_trigger_states(charset)
  (prepared.count - 1).downto(0) do |position|
    apply_loop(charset, prepared, resolved, true, prepared[position], position)
  end

  resolved
end

#apply_loop(charset, tokens, new_tokens, reversed, token, idx) ⇒ Object



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/api/post_processor/resolve_virtuals.rb', line 40

# Processes a single token during one walk over the token list.
#
# '*SPACE' and '*LF' tokens reset all trigger states and end the step.
# Otherwise, when the token names a virtual char whose direction matches
# +reversed+ and a trigger was recorded for it, the slot +idx+ of +new_tokens+
# is overwritten with the first name of the recorded replacement. Finally the
# (possibly replaced) token is offered to every virtual char of the charset so
# that each can record it as its latest trigger.
#
# @param charset    charset used to look up tokens and to list virtual chars
# @param tokens     the token list being walked (not used by this method)
# @param new_tokens the output list receiving replacements
# @param reversed   direction of the current walk; compared with the char's own +reversed+
# @param token      the token found at +idx+
# @param idx        index of +token+, used to write into +new_tokens+
def apply_loop(charset, tokens, new_tokens, reversed, token, idx)
  # Separators break the trigger context.
  if %w[*SPACE *LF].include?(token)
    reset_trigger_states(charset)
    return
  end

  char = charset[token]
  return if char.nil? # Unknown to the charset, e.g. an empty token

  if char.virtual? && char.reversed == reversed
    replacement = @last_triggers[char]
    unless replacement.nil?
      # Use the first name of the concrete replacement char ...
      new_tokens[idx] = replacement.names.first
      # ... and let the replaced token act as a trigger itself (cascading virtuals).
      token = new_tokens[idx]
    end
  end

  # Let every virtual char remember this token if it is one of its triggers.
  charset.virtual_chars.each do |virtual_char|
    triggered = virtual_char[token]
    @last_triggers[virtual_char] = triggered unless triggered.nil?
  end
end

#apply_sequences(charset, tokens) ⇒ Object



65
66
67
68
69
70
71
72
73
74
75
76
# File 'lib/api/post_processor/resolve_virtuals.rb', line 65

# Expands sequence chars into the tokens they stand for.
#
# Every token that the charset resolves to a char answering true to
# +sequence?+ is replaced by the elements of that char's +sequence+; every
# other token (including those unknown to the charset) is kept as is.
#
# @param charset charset used to look up each token
# @param tokens  the token list to expand; it is not modified
# @return [Array] a new list with the sequences expanded in place
def apply_sequences(charset, tokens)
  tokens.flat_map do |token|
    char = charset[token]
    char && char.sequence? ? char.sequence : [token]
  end
end

#apply_swaps(charset, tokens) ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/api/post_processor/resolve_virtuals.rb', line 78

# Swaps adjacent tokens wherever the charset declares a swap for them.
#
# The list is scanned once from the front. For each position, the charset is
# asked for the swap triggered by the current token; if one exists and it
# accepts the following token as a target, the two tokens exchange places.
# Because the scan then moves on to the next position, a swapped trigger is
# examined again against its new follower.
#
# @param charset charset answering +swap_for_trigger+
# @param tokens  the token list; it is modified in place
# @return the same +tokens+ object
def apply_swaps(charset, tokens)
  (0...(tokens.length - 1)).each do |pos|
    current  = tokens[pos]
    follower = tokens[pos + 1]

    swap = charset.swap_for_trigger(current)
    next unless swap && swap.has_target?(follower)

    tokens[pos], tokens[pos + 1] = follower, current
  end

  tokens
end

#finalize(trans_options) ⇒ Object



28
29
30
31
# File 'lib/api/post_processor/resolve_virtuals.rb', line 28

# Finalizes the operator: delegates to the parent implementation, then
# allocates the trigger lookup once here so it is not rebuilt later.
#
# @param trans_options options forwarded unchanged to the parent +finalize+
# @return [Hash] the freshly allocated, empty trigger lookup
def finalize(trans_options)
  super # forwards trans_options as received
  @last_triggers = Hash.new
end

#reset_trigger_states(charset) ⇒ Object



33
34
35
36
37
38
# File 'lib/api/post_processor/resolve_virtuals.rb', line 33

# Forgets the last trigger recorded for each virtual char of the charset.
#
# One state entry is kept per virtual char; each of them is set to nil.
# Entries for keys that are not among the charset's virtual chars are untouched.
#
# @param charset charset answering +virtual_chars+
# @return whatever +virtual_chars.each+ returns (the virtual char list for an Array)
def reset_trigger_states(charset)
  charset.virtual_chars.each do |virtual_char|
    @last_triggers.store(virtual_char, nil) # nil means "no trigger seen"
  end
end