Class: Glaemscribe::API::UpDownTehtaSplitPreProcessorOperator

Inherits:

PreProcessorOperator

Object
PrePostProcessorOperator
PreProcessorOperator
Glaemscribe::API::UpDownTehtaSplitPreProcessorOperator

show all

Defined in: lib/api/pre_processor/up_down_tehta_split.rb

Instance Attribute Summary collapse

#consonant_list ⇒ Object readonly

Returns the value of attribute consonant_list.
#vowel_list ⇒ Object readonly

Returns the value of attribute vowel_list.

Attributes inherited from PrePostProcessorOperator

#finalized_glaeml_element, #glaeml_element

Instance Method Summary collapse

Methods inherited from PrePostProcessorOperator

#eval_arg, #finalize_glaeml_element, #initialize

Constructor Details

This class inherits a constructor from Glaemscribe::API::PrePostProcessorOperator

Instance Attribute Details

#consonant_list ⇒ `Object` (readonly)

Returns the value of attribute consonant_list.



28
29
30

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 28

# Reader for the @consonant_list instance variable.
# Returns nil when that variable has not been assigned.
def consonant_list
  @consonant_list
end

#vowel_list ⇒ `Object` (readonly)

Returns the value of attribute vowel_list.



28
29
30

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 28

# Reader for the @vowel_list instance variable.
# Returns nil when that variable has not been assigned.
def vowel_list
  @vowel_list
end

Instance Method Details

#apply(content) ⇒ `Object`

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 115

# Runs apply_to_word over every word found in +content+.
#
# A "word" is a maximal run of characters that are keys of @word_split_map.
# Every other character acts as a separator: it is copied to the output
# unchanged, right after the processed word that precedes it (which may be
# the empty string).
#
# @param content [String] the text to process
# @return [String] the text with every word replaced by apply_to_word(word)
def apply(content)
  output  = "".dup
  pending = "".dup

  content.split(//).each do |char|
    if @word_split_map[char]
      # Still inside a word: keep collecting its characters.
      pending << char
      next
    end

    # Separator reached: flush the collected word, then the separator itself.
    output << apply_to_word(pending)
    output << char
    pending = "".dup
  end

  # Flush whatever word was still being collected at the end of the input.
  output << apply_to_word(pending)
end

#apply_to_word(w) ⇒ `Object`

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 62

# Cuts a word into tokens with @splitter_tree, then rewrites every
# consonant / vowel / consonant run (as classified by type_of) so that an
# "@" marker sits between the first consonant and the vowel: CVC becomes
# C@VC. The tokens are finally joined back into a single string.
#
# @param w [String] the word to process
# @return [String] the word with "@" markers inserted
def apply_to_word(w)
  tokens = []

  if w.strip.empty?
    # Blank (or empty) input is kept as one untouched token.
    tokens << w
  else
    until w.length == 0
      match, consumed = @splitter_tree.transcribe(w)

      # When the tree answers with the unknown-character marker, keep the
      # raw leading character of the word instead of the marker.
      tokens << (match == [UNKNOWN_CHAR_OUTPUT] ? w[0..0] : match)

      w = w[consumed..-1]
    end
  end

  marked = []
  pos    = 0

  # Slide over the tokens three at a time. After a CVC hit we jump to the
  # closing consonant, so it can open the next CVC run.
  while pos < tokens.count - 2
    shape = tokens[pos, 3].map { |tok| type_of(tok) }

    marked << tokens[pos]
    if shape == ["C", "V", "C"]
      marked << "@"
      marked << tokens[pos + 1]
      pos += 2
    else
      pos += 1
    end
  end

  # Whatever was not consumed by the sliding window is copied as is.
  marked.concat(tokens.drop(pos))

  marked.join("")
end

#finalize(trans_options) ⇒ `Object`

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 29

# Prepares the lookup structures used by the other methods of this operator.
#
# Reads two comma-separated lists from the finalized glaeml element
# (args[0]: vowel tokens, args[1]: consonant tokens), strips the whitespace
# around every token, and builds:
#
# * @vowel_list / @consonant_list - the parsed token lists, returned by the
#   readers of the same name
# * @vowel_map / @consonant_map   - token => token hashes read by type_of
# * @splitter_tree                - tree holding every token as a subpath,
#   used to cut a word into tokens
# * @word_split_map               - every single character found in any
#   token, used to tell word characters from separators
#
# @param trans_options [Object] forwarded unchanged to the parent implementation
def finalize(trans_options)
  super(trans_options)

  raw_vowels     = finalized_glaeml_element.args[0]
  raw_consonants = finalized_glaeml_element.args[1]

  # Store the parsed lists in the instance variables: assigning them to
  # plain locals would leave the vowel_list / consonant_list readers
  # returning nil forever.
  @vowel_list     = raw_vowels.split(/,/).map { |s| s.strip }
  @consonant_list = raw_consonants.split(/,/).map { |s| s.strip }

  @vowel_map      = {} # Recognize vowel tokens
  @consonant_map  = {} # Recognize consonant tokens
  @splitter_tree  = TranscriptionTreeNode.new(nil, nil) # Recognize tokens
  @word_split_map = {}
  # The word split map will help to recognize words
  # The splitter tree will help to split words into tokens

  @vowel_list.each do |v|
    @splitter_tree.add_subpath(v, v)
    @vowel_map[v] = v
  end

  @consonant_list.each do |c|
    @splitter_tree.add_subpath(c, c)
    @consonant_map[c] = c
  end

  # Every distinct character used by any token counts as a word character.
  all_letters = (@vowel_list + @consonant_list).join("").split(//).sort.uniq
  all_letters.each { |l| @word_split_map[l] = l }
end

#type_of(token) ⇒ `Object`

# File 'lib/api/pre_processor/up_down_tehta_split.rb', line 52

# Classifies a token against the vowel and consonant maps.
#
# The vowel map is consulted first, so a token present in both maps is
# reported as a vowel.
#
# @param token [Object] the token to look up
# @return [String] "V" when found in @vowel_map, "C" when found in
#   @consonant_map, "X" otherwise
def type_of(token)
  return "V" if @vowel_map[token]
  return "C" if @consonant_map[token]

  "X"
end

Class: Glaemscribe::API::UpDownTehtaSplitPreProcessorOperator

Instance Attribute Summary collapse

Attributes inherited from PrePostProcessorOperator

Instance Method Summary collapse

Methods inherited from PrePostProcessorOperator

Constructor Details

Instance Attribute Details

#consonant_list ⇒ Object (readonly)

#vowel_list ⇒ Object (readonly)

Instance Method Details

#apply(content) ⇒ Object

#apply_to_word(w) ⇒ Object

#finalize(trans_options) ⇒ Object

#type_of(token) ⇒ Object

#consonant_list ⇒ `Object` (readonly)

#vowel_list ⇒ `Object` (readonly)

#apply(content) ⇒ `Object`

#apply_to_word(w) ⇒ `Object`

#finalize(trans_options) ⇒ `Object`

#type_of(token) ⇒ `Object`