Class: Glaemscribe::API::TranscriptionProcessor

Inherits:

Object

Object
Glaemscribe::API::TranscriptionProcessor

show all

Defined in: lib/api/transcription_processor.rb

Instance Attribute Summary collapse

#mode ⇒ Object readonly

Returns the value of attribute mode.
#rule_groups ⇒ Object readonly

Returns the value of attribute rule_groups.

Instance Method Summary collapse

#add_subrule(sub_rule) ⇒ Object
#apply(l, debug_context) ⇒ Object
#finalize(trans_options) ⇒ Object
#initialize(mode) ⇒ TranscriptionProcessor constructor

A new instance of TranscriptionProcessor.
#transcribe_word(word, debug_context) ⇒ Object

Constructor Details

#initialize(mode) ⇒ `TranscriptionProcessor`

Returns a new instance of TranscriptionProcessor.

# File 'lib/api/transcription_processor.rb', line 30

# Creates a processor bound to the given mode, with no rule groups yet.
#
# @param mode [Object] stored as-is and exposed through #mode
def initialize(mode)
  @mode         = mode
  # Starts out as an empty Hash; nothing in this method populates it.
  @rule_groups  = {}
end

Instance Attribute Details

#mode ⇒ `Object` (readonly)

Returns the value of attribute mode.



28
29
30

# File 'lib/api/transcription_processor.rb', line 28

# Read-only accessor for the mode this processor was constructed with.
#
# @return [Object] the value passed to #initialize
def mode
  @mode
end

#rule_groups ⇒ `Object` (readonly)

Returns the value of attribute rule_groups.



27
28
29

# File 'lib/api/transcription_processor.rb', line 27

# Read-only accessor for the rule-group table (a Hash that is empty right
# after construction). #finalize iterates it as name => group pairs.
#
# @return [Hash] the rule groups registered on this processor
def rule_groups
  @rule_groups
end

Instance Method Details

#add_subrule(sub_rule) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 35

# Registers one sub-rule in the transcription tree.
#
# The sub-rule's source combination is concatenated into a single path
# string, and its destination combination is stored at that path.
#
# @param sub_rule [#src_combination, #dst_combination] the sub-rule to add
# @return [Object] whatever the tree's add_subpath returns
def add_subrule(sub_rule)
  @transcription_tree.add_subpath(
    sub_rule.src_combination.join(""),
    sub_rule.dst_combination
  )
end

#apply(l, debug_context) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 76

# Splits a line of text into words and transcribes each of them.
#
# Characters are scanned one by one:
# * space / tab  -> the pending word is transcribed, then "*SPACE" is emitted
# * "\n"         -> the pending word is transcribed, then "*LF" is emitted
# * "\r"         -> skipped
# * anything else is looked up in @in_charset; a change of group (relative
#   to the previous non-whitespace character) also closes the pending word.
#
# Whatever is still pending at the end of the line is transcribed last.
#
# @param l [String] the text to process
# @param debug_context [Object] forwarded untouched to #transcribe_word
# @return [Array] the tokens produced for the whole line, in order
def apply(l, debug_context)
  tokens       = []
  active_group = nil
  pending      = ""

  l.each_char do |char|
    case char
    when "\r"
      # Carriage returns are dropped.
      next
    when " ", "\t", "\n"
      tokens.concat(transcribe_word(pending, debug_context))
      tokens << (char == "\n" ? "*LF" : "*SPACE")
      pending = ""
    else
      group = @in_charset[char]
      if group == active_group
        pending += char
      else
        # Group switch: close the current word and start a new one.
        tokens.concat(transcribe_word(pending, debug_context))
        active_group = group
        pending      = char
      end
    end
  end

  # Flush the trailing word (possibly empty).
  tokens.concat(transcribe_word(pending, debug_context))
  tokens
end

#finalize(trans_options) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 40

# Prepares the processor for transcription. In order, it:
#
# 1. resets @errors and creates a fresh transcription tree seeded with the
#    WORD_BOUNDARY_TREE and WORD_BREAKER paths (both mapped to [""]);
# 2. finalizes every rule group with the given options;
# 3. merges the groups' input charsets into @in_charset, reporting a
#    Glaeml::Error on mode.errors when a character is claimed by two groups
#    (the first group to claim it keeps it);
# 4. adds every sub-rule of every rule of every group to the tree.
#
# @param trans_options [Object] forwarded to each rule group's finalize
# @return [Hash] the rule_groups hash (result of the final iteration)
def finalize(trans_options)
  @errors = []

  @transcription_tree = TranscriptionTreeNode.new(nil, nil)

  # Word boundary and word breaker are always part of the tree.
  [WORD_BOUNDARY_TREE, WORD_BREAKER].each do |marker|
    @transcription_tree.add_subpath(marker, [""])
  end

  rule_groups.each_value { |group| group.finalize(trans_options) }

  # Input charset: character => owning group.
  @in_charset = {}
  rule_groups.each do |group_name, group|
    group.in_charset.each do |char, char_group|
      owner = @in_charset[char]
      if owner
        mode.errors << Glaeml::Error.new(-1, "Group #{group_name} uses input character #{char} which is also used by group #{owner.name}. Input charsets should not intersect between groups.")
      else
        @in_charset[char] = char_group
      end
    end
  end

  # Transcription tree: one path per sub-rule.
  rule_groups.each_value do |group|
    group.rules.each do |rule|
      rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
    end
  end
end

#transcribe_word(word, debug_context) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 111

# Transcribes a single word into a flat list of tokens.
#
# The word is wrapped in WORD_BOUNDARY_TREE on both sides and then handed
# repeatedly to the transcription tree. Each call yields the tokens it
# produced together with the number of characters it consumed; the
# consumed prefix is dropped and the loop continues until nothing is left.
#
# Every step is also recorded in debug_context.processor_pathes as
# [eaten, tokens, tokens], where `eaten` is the prefix consumed by that
# step.
#
# @param word [String] the word to transcribe (may be empty)
# @param debug_context [#processor_pathes] collector for the debug trace
# @return [Array] all tokens produced for the word, in order
def transcribe_word(word, debug_context)
  res       = []
  remaining = WORD_BOUNDARY_TREE + word + WORD_BOUNDARY_TREE

  until remaining.empty?
    tokens, len = @transcription_tree.transcribe(remaining)

    # Capture the consumed prefix BEFORE advancing. Slicing after the
    # advance (as was done previously) logged the *next* characters instead
    # of the ones this step actually matched.
    eaten     = remaining[0, len]
    remaining = remaining[len..-1]

    res.concat(tokens)
    debug_context.processor_pathes << [eaten, tokens, tokens]
  end

  # Return token list
  res
end

Class: Glaemscribe::API::TranscriptionProcessor

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(mode) ⇒ TranscriptionProcessor

Instance Attribute Details

#mode ⇒ Object (readonly)

#rule_groups ⇒ Object (readonly)

Instance Method Details

#add_subrule(sub_rule) ⇒ Object

#apply(l, debug_context) ⇒ Object

#finalize(trans_options) ⇒ Object

#transcribe_word(word, debug_context) ⇒ Object

#initialize(mode) ⇒ `TranscriptionProcessor`

#mode ⇒ `Object` (readonly)

#rule_groups ⇒ `Object` (readonly)

#add_subrule(sub_rule) ⇒ `Object`

#apply(l, debug_context) ⇒ `Object`

#finalize(trans_options) ⇒ `Object`

#transcribe_word(word, debug_context) ⇒ `Object`