Class: Glaemscribe::API::TranscriptionProcessor

Inherits:

Object

Object
Glaemscribe::API::TranscriptionProcessor

show all

Defined in: lib/api/transcription_processor.rb

Instance Attribute Summary collapse

#mode ⇒ Object readonly

Returns the value of attribute mode.
#out_space ⇒ Object

Returns the value of attribute out_space.
#rule_groups ⇒ Object readonly

Returns the value of attribute rule_groups.

Instance Method Summary collapse

#add_subrule(sub_rule) ⇒ Object
#apply(l, out_charset) ⇒ Object
#finalize(trans_options) ⇒ Object
#initialize(mode) ⇒ TranscriptionProcessor constructor

A new instance of TranscriptionProcessor.
#transcribe_word(word, out_charset) ⇒ Object

Constructor Details

#initialize(mode) ⇒ `TranscriptionProcessor`

Returns a new instance of TranscriptionProcessor.

# File 'lib/api/transcription_processor.rb', line 32

# Creates a processor attached to +mode+ with an empty set of rule groups.
#
# Only @mode and @rule_groups are set here; the transcription tree and the
# input charset are built later by #finalize.
#
# @param mode [Object] the owning mode, exposed afterwards through #mode
def initialize(mode)
  @mode = mode
  @rule_groups = {}
end

Instance Attribute Details

#mode ⇒ `Object` (readonly)

Returns the value of attribute mode.



28
29
30

# File 'lib/api/transcription_processor.rb', line 28

# Reader for the mode handed to the constructor.
# #finalize appends input-charset conflict messages to +mode.errors+.
#
# @return [Object] the value stored in @mode
def mode
  @mode
end

#out_space ⇒ `Object`

Returns the value of attribute out_space.



30
31
32

# File 'lib/api/transcription_processor.rb', line 30

# Reader for the optional output-space definition.
# When set, #apply maps each entry through the output charset and joins the
# resulting strings to render a space or tab; when nil, #apply emits " ".
#
# @return [Object] the value stored in @out_space (nil if never assigned)
def out_space
  @out_space
end

#rule_groups ⇒ `Object` (readonly)

Returns the value of attribute rule_groups.



27
28
29

# File 'lib/api/transcription_processor.rb', line 27

# Reader for the rule groups of this processor.
# Starts as an empty Hash in the constructor; #finalize iterates it as
# (group name, group) pairs.
#
# @return [Object] the value stored in @rule_groups
def rule_groups
  @rule_groups
end

Instance Method Details

#add_subrule(sub_rule) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 37

# Registers one sub-rule in the transcription tree.
#
# The source combination is concatenated into a single path string and stored
# in @transcription_tree together with the sub-rule's destination combination.
# @transcription_tree is created by #finalize, so this must run after it exists.
#
# @param sub_rule [#src_combination, #dst_combination] the sub-rule to insert
# @return [Object] whatever the tree's +add_subpath+ returns
def add_subrule(sub_rule)
  destination = sub_rule.dst_combination
  @transcription_tree.add_subpath(sub_rule.src_combination.join(""), destination)
end

#apply(l, out_charset) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 78

# Transcribes one chunk of input text.
#
# The text is scanned character by character and cut into runs:
# * a space or tab ends the current run and emits the output space string;
# * a carriage return or line feed ends the current run and is copied as is;
# * any other character is looked up in @in_charset; a change of group ends
#   the current run and starts a new one with that character.
# Every finished run (including empty ones) is passed to #transcribe_word.
#
# @param l [String] the text to transcribe
# @param out_charset [#[]] lookup from token to an object responding to +str+
# @return [String] the transcribed text
def apply(l, out_charset)
  # Spaces and tabs are rendered with the @out_space tokens when they are
  # defined, and with a plain space otherwise.
  separator =
    if @out_space
      @out_space.map { |token| out_charset[token].str }.join("")
    else
      " "
    end

  output    = ""
  run       = ""
  run_group = nil

  # Transcribes the pending run, appends it to the output and starts afresh.
  flush = lambda do
    output += transcribe_word(run, out_charset)
    run = ""
  end

  l.each_char do |char|
    if char == " " || char == "\t"
      flush.call
      output += separator
    elsif char == "\r" || char == "\n"
      flush.call
      output += char
    else
      group = @in_charset[char]
      # A character from another group closes the run in progress.
      flush.call unless group == run_group
      run_group = group
      run += char
    end
  end

  # Whatever is still pending after the last character.
  flush.call
  output
end

#finalize(trans_options) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 42

# Prepares the processor for transcription.
#
# Steps, in order:
# 1. reset @errors and create a fresh transcription tree seeded with the
#    WORD_BOUNDARY and WORD_BREAKER paths, both mapped to an empty token;
# 2. finalize every rule group with +trans_options+;
# 3. merge the groups' input charsets into @in_charset, where the first group
#    to claim a character keeps it and later claims are reported;
# 4. insert every sub-rule of every rule of every group into the tree.
#
# Charset conflicts are appended to +mode.errors+; @errors is only reset here.
#
# @param trans_options [Object] passed unchanged to each rule group's +finalize+
# @return [Object] the result of the last iteration over +rule_groups+
def finalize(trans_options)
  @errors = []

  @transcription_tree = TranscriptionTreeNode.new(nil, nil)

  # Word boundary and word breaker markers are part of the tree and produce
  # an empty token.
  [WORD_BOUNDARY, WORD_BREAKER].each do |marker|
    @transcription_tree.add_subpath(marker, [""])
  end

  rule_groups.each_value { |group| group.finalize(trans_options) }

  # Input charset: one owning group per input character.
  @in_charset = {}
  rule_groups.each do |group_name, group|
    group.in_charset.each do |char, char_group|
      owner = @in_charset[char]
      unless owner
        @in_charset[char] = char_group
        next
      end
      mode.errors << "Group #{group_name} uses input character #{char} which is also used by group #{owner.name}. Input charsets should not intersect between groups."
    end
  end

  # Transcription tree: one path per sub-rule.
  rule_groups.each_value do |group|
    group.rules.each do |rule|
      rule.sub_rules.each { |sub_rule| add_subrule(sub_rule) }
    end
  end
end

#transcribe_word(word, out_charset) ⇒ `Object`

# File 'lib/api/transcription_processor.rb', line 114

# Transcribes a single run of characters.
#
# The run is wrapped in WORD_BOUNDARY markers, then repeatedly handed to the
# transcription tree. Each call yields a list of tokens and the number of
# characters it covered, and that many characters are dropped from the front
# before the next call. The collected tokens are then rendered:
# * "" produces nothing;
# * UNKNOWN_CHAR_OUTPUT is emitted verbatim;
# * anything else is looked up in +out_charset+ and its +str+ is emitted.
#
# @param word [String] the run to transcribe (may be empty)
# @param out_charset [#[]] lookup from token to an object responding to +str+
# @return [String] the rendered output for this run
def transcribe_word(word, out_charset)
  remaining = WORD_BOUNDARY + word + WORD_BOUNDARY
  tokens    = []

  until remaining.empty?
    produced, consumed = @transcription_tree.transcribe(remaining)
    remaining = remaining[consumed..-1]
    tokens += produced
  end

  tokens.inject("") do |text, token|
    case token
    when ""
      text
    when UNKNOWN_CHAR_OUTPUT
      text + UNKNOWN_CHAR_OUTPUT
    else
      text + out_charset[token].str
    end
  end
end

Class: Glaemscribe::API::TranscriptionProcessor

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(mode) ⇒ TranscriptionProcessor

Instance Attribute Details

#mode ⇒ Object (readonly)

#out_space ⇒ Object

#rule_groups ⇒ Object (readonly)

Instance Method Details

#add_subrule(sub_rule) ⇒ Object

#apply(l, out_charset) ⇒ Object

#finalize(trans_options) ⇒ Object

#transcribe_word(word, out_charset) ⇒ Object

#initialize(mode) ⇒ `TranscriptionProcessor`

#mode ⇒ `Object` (readonly)

#out_space ⇒ `Object`

#rule_groups ⇒ `Object` (readonly)

#add_subrule(sub_rule) ⇒ `Object`

#apply(l, out_charset) ⇒ `Object`

#finalize(trans_options) ⇒ `Object`

#transcribe_word(word, out_charset) ⇒ `Object`