Module: Ting::Conversions

Defined in:
lib/ting/conversions.rb,
lib/ting/conversions/hanyu.rb

Defined Under Namespace

Classes: Hanyu

Constant Summary

All =
[]
DATA_DIR =
File.dirname(__FILE__)+'/data/'
@@rules =

Substitution rules

YAML::load(IO.read(DATA_DIR+'rules.yaml'))

Class Method Summary

Class Method Details

.parse(type, string) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/ting/conversions.rb', line 34

# Turns one syllable written in the +type+ transcription into a Syllable.
#
# type   - name of the reader method each initial/final answers to; it is
#          also used to build the "<type>_standalone" reader name.
# string - the syllable text. Matching is done on its lowercased form.
#
# Returns a Syllable built from the matched initial and final, with a nil
# third argument and a flag saying whether the input was capitalized (first
# letter upper case, everything else lower case).
# Raises RuntimeError when no legal initial/final combination spells the input.
def self.parse(type, string)
  lowered = string.downcase
  capitalized = (lowered != string && lowered.capitalize == string)
  # NOTE(review): to_s only runs after downcase was already called on the raw
  # argument above, so it cannot protect against non-string input - confirm intent.
  text = string.to_s.downcase
  standalone_reader = "#{type}_standalone"

  # A final whose standalone spelling equals the input wins outright and is
  # paired with the empty initial.
  standalone_final = Final::All.find do |candidate|
    candidate.respond_to?(standalone_reader) && candidate.send(standalone_reader) == text
  end
  return Syllable.new(Initial::Empty, standalone_final, nil, capitalized) if standalone_final

  # Hack: try Uo before every other final, otherwise wadegiles parses 'lo'
  # as Le+O rather than Le+Uo. A hardcoded 'overrule' table would probably be
  # a better fit for these cases.
  ordered_finals = Final::All.dup
  ordered_finals.unshift(ordered_finals.delete(Final::Uo))

  # The first legal initial/final pair whose rule-adjusted spelling equals the
  # input is the answer; initials are the outer loop, finals the inner one.
  Initial.each do |initial|
    matching_final = ordered_finals.find do |final|
      next false if Syllable.illegal?(initial, final)

      text == apply_rules(type, (initial.send(type) || '') + (final.send(type) || ''))
    end
    return Syllable.new(initial, matching_final, nil, capitalized) if matching_final
  end

  raise "Can't parse `#{text.inspect}'"
end

.tokenize(str) ⇒ Object



67
68
69
70
71
72
73
74
75
76
# File 'lib/ting/conversions.rb', line 67

# Splits +str+ into tokens separated by runs of spaces and/or apostrophes.
#
# str - the text to split; nil yields an empty result.
#
# Returns an Array of [token, offset] pairs. +token+ is the text between
# separators with surrounding whitespace stripped; +offset+ is the character
# index in +str+ at which that (unstripped) token starts. A separator at the
# very start or end of +str+ produces an empty token there, and an empty
# string produces a single empty token at offset 0.
def self.tokenize(str)
  tokens = []
  rest = str
  pos = 0
  while rest
    # Anchored at the start, so this is the (possibly empty) run of
    # non-separator characters that begins the remaining text. The negated
    # class also matches newlines, so multi-line input is never truncated.
    token = rest[/\A[^' ]*/]
    tokens << [token.strip, pos]

    separators = rest[token.length..-1][/\A[' ]+/]
    break unless separators

    # Advance past the token AND the separator run so that the next offset
    # points at the real start of the next token in the original string.
    consumed = token.length + separators.length
    pos += consumed
    rest = rest[consumed..-1]
  end
  tokens
end

.unparse(type, tsyll) ⇒ Object



56
57
58
59
60
61
62
63
64
65
# File 'lib/ting/conversions.rb', line 56

# Renders the syllable +tsyll+ as text in the +type+ transcription.
#
# type  - name of the reader method the syllable's initial and final answer
#         to; "<type>_standalone" is looked up on the final as well.
# tsyll - object exposing #initial, #final and #capitalized?.
#
# Returns the spelling tagged as UTF-8, capitalized when tsyll.capitalized?
# is truthy.
def self.unparse(type, tsyll)
  initial_text = tsyll.initial.send(type)
  final = tsyll.final
  standalone_reader = type.to_s + '_standalone'

  spelling =
    if initial_text
      # Initial present: join it with the final (a missing final spelling
      # counts as empty) and run the substitution rules over the result.
      apply_rules(type, initial_text + (final.send(type) || ''))
    elsif final.respond_to?(standalone_reader) && (standalone = final.send(standalone_reader))
      # No initial: a dedicated standalone spelling is used as-is, without
      # passing through apply_rules.
      standalone
    else
      apply_rules(type, final.send(type))
    end

  spelling = spelling.capitalize if tsyll.capitalized?
  spelling.force_encoding('UTF-8')
end