Module: Ting::Conversions
- Defined in:
- lib/ting/conversions.rb,
lib/ting/conversions/hanyu.rb
Defined Under Namespace
Classes: Hanyu
Constant Summary collapse
- All =
[]
- DATA_DIR =
File.dirname(__FILE__)+'/data/'
- @@rules =
Substitution rules
YAML::load(IO.read(DATA_DIR+'rules.yaml'))
Class Method Summary collapse
Class Method Details
.parse(type, string) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/ting/conversions.rb', line 34
#
# Parses one syllable written in the transcription named by +type+.
#
# The input is lower-cased and first compared against every final's
# "<type>_standalone" form (for finals that respond to it). If none matches,
# every legal initial/final pair is rendered through +apply_rules+ and
# compared against the input; the first match wins.
#
# @param type [Symbol, String] name of the reader sent to initials and finals
#   (the original source comment mentions wadegiles as one such type)
# @param string [#to_s] the syllable text; coerced with +to_s+ before any
#   String method is called on it
# @return [Syllable] built from the matching initial and final, with a nil
#   third argument and the detected capitalization flag
# @raise [RuntimeError] when no standalone final and no initial/final pair
#   renders to the input
def self.parse(type, string)
  # Coerce before inspecting case, so non-String input (e.g. nil) reaches the
  # regular "Can't parse" error instead of failing on a missing #downcase.
  text = string.to_s
  lowered = text.downcase
  capitalized = (lowered != text && lowered.capitalize == text)
  standalone_reader = "#{type}_standalone"

  standalone_final = Final::All.find do |f|
    f.respond_to?(standalone_reader) && f.send(standalone_reader) == lowered
  end
  return Syllable.new(Initial::Empty, standalone_final, nil, capitalized) if standalone_final

  finals = Final::All.dup
  # hack: move Uo to the front,
  # otherwise wadegiles parses 'lo' as Le+O rather than Le+Uo.
  # Probably better to add a hardcoded 'overrule' table for these cases.
  finals.unshift(finals.delete(Final::Uo))

  Initial.each do |ini|
    finals.each do |fin|
      next if Syllable.illegal?(ini, fin)

      candidate = apply_rules(type, (ini.send(type) || '') + (fin.send(type) || ''))
      return Syllable.new(ini, fin, nil, capitalized) if lowered == candidate
    end
  end

  raise "Can't parse `#{lowered.inspect}'"
end
.tokenize(str) ⇒ Object
67 68 69 70 71 72 73 74 75 76 |
# File 'lib/ting/conversions.rb', line 67
#
# Splits +str+ into tokens separated by runs of apostrophes and/or spaces.
#
# @param str [String, nil] the text to split; it is duplicated, not modified
# @return [Array<Array(String, Integer)>] one [token, pos] pair per token,
#   where token has surrounding whitespace stripped and pos is the character
#   offset in +str+ at which the (unstripped) token starts. A leading or
#   trailing separator run produces an empty token. Returns [] for nil.
#
# NOTE(review): the remainder is captured with `.*`, which does not cross a
# newline, so text after the first newline that follows a separator is not
# tokenized — confirm whether multi-line input is expected.
def self.tokenize(str)
  tokens = []
  rest = str.dup
  pos = 0

  while rest && (token = rest[/[^' ]*/])
    tokens << [token.strip, pos]

    # The first separator run starts right after the token; group 1 is
    # whatever follows it.
    separator = rest.match(/[' ]+(.*)/)
    break unless separator

    # Advance past the token AND the separator run so that pos stays an
    # offset into the original string.
    pos += separator.begin(1)
    rest = separator[1]
  end

  tokens
end
.unparse(type, tsyll) ⇒ Object
56 57 58 59 60 61 62 63 64 65 |
# File 'lib/ting/conversions.rb', line 56
#
# Renders a syllable as text in the transcription named by +type+.
#
# * If the initial has a form for +type+, the initial and final forms are
#   concatenated and passed through +apply_rules+.
# * Otherwise, if the final responds to "<type>_standalone" and that form is
#   non-nil, it is used as is.
# * Otherwise the final's form for +type+ is passed through +apply_rules+.
#
# @param type [Symbol, String] name of the reader sent to the initial and final
# @param tsyll [#initial, #final, #capitalized?] the syllable to render
# @return [String] the rendered text tagged as UTF-8, capitalized when
#   +tsyll.capitalized?+ is true
def self.unparse(type, tsyll)
  standalone_reader = "#{type}_standalone"
  initial_part = tsyll.initial.send(type)

  str =
    if initial_part
      apply_rules(type, initial_part + (tsyll.final.send(type) || ''))
    elsif tsyll.final.respond_to?(standalone_reader) &&
          (standalone = tsyll.final.send(standalone_reader))
      standalone
    else
      apply_rules(type, tsyll.final.send(type))
    end

  # force_encoding modifies its receiver. capitalize already yields a new
  # String; in the other case work on a copy so the String handed out by the
  # final (or by apply_rules) is neither retagged nor required to be unfrozen.
  result = tsyll.capitalized? ? str.capitalize : str.dup
  result.force_encoding('UTF-8')
end