Module: ITRANS
- Defined in:
- lib/wiki_lyrics/utils/itrans.rb
Constant Summary collapse
# Directory that ITRANS.to_devanagari changes into and from which it runs the
# external `itrans` executable. Resolved relative to this source file's
# absolute location, so it does not depend on the current working directory.
@@itrans_dir = File.dirname( File.expand_path( __FILE__ ) ) + "/../itrans"
# Path that the stderr of the itrans command line is redirected to
# (see ITRANS.to_devanagari).
@@null_dev = "/dev/null"
# Devanagari => ITRANS replacements that ITRANS.from_devanagari! applies
# first, as plain substitutions: independent vowels, dependent vowel signs
# and a few irregular conjuncts. Each independent vowel is followed by its
# dependent sign, both mapping to the same ITRANS spelling.
@@devanagari2itrans = {
  ITRANS.unicode( 0x0901 ) => "", # candrabindu: dropped from the output

  # vowels:
  ITRANS.unicode( 0x0905 ) => "a",
  ITRANS.unicode( 0x0906 ) => "aa", # /A
  ITRANS.unicode( 0x093E ) => "aa", # /A
  ITRANS.unicode( 0x0907 ) => "i",
  ITRANS.unicode( 0x093F ) => "i",
  ITRANS.unicode( 0x0908 ) => "ii", # /I
  ITRANS.unicode( 0x0940 ) => "ii", # /I
  ITRANS.unicode( 0x0909 ) => "u",
  ITRANS.unicode( 0x0941 ) => "u",
  ITRANS.unicode( 0x090A ) => "uu", # /U
  ITRANS.unicode( 0x0942 ) => "uu", # /U
  ITRANS.unicode( 0x090B ) => "RRi", # R^i
  ITRANS.unicode( 0x0943 ) => "RRi", # R^i
  ITRANS.unicode( 0x090C ) => "LLi", # L^i
  # U+0962 is the vowel sign of vocalic L (U+090C); U+0944, used here
  # before, is the sign of vocalic RR and must not map to "LLi".
  ITRANS.unicode( 0x0962 ) => "LLi", # L^i
  ITRANS.unicode( 0x090F ) => "e",
  ITRANS.unicode( 0x0947 ) => "e",
  ITRANS.unicode( 0x0910 ) => "ai",
  ITRANS.unicode( 0x0948 ) => "ai",
  ITRANS.unicode( 0x0913 ) => "o",
  ITRANS.unicode( 0x094B ) => "o",
  ITRANS.unicode( 0x0914 ) => "au",
  ITRANS.unicode( 0x094C ) => "au",

  # itrans irregular
  "क्ष"=> "kSh", # x / kS
  "त्र"=> "tr",
  "ज्ञ"=> "j~n", # GY / dny
  "श्र"=> "shr",
}
# Devanagari consonant (and consonant-like sign) => ITRANS replacements.
# ITRANS.from_devanagari! walks this table after @@devanagari2itrans and,
# besides the plain substitution, appends an "a" where the consonant is not
# followed by a vowel.
@@devanagari2itrans_consonants = {
  # gutturals:
  ITRANS.unicode( 0x0915 ) => "k",
  ITRANS.unicode( 0x0916 ) => "kh",
  # ITRANS.unicode( 0x0916 ) => ".Nkh",
  ITRANS.unicode( 0x0917 ) => "g",
  ITRANS.unicode( 0x0918 ) => "gh",
  # U+0919 (nga). This key used to repeat 0x0918, which silently replaced
  # the "gh" entry above with "~N" and left U+0919 unmapped.
  ITRANS.unicode( 0x0919 ) => "~N",

  # palatals:
  ITRANS.unicode( 0x091A ) => "ch",
  ITRANS.unicode( 0x091B ) => "Ch",
  ITRANS.unicode( 0x091C ) => "j",
  ITRANS.unicode( 0x091D ) => "jh",
  ITRANS.unicode( 0x091E ) => "~n", # JN

  # retroflexes:
  ITRANS.unicode( 0x091F ) => "T",
  ITRANS.unicode( 0x0920 ) => "Th",
  ITRANS.unicode( 0x0921 ) => "D",
  ITRANS.unicode( 0x0922 ) => "Dh",
  # ITRANS.unicode( 0x0922 ) => ".Dh", # Rh (valid?)
  ITRANS.unicode( 0x0923 ) => "N",

  # dentals:
  ITRANS.unicode( 0x0924 ) => "t",
  ITRANS.unicode( 0x0925 ) => "th",
  ITRANS.unicode( 0x0926 ) => "d",
  ITRANS.unicode( 0x0927 ) => "dh",
  ITRANS.unicode( 0x0928 ) => "n",

  # labials:
  ITRANS.unicode( 0x092A ) => "p",
  ITRANS.unicode( 0x092B ) => "ph",
  ITRANS.unicode( 0x092C ) => "b",
  ITRANS.unicode( 0x092D ) => "bh",
  ITRANS.unicode( 0x092E ) => "m",

  # semi-vowels:
  ITRANS.unicode( 0x092F ) => "y",
  ITRANS.unicode( 0x0930 ) => "r",
  ITRANS.unicode( 0x0932 ) => "l",
  ITRANS.unicode( 0x0935 ) => "v", # w

  # sibilants:
  ITRANS.unicode( 0x0936 ) => "sh",
  ITRANS.unicode( 0x0937 ) => "Sh", # shh
  ITRANS.unicode( 0x0938 ) => "s",

  # miscellaneous:
  ITRANS.unicode( 0x0939 ) => "h",
  ITRANS.unicode( 0x0902 ) => ".n", # M / .m
  ITRANS.unicode( 0x0903 ) => "H", # .h
  ITRANS.unicode( 0x0950 ) => "OM", # AUM

  # other consonants:
  # NOTE(review): if these string literals are stored as base consonant +
  # combining nukta (U+093C), the bare-consonant entries above are applied
  # first wherever the hash iterates in insertion order, so these keys
  # would no longer match -- confirm, and consider listing them first.
  "क़" => "q",
  ITRANS.unicode( 0x0958 ) => "q",
  "ख़" => "Kh",
  "ग़" => "G",
  "ज़" => "z",
  ITRANS.unicode( 0x095B ) => "z",
  "फ़" => "f",
  "ड़" => ".D", # R
  ITRANS.unicode( 0x095C ) => ".D", # R (valid?)
  "ढ़" => ".Dh", # Rh
}
Class Method Summary collapse
- .from_devanagari(text) ⇒ Object
- .from_devanagari!(text) ⇒ Object
- .normalize(text) ⇒ Object
- .to_devanagari(text) ⇒ Object
- .to_devanagari!(text) ⇒ Object
- .unicode(codepoint) ⇒ Object
Class Method Details
.from_devanagari(text) ⇒ Object
58 59 60 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 58
#
# Non-destructive counterpart of ITRANS.from_devanagari!: transliterates a
# fresh String built from +text+ and returns it, leaving +text+ untouched.
def ITRANS.from_devanagari( text )
  copy = String.new( text )
  ITRANS.from_devanagari!( copy )
end
.from_devanagari!(text) ⇒ Object
44 45 46 47 48 49 50 51 52 53 54 55 56 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 44
#
# Transliterates the Devanagari characters of +text+ to ITRANS, modifying
# +text+ in place.
#
# The entries of @@devanagari2itrans are substituted first, so that by the
# time consonants are handled the vowels are already Latin letters. Each
# entry of @@devanagari2itrans_consonants is then substituted, with an "a"
# appended when the consonant stands alone as a word or is followed by
# something other than a vowel, punctuation or whitespace.
#
# text:: String to transliterate (mutated).
# Returns +text+ itself.
def ITRANS.from_devanagari!( text )
  @@devanagari2itrans.each do |devana, itrans|
    text.gsub!( devana, itrans )
  end

  @@devanagari2itrans_consonants.each do |devana, itrans|
    consonant = Regexp.escape( devana )

    # The character after the consonant is only inspected through a
    # zero-width lookahead. Consuming it (as a capture group would) makes
    # gsub! skip a directly following occurrence of the same consonant, or
    # the separator a neighbouring one-letter word needs, which then misses
    # its "a".

    # is the only symbol in the 'word' --> add an 'a' at the end:
    text.gsub!( /(^|[ ""\.:;\(\[])#{consonant}(?=[,;:?!\)\]\s]|$)/, "\\1#{itrans}a" )
    # is not followed by a vowel --> add an 'a' at the end:
    text.gsub!( /#{consonant}(?=[^aeiouAEIOU,;:?!\)\]\s])/, "#{itrans}a" )
    # every remaining occurrence is substituted as-is:
    text.gsub!( devana, itrans )
  end

  # gsub! returns nil when nothing was replaced, so return text explicitly.
  return text
end
.normalize(text) ⇒ Object
26 27 28 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 26
#
# Round-trips +text+ through ITRANS.to_devanagari and back through
# ITRANS.from_devanagari!, returning the resulting ITRANS string.
def ITRANS.normalize( text )
  devanagari = ITRANS.to_devanagari( text )
  ITRANS.from_devanagari!( devanagari )
end
.to_devanagari(text) ⇒ Object
34 35 36 37 38 39 40 41 42 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 34
#
# Converts +text+ by piping it, wrapped in an "#indian ... #endindian"
# section, through the external `itrans` executable found in @@itrans_dir.
# The command runs with @@itrans_dir as working directory and its stderr
# redirected to @@null_dev.
#
# text:: ITRANS-encoded input; interpolated into the itrans input as-is.
# Returns the command's stdout with every "%..."-to-end-of-line run removed
# and surrounding whitespace stripped.
def ITRANS.to_devanagari( text )
  input = "#indianifm=udvng.ifm\n #indian\n#{text}\n#endindian"

  # The block form of Dir.chdir restores the previous working directory
  # when the block exits, including when an exception is raised inside it.
  # NOTE(review): the executable path is interpolated unquoted, so an
  # @@itrans_dir containing whitespace would break the command line.
  trans = Dir.chdir( @@itrans_dir ) do
    `echo #{Strings.shell_quote( input )} | #{@@itrans_dir}/itrans -U 2>#{@@null_dev}`
  end

  trans.gsub!( /%[^\n]*/, "" ) # TODO search line
  trans.strip!()
  return trans
end
.to_devanagari!(text) ⇒ Object
30 31 32 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 30
#
# In-place counterpart of ITRANS.to_devanagari: overwrites the contents of
# +text+ with the converted string and returns +text+.
def ITRANS.to_devanagari!( text )
  converted = to_devanagari( text )
  text.replace( converted )
end
.unicode(codepoint) ⇒ Object
62 63 64 |
# File 'lib/wiki_lyrics/utils/itrans.rb', line 62
#
# Returns the String that Array#pack builds from +codepoint+ with the "U*"
# (UTF-8 character) directive, i.e. the single character for that code point.
def ITRANS.unicode( codepoint )
  codepoints = [codepoint]
  codepoints.pack( "U*" )
end