Module: Normalizer

Included in:
FarsiProcessor
Defined in:
lib/normalizer.rb

Constant Summary collapse

ARABIC_KAF =

ك

"\u0643".freeze
FARSI_KEHEH =

ک

"\u06a9".freeze
ARABIC_YEH =

ي

"\u064a".freeze
ARABIC_ALEF_MAKSOURA =

ى

"\u0649".freeze
FARSI_YEH =

ی

"\u06cc".freeze
ALEF_MADDA =

آ

"\u0622".freeze
ALEF_WITH_HAMZA_BELOW =

إ

"\u0625".freeze
ALEF_WITH_HAMZA_ABOVE =

أ

"\u0623".freeze
ALEF =

ا

"\u0627".freeze
TATWIL =

ـ

"\u0640".freeze
# U+064B ARABIC FATHATAN (combining mark).
FATHATAN =
"\u064b".freeze
# U+064C ARABIC DAMMATAN (combining mark).
DAMMATAN =
"\u064c".freeze
# U+064D ARABIC KASRATAN (combining mark).
KASRATAN =
"\u064d".freeze
# U+064E ARABIC FATHA (combining mark).
FATHA =
"\u064e".freeze
# U+064F ARABIC DAMMA (combining mark).
DAMMA =
"\u064f".freeze
# U+0650 ARABIC KASRA (combining mark).
KASRA =
"\u0650".freeze
# U+0651 ARABIC SHADDA (combining mark).
SHADDA =
"\u0651".freeze
# U+0652 ARABIC SUKUN (combining mark).
SUKUN =
"\u0652".freeze
# Frozen lookup table from a source character to its replacement string.
#
# Arabic kaf maps to Farsi keheh; Arabic yeh and alef maksoura both map to
# Farsi yeh; the madda and hamza forms of alef map to bare alef; tatwil maps
# to the empty string (i.e. it has no replacement character).
#
# NOTE(review): the code that consumes this table is not visible here —
# presumably the character-mapping step of #normalize; confirm.
CHARACTERS_MAPPINGS =
{
  ARABIC_KAF => FARSI_KEHEH,
  ARABIC_YEH => FARSI_YEH,
  ARABIC_ALEF_MAKSOURA => FARSI_YEH,
  ALEF_MADDA => ALEF,
  ALEF_WITH_HAMZA_BELOW => ALEF,
  ALEF_WITH_HAMZA_ABOVE => ALEF,
  TATWIL => ''
}.freeze
# Frozen list of the eight diacritic constants defined in this module
# (U+064B through U+0652, in code-point order).
#
# NOTE(review): the consumer is not visible here — presumably the
# diacritic-removal step of #normalize strips these; confirm.
DIACRITICS =
[
  FATHATAN,
  DAMMATAN,
  KASRATAN,
  FATHA,
  DAMMA,
  KASRA,
  SHADDA,
  SUKUN
].freeze

Instance Method Summary collapse

Instance Method Details

#normalize ⇒ Object



46
47
48
49
50
# File 'lib/normalizer.rb', line 46

# Runs the normalization steps in order and returns +word+.
#
# Calls +map_charachters+ first, then +remove_diacritics+, and finally
# evaluates +word+, whose value is the method's return value.
#
# NOTE(review): all three names are defined outside this view (the helper is
# spelled +map_charachters+ here; keep the spelling in sync with its
# definition). Presumably the helpers apply CHARACTERS_MAPPINGS to, and strip
# DIACRITICS from, the text held in +word+ — confirm against the including
# class.
#
# @return [Object] whatever +word+ returns after both steps have run
def normalize
  map_charachters
  remove_diacritics
  word
end