Class: Latinizer

Inherits:
Object
  • Object
show all
Defined in:
lib/latinizer.rb

Constant Summary collapse

# Script labels that detect_non_latin_scripts may report and that
# latinize_script has a branch for. 'Japanese' is not a Unicode script name:
# detect_non_latin_scripts substitutes it for Han/Hiragana/Katakana when kana
# is present.
SUPPORTED_SCRIPTS = %w[Arabic Cyrillic Han Japanese].freeze

Class Method Summary collapse

Class Method Details

.detect_non_latin_scripts(text) ⇒ Object



45
46
47
48
49
50
51
52
# File 'lib/latinizer.rb', line 45

# Lists the supported non-Latin scripts present in +text+.
#
# 'Common', 'Inherited' and 'Latin' are discarded first. If kana (Hiragana or
# Katakana) is among the remaining scripts, 'Han', 'Hiragana' and 'Katakana'
# are collapsed into the single label 'Japanese', which is appended at the end.
# Anything not listed in SUPPORTED_SCRIPTS is dropped from the result.
#
# @param text [String] text handed to Unicode::Scripts.scripts
# @return [Array<String>] detected labels, each a member of SUPPORTED_SCRIPTS
def self.detect_non_latin_scripts(text)
  found = Unicode::Scripts.scripts(text) - %w[Common Inherited Latin]
  found = found - %w[Han Hiragana Katakana] + %w[Japanese] if is_japanese?(found)
  found & SUPPORTED_SCRIPTS
end

.has_non_latin?(text) ⇒ Boolean

Returns:

  • (Boolean)


62
63
64
65
# File 'lib/latinizer.rb', line 62

# Reports whether +text+ contains any script besides 'Common', 'Inherited'
# and 'Latin'.
#
# Unlike detect_non_latin_scripts, the result is not restricted to
# SUPPORTED_SCRIPTS: any other script name counts.
#
# @param text [String] text handed to Unicode::Scripts.scripts
# @return [Boolean] true when at least one other script is present
def self.has_non_latin?(text)
  non_latin = Unicode::Scripts.scripts(text) - %w[Common Inherited Latin]
  !non_latin.empty?
end

.is_japanese?(scripts) ⇒ Boolean

Returns:

  • (Boolean)


67
68
69
# File 'lib/latinizer.rb', line 67

# Tells whether a list of script names contains kana.
#
# Only 'Hiragana' and 'Katakana' are checked; 'Han' on its own does not count.
#
# @param scripts [Array<String>] script names (anything responding to include?)
# @return [Boolean] true when 'Hiragana' or 'Katakana' is included
def self.is_japanese?(scripts)
  %w[Hiragana Katakana].any? { |kana| scripts.include?(kana) }
end

.latinize_script(text, script, opt = nil) ⇒ Object



30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/latinizer.rb', line 30

# Transliterates +text+ with the converter that belongs to +script+.
#
# Dispatch:
# * 'Arabic'   -> Arabic.t(text)
# * 'Cyrillic' -> Translit.convert(text, :english), additionally passed
#   through remove_diacritics when +opt+ is :ascii
# * 'Han'      -> Han.t(text, opt)
# * 'Japanese' -> Japanese.t(text)
#
# Any other +script+ value leaves the text untouched.
#
# @param text [String] text to transliterate
# @param script [String] script label, normally one of SUPPORTED_SCRIPTS
# @param opt [Symbol, nil] forwarded to Han.t; :ascii also affects Cyrillic
# @return [Object] the converter's result, or +text+ itself for unknown scripts
def self.latinize_script(text, script, opt = nil)
  case script
  when 'Arabic'
    Arabic.t(text)
  when 'Cyrillic'
    romanized = Translit.convert(text, :english)
    if opt == :ascii
      remove_diacritics(romanized)
    else
      romanized
    end
  when 'Han'
    Han.t(text, opt)
  when 'Japanese'
    Japanese.t(text)
  else
    text
  end
end

.remove_diacritics(text) ⇒ Object



54
55
56
# File 'lib/latinizer.rb', line 54

# Runs +text+ through to_slug and transliterate, then converts the result
# back to a String with to_s.
#
# NOTE(review): to_slug / transliterate are not defined in this file; they
# look like the Babosa gem's String extension - confirm before relying on
# the exact transliteration rules.
#
# @param text [String] any object responding to to_slug
# @return [String] the transliterated text
def self.remove_diacritics(text)
  slug = text.to_slug
  slug.transliterate.to_s
end

.remove_non_ascii(text) ⇒ Object



58
59
60
# File 'lib/latinizer.rb', line 58

# Runs +text+ through to_slug, transliterate and to_ascii, then converts the
# result back to a String with to_s.
#
# NOTE(review): to_slug / transliterate / to_ascii are not defined in this
# file; they look like the Babosa gem's String extension - confirm what
# to_ascii does with characters that have no ASCII equivalent.
#
# @param text [String] any object responding to to_slug
# @return [String] the converted text
def self.remove_non_ascii(text)
  transliterated = text.to_slug.transliterate
  transliterated.to_ascii.to_s
end

.t(text, opt = nil) ⇒ Object



16
17
18
19
20
21
22
23
24
25
26
27
28
# File 'lib/latinizer.rb', line 16

# Transliterates every supported non-Latin script in +text+ to Latin.
#
# Scripts are re-detected after each pass and the first detected script is
# handled first. While several scripts are present the text is latinized one
# script at a time; once a single script remains it is latinized exactly once.
# With +opt+ == :ascii the final text is passed through remove_non_ascii
# (also when no supported script was found at all).
#
# A script whose transliteration leaves the text unchanged is skipped on later
# passes. Without that guard a converter that cannot handle its input would be
# retried on identical text forever whenever another script is also present.
#
# @param text [String] text to latinize
# @param opt [Symbol, nil] :ascii requests ASCII-only output; also forwarded
#   to latinize_script
# @return [String] the latinized text
def self.t(text, opt = nil)
  stalled = [] # scripts whose converter made no progress on the current text

  loop do
    pending = detect_non_latin_scripts(text) - stalled
    break if pending.empty?

    script = pending.first
    latinized = latinize_script(text, script, opt)
    only_one_left = pending.size == 1

    # No change means retrying this script can never succeed; skip it from now on.
    stalled << script if latinized == text
    text = latinized

    # A lone remaining script is converted once and not re-examined.
    break if only_one_left
  end

  opt == :ascii ? remove_non_ascii(text) : text
end