Class: String

Inherits:
Object
  • Object
show all
Includes:
ChineseDetector
Defined in:
lib/script_detector.rb

Instance Method Summary collapse

Instance Method Details

#chinese? ⇒ Boolean

Returns true if the string contains Chinese characters and no Japanese or Korean characters

Returns:

  • (Boolean)


9
10
11
# File 'lib/script_detector.rb', line 9

# Reports whether the receiver looks like Chinese text: it must match the
# Han script pattern and must not be flagged by either +japanese?+ or
# +korean?+.
#
# NOTE(review): +look_for_chars_in+ is defined elsewhere in the file; it is
# presumed to report whether the receiver contains a match for the pattern.
#
# @return [Boolean] truthy only when Han characters are found and neither
#   the Japanese nor the Korean check succeeds
def chinese?
  # Short-circuits left to right: Han check first, then Japanese, then Korean.
  look_for_chars_in(/\p{Han}/) && !(japanese? || korean?)
end

#identify_script ⇒ Object

Tries to detect the script and returns one of “Japanese”, “Korean”, “Traditional Chinese”, “Simplified Chinese”, “Ambiguous Chinese” or “Unknown”



34
35
36
37
38
39
40
41
# File 'lib/script_detector.rb', line 34

# Classifies the receiver by running the script predicates in a fixed
# priority order and naming the first one that succeeds.
#
# Order matters: Japanese and Korean are tested before any Chinese check,
# and the traditional/simplified checks run before the generic +chinese?+
# fallback.
#
# @return [String] one of "Japanese", "Korean", "Traditional Chinese",
#   "Simplified Chinese", "Ambiguous Chinese" or "Unknown"
def identify_script
  if japanese?
    "Japanese"
  elsif korean?
    "Korean"
  elsif traditional_chinese?
    "Traditional Chinese"
  elsif simplified_chinese?
    "Simplified Chinese"
  elsif chinese?
    "Ambiguous Chinese"
  else
    "Unknown"
  end
end

#japanese? ⇒ Boolean

Returns true if the string contains specifically Japanese (hiragana or katakana) characters

Returns:

  • (Boolean)


24
25
26
# File 'lib/script_detector.rb', line 24

# Checks the receiver for kana, i.e. characters in the Katakana or Hiragana
# scripts. Han characters (kanji) are not part of this pattern.
#
# NOTE(review): the result is whatever +look_for_chars_in+ (defined
# elsewhere) returns for this pattern — presumably a boolean.
#
# @return [Boolean]
def japanese?
  kana_pattern = /(\p{Katakana}|\p{Hiragana})/
  look_for_chars_in(kana_pattern)
end

#korean? ⇒ Boolean

Returns true if the string contains specifically Korean (hangul) characters

Returns:

  • (Boolean)


29
30
31
# File 'lib/script_detector.rb', line 29

# Checks the receiver for characters in the Hangul script.
#
# NOTE(review): the result is whatever +look_for_chars_in+ (defined
# elsewhere) returns for this pattern — presumably a boolean.
#
# @return [Boolean]
def korean?
  hangul_pattern = /\p{Hangul}/
  look_for_chars_in(hangul_pattern)
end

#simplified_chinese? ⇒ Boolean

Returns true if the string contains simplified Chinese characters (简体字)

Returns:

  • (Boolean)


19
20
21
# File 'lib/script_detector.rb', line 19

# Checks the receiver against the simplified-Chinese character pattern.
#
# NOTE(review): both +simplified_chinese_regex+ and +look_for_chars_in+ are
# defined elsewhere; the pattern's exact character set is not visible here.
#
# @return [Boolean]
def simplified_chinese?
  simplified_pattern = simplified_chinese_regex
  look_for_chars_in(simplified_pattern)
end

#traditional_chinese? ⇒ Boolean

Returns true if the string contains traditional Chinese characters (繁體字)

Returns:

  • (Boolean)


14
15
16
# File 'lib/script_detector.rb', line 14

# Checks the receiver against the traditional-Chinese character pattern.
#
# NOTE(review): both +traditional_chinese_regex+ and +look_for_chars_in+ are
# defined elsewhere; the pattern's exact character set is not visible here.
#
# @return [Boolean]
def traditional_chinese?
  traditional_pattern = traditional_chinese_regex
  look_for_chars_in(traditional_pattern)
end