Class: Babosa::Transliterator::Base
- Inherits:
-
Object
- Object
- Babosa::Transliterator::Base
- Includes:
- Singleton
- Defined in:
- lib/babosa/transliterator/base.rb
Constant Summary collapse
# Default character approximations, mapping one source character to its
# ASCII replacement string.
#
# #initialize reads only the first codepoint of each key, so every key must
# be a single character. Values may be one or more characters long.
#
# The fullwidth forms are written as \u escapes so that they cannot be
# silently normalised to their ASCII look-alikes, which would turn them into
# useless identity mappings.
APPROXIMATIONS = {
  "×" => "x",
  "÷" => "/",
  "‐" => "-",
  "‑" => "-",
  "‒" => "-",
  "–" => "-",
  "—" => "-",
  "―" => "-",
  "‘" => "'",
  "‛" => "'",
  "“" => '"',
  "”" => '"',
  "„" => '"',
  "‟" => '"',
  "’" => "'",
  "\uFF0C" => ",", # fullwidth comma
  "。" => ".",
  "\uFF01" => "!", # fullwidth exclamation mark
  "\uFF1F" => "?", # fullwidth question mark
  "、" => ",",
  "\uFF08" => "(", # fullwidth left parenthesis
  "\uFF09" => ")", # fullwidth right parenthesis
  "【" => "[",
  "】" => "]",
  "\uFF1B" => ";", # fullwidth semicolon
  "\uFF1A" => ":", # fullwidth colon
  "《" => "<",
  "》" => ">",
  # various kinds of space characters
  "\xc2\xa0" => " ",     # U+00A0
  "\xe2\x80\x80" => " ", # U+2000
  "\xe2\x80\x81" => " ", # U+2001
  "\xe2\x80\x82" => " ", # U+2002
  "\xe2\x80\x83" => " ", # U+2003
  "\xe2\x80\x84" => " ", # U+2004
  "\xe2\x80\x85" => " ", # U+2005
  "\xe2\x80\x86" => " ", # U+2006
  "\xe2\x80\x87" => " ", # U+2007
  "\xe2\x80\x88" => " ", # U+2008
  "\xe2\x80\x89" => " ", # U+2009
  "\xe2\x80\x8a" => " ", # U+200A
  "\xe2\x81\x9f" => " ", # U+205F
  "\xe3\x80\x80" => " "  # U+3000
}.freeze
Instance Attribute Summary collapse
-
#approximations ⇒ Object
readonly
Returns the value of attribute approximations.
Instance Method Summary collapse
-
#[](codepoint) ⇒ Object
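Accepts a single Unicode codepoint (an Integer) and returns its transliteration: an Integer character code, or an Array of character codes when the replacement is longer than one character. Returns nil when no approximation is defined.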
-
#initialize ⇒ Base
constructor
A new instance of Base.
-
#transliterate(string) ⇒ Object
Transliterates a string, replacing each character that has an approximation and leaving all other characters unchanged.
Constructor Details
#initialize ⇒ Base
Returns a new instance of Base.
# File 'lib/babosa/transliterator/base.rb', line 66

# Builds the frozen codepoint lookup table for this transliterator.
#
# A subclass of Base starts from a copy of its superclass instance's table,
# so entries in its own APPROXIMATIONS constant add to or override the
# inherited ones. Base itself starts from an empty table.
#
# Each key is reduced to its first codepoint (an Integer). A one-character
# replacement is stored as a single Integer; a longer replacement is stored
# as an Array of Integers.
def initialize
  @approximations =
    if self.class < Base
      # dup: the superclass table is frozen and must not be shared.
      self.class.superclass.instance.approximations.dup
    else
      {}
    end

  self.class.const_get(:APPROXIMATIONS).each do |source, replacement|
    codes = replacement.codepoints
    @approximations[source.codepoints.first] = codes.length == 1 ? codes.first : codes
  end

  @approximations.freeze
end
Instance Attribute Details
#approximations ⇒ Object (readonly)
Returns the value of attribute approximations.
# File 'lib/babosa/transliterator/base.rb', line 64

# Returns the frozen Hash built by #initialize. Keys are Integer codepoints;
# each value is an Integer codepoint or an Array of Integer codepoints.
def approximations
  @approximations
end
Instance Method Details
#[](codepoint) ⇒ Object
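Accepts a single Unicode codepoint (an Integer) and returns its transliteration: an Integer character code, or an Array of character codes when the replacement is longer than one character. Returns nil when no approximation is defined.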
# File 'lib/babosa/transliterator/base.rb', line 83

# Looks up the replacement for a single codepoint.
#
# Returns whatever @approximations holds for +codepoint+: an Integer, an
# Array of Integers, or nil when the codepoint has no entry.
def [](codepoint)
  @approximations[codepoint]
end
#transliterate(string) ⇒ Object
Transliterates a string, replacing each character that has an approximation and leaving all other characters unchanged.
# File 'lib/babosa/transliterator/base.rb', line 88

# Transliterates a string codepoint by codepoint.
#
# Every codepoint with an entry (looked up through #[]) is replaced by that
# entry; codepoints without one are kept as they are.
#
# @param string [String] the text to transliterate
# @return [String] the transliterated text, packed as UTF-8
def transliterate(string)
  # flat_map splices multi-codepoint replacements (Arrays) into the sequence.
  string.codepoints.flat_map { |codepoint| self[codepoint] || codepoint }.pack("U*")
end