Class: TwitterCldr::Tokenizers::NumberTokenizer
- Inherits: Object
  - Object
  - TwitterCldr::Tokenizers::NumberTokenizer
- Defined in: lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb
Constant Summary collapse
# Maps each symbol that has special meaning in a number pattern to the
# placeholder name substituted for it when the symbol appears quoted.
# The currency key is U+00A4 (CURRENCY SIGN).
SPECIAL_SYMBOLS_MAP = {
  '.' => '{DOT}',
  ',' => '{COMMA}',
  '0' => '{ZERO}',
  '#' => '{POUND}',
  '¤' => '{CURRENCY}',
  '%' => '{PERCENT}',
  'E' => '{SCIENTIFIC}'
}.freeze

# Matches any one special symbol wrapped in single quotes, e.g. '#'.
SPECIAL_SYMBOLS_REGEX =
  /'(?:#{SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')})'/.freeze

# Placeholder name => original symbol (the inverse of SPECIAL_SYMBOLS_MAP).
INVERSE_SPECIAL_SYMBOLS_MAP = SPECIAL_SYMBOLS_MAP.invert.freeze

# Matches any placeholder name, e.g. {POUND}.
INVERSE_SPECIAL_SYMBOLS_REGEX =
  /#{INVERSE_SPECIAL_SYMBOLS_MAP.keys.map { |s| Regexp.escape(s) }.join('|')}/.freeze
Instance Attribute Summary collapse
-
#data_reader ⇒ Object
readonly
Returns the value of attribute data_reader.
Instance Method Summary collapse
-
#initialize(data_reader) ⇒ NumberTokenizer
constructor
A new instance of NumberTokenizer.
- #tokenize(pattern) ⇒ Object
Constructor Details
#initialize(data_reader) ⇒ NumberTokenizer
Returns a new instance of NumberTokenizer.
28 29 30 |
# File 'lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb', line 28
#
# Builds a tokenizer that holds on to the given data reader.
#
# @param data_reader [Object] stored unchanged in @data_reader
def initialize(data_reader)
  @data_reader = data_reader
end
Instance Attribute Details
#data_reader ⇒ Object (readonly)
Returns the value of attribute data_reader.
26 27 28 |
# File 'lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb', line 26
#
# Read-only accessor for the data reader.
#
# @return [Object] the current value of @data_reader
def data_reader
  @data_reader
end
Instance Method Details
#tokenize(pattern) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/twitter_cldr/tokenizers/numbers/number_tokenizer.rb', line 32
#
# Splits a number pattern into tokens.
#
# Quoted special symbols (a key of SPECIAL_SYMBOLS_MAP wrapped in single
# quotes, e.g. '#') are swapped for their placeholder names before the
# pattern is handed to PatternTokenizer, then swapped back to the bare symbol
# in every resulting token value. Presumably this keeps quoted symbols from
# being read as pattern syntax -- confirm against the tokenizer rules.
#
# @param pattern [String] the number pattern to tokenize
# @return the tokens produced by PatternTokenizer#tokenize, minus the leading
#   token when its value is an empty string
def tokenize(pattern)
  # The match includes the surrounding quotes; [1..-2] strips them to get the map key.
  escaped_pattern = pattern.gsub(SPECIAL_SYMBOLS_REGEX) do |match|
    SPECIAL_SYMBOLS_MAP[match[1..-2]]
  end

  # NOTE(review): `tokenizer` is defined outside this excerpt.
  tokens = PatternTokenizer.new(data_reader, tokenizer).tokenize(escaped_pattern)

  # Restore the literal symbols in place of their placeholders.
  tokens.each do |token|
    token.value = token.value.gsub(INVERSE_SPECIAL_SYMBOLS_REGEX) do |match|
      INVERSE_SPECIAL_SYMBOLS_MAP[match]
    end
  end

  # Guard against an empty token list: calling #value on nil would raise
  # NoMethodError. An empty list is returned unchanged.
  first_token = tokens.first
  if first_token && first_token.value == ""
    tokens[1..-1]
  else
    tokens
  end
end