Class: Redmine::Search::Tokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/redmine/search.rb

Instance Method Summary collapse

Constructor Details

#initialize(question) ⇒ Tokenizer

Returns a new instance of Tokenizer.



131
132
133
# File 'lib/redmine/search.rb', line 131

# Stores the search query that #tokens will later split into terms.
#
# @param question [#to_s] the raw search query; it is coerced with +to_s+,
#   so +nil+ is stored as an empty string
def initialize(question)
  query_text = question.to_s
  @question = query_text
end

Instance Method Details

#tokens ⇒ Object



135
136
137
138
139
140
141
142
143
# File 'lib/redmine/search.rb', line 135

# Splits the stored question into the list of terms to search for.
#
# Double-quoted phrases are kept together as a single token, everything else
# is split on whitespace, e.g. hello "bye bye" => ["hello", "bye bye"].
# Duplicates are removed, tokens shorter than 2 characters are dropped
# (unless they contain a Han character) and at most 5 tokens are returned.
#
# @return [Array<String>] up to 5 unique search tokens, in order of appearance
def tokens
  # A quoted phrase must start at whitespace or at the beginning of a line and
  # be followed by whitespace or the end of a line. The trailing boundary is a
  # lookahead so that it is NOT consumed: otherwise the single space between
  # two adjacent phrases ("a b" "c d") would be eaten by the first match and
  # the second phrase would be broken into separate words.
  raw_tokens = @question.scan(%r{(?:\s|^)"[^"]+"(?=\s|$)|\S+})
  # strip the surrounding quotes (and the whitespace around them)
  cleaned = raw_tokens.map { |token| token.gsub(%r{(^\s*"\s*|\s*"\s*$)}, '') }
  # tokens must be at least 2 characters long
  # but for Chinese characters (Chinese HANZI/Japanese KANJI), tokens can be one character
  # no more than 5 tokens to search for
  cleaned.uniq.select { |w| w.length > 1 || w =~ /\p{Han}/ }.first(5)
end