Class: MagickColumns::Tokenizer
- Inherits: Object
- Class hierarchy: Object › MagickColumns::Tokenizer
- Defined in:
- lib/magick_columns/tokenizer.rb
Instance Method Summary collapse
- #clean_query ⇒ Object
- #extract_terms ⇒ Object
-
#initialize(query = '') ⇒ Tokenizer
constructor
A new instance of Tokenizer.
- #split_term_in_terms(term) ⇒ Object
Constructor Details
#initialize(query = '') ⇒ Tokenizer
Returns a new instance of Tokenizer.
3 4 5 |
# File 'lib/magick_columns/tokenizer.rb', line 3
#
# Builds a tokenizer over a raw search query string.
#
# @param query [String] the raw query to tokenize (defaults to '')
def initialize(query = '')
  @query = query
end
Instance Method Details
#clean_query ⇒ Object
27 28 29 30 31 32 33 |
# File 'lib/magick_columns/tokenizer.rb', line 27
#
# Returns @query stripped of surrounding whitespace and of any leading or
# trailing boolean operators ("and"/"or" alternations supplied by
# MagickColumns.and_operators / MagickColumns.or_operators), so that the
# query starts and ends with an actual term.
#
# NOTE(review): each operator list is interpolated into a regexp, so it is
# assumed to be a regexp-safe alternation (e.g. "and|&") — confirm in
# MagickColumns' configuration.
#
# @return [String] the cleaned query
def clean_query
  @query.strip
    .gsub(%r{\A(\s*(#{MagickColumns.and_operators})\s+)+}, '')
    .gsub(%r{(\s+(#{MagickColumns.and_operators})\s*)+\z}, '')
    .gsub(%r{\A(\s*(#{MagickColumns.or_operators})\s+)+}, '')
    .gsub(%r{(\s+(#{MagickColumns.or_operators})\s*)+\z}, '')
end
#extract_terms ⇒ Object
7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# File 'lib/magick_columns/tokenizer.rb', line 7
#
# Splits the cleaned query into OR-groups of AND-terms.
#
# Splitting with a regexp that contains a capture group makes Ruby's
# String#split include the matched separators in the result, so each
# piece that is itself an operator is filtered back out with the
# `unless =~` guards below.
#
# @return [Array<Array<Hash>>] outer array = OR-groups; each inner array
#   holds the term hashes produced by #split_term_in_terms
def extract_terms
  terms = []

  clean_query.split(%r{\s+(#{MagickColumns.or_operators})\s+}).each do |o_t|
    unless o_t =~ %r{\A(#{MagickColumns.or_operators})\z}
      and_terms = []

      o_t.split(%r{\s+(#{MagickColumns.and_operators})\s+}).each do |t|
        unless t =~ %r{\A(#{MagickColumns.and_operators})\z}
          and_terms.concat split_term_in_terms(t)
        end
      end

      terms << and_terms unless and_terms.empty?
    end
  end

  terms.reject(&:empty?)
end
#split_term_in_terms(term) ⇒ Object
35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 |
# File 'lib/magick_columns/tokenizer.rb', line 35
#
# Applies the configured replacement and tokenize rules to a single term
# and returns the resulting term hashes.
#
# NOTE(review): the scraped source had lost the second block parameter
# (`|rule, |` with bare `[:pattern]` literals); it is restored here as
# `options`, the rule's configuration hash — confirm against the gem's
# original tokenizer.rb.
#
# A rule's :pattern may be either a Regexp or a callable returning one,
# hence the respond_to?(:call) check before use.
#
# @param term [String] a single AND-term from #extract_terms
# @return [Array<Hash>] tokens emitted by tokenize rules, followed by
#   `{ term: word }` hashes for the remaining whitespace-separated words
def split_term_in_terms(term)
  term_copy = term.dup
  terms = []

  # Pass 1: replacement rules rewrite matching fragments in place,
  # repeating until the pattern no longer matches.
  MagickColumns.replacement_rules.each do |_rule, options|
    pattern = options[:pattern].respond_to?(:call) ? options[:pattern].call : options[:pattern]

    while (match = term_copy.match(pattern))
      term_copy.sub!(pattern, options[:replacement].call(match))
    end
  end

  # Pass 2: tokenize rules extract structured tokens and delete the
  # matched text from the working copy.
  MagickColumns.tokenize_rules.each do |_rule, options|
    pattern = options[:pattern].respond_to?(:call) ? options[:pattern].call : options[:pattern]

    while (match = term_copy.match(pattern))
      terms << options[:tokenizer].call(match)
      term_copy.sub!(pattern, '')
    end
  end

  # Whatever text survives both passes is split into plain word terms.
  terms + term_copy.strip.split(/\s+/).map { |t| { term: t } }
end