Module: Sparkql::ParserTools

Included in: Parser

Defined in: lib/sparkql/parser_tools.rb

Overview

This is the guts of the parser internals and is mixed into the parser for organization.

Constant Summary collapse

DATE_TYPES = Coercible types from highest precision to lowest

[:datetime, :date]

NUMBER_TYPES =

[:decimal, :integer]

Instance Method Summary collapse

#coercible_types(type1, type2) ⇒ Object

If both types support coercion with each other, returns the highest-precision type of the two.
#next_token ⇒ Object
#on_error(error_token_id, error_value, value_stack) ⇒ Object
#parse(str) ⇒ Object
#tokenize_conjunction(exp1, conj, exp2) ⇒ Object
#tokenize_expression(field, op, val) ⇒ Object
#tokenize_field_arg(field) ⇒ Object
#tokenize_function(name, f_args) ⇒ Object
#tokenize_function_args(lit1, lit2) ⇒ Object
#tokenize_group(expressions) ⇒ Object
#tokenize_list(list) ⇒ Object
#tokenize_literal_negation(number_token) ⇒ Object
#tokenize_multiple(lit1, lit2) ⇒ Object
#tokenize_unary_conjunction(conj, exp) ⇒ Object
#validate_expressions(results) ⇒ Object
#validate_level_depth(expression) ⇒ Object
#validate_multiple_arguments(args) ⇒ Object
#validate_multiple_values(values) ⇒ Object

Instance Method Details

#coercible_types(type1, type2) ⇒ `Object`

If both types support coercion with each other, returns the highest-precision type of the two. If either type cannot be coerced to the other, returns nil.

# File 'lib/sparkql/parser_tools.rb', line 230

# Picks the common type that two literal types can be coerced to.
#
# Each type family (DATE_TYPES, NUMBER_TYPES) is checked in turn; when both
# arguments belong to the same family, that family's first entry is returned.
# Returns nil when the two types do not share a family.
def coercible_types(type1, type2)
  [DATE_TYPES, NUMBER_TYPES].each do |family|
    return family.first if family.include?(type1) && family.include?(type2)
  end
  nil
end

#next_token ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 17

# Returns the next token from the lexer, discarding any :SPACE and :NEWLINE
# tokens that come before it.
def next_token
  loop do
    token = @lexer.shift
    return token unless token[0] == :SPACE || token[0] == :NEWLINE
  end
end

#on_error(error_token_id, error_value, value_stack) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 180

# Reports a fatal syntax error for the token the parser could not handle.
#
# NOTE(review): the signature matches Racc's error callback, so this is
# presumably invoked by the generated parser - confirm.
#
# error_token_id - id passed to token_to_str to obtain the token's name.
# error_value    - value of the offending token (not used in the message).
# value_stack    - parser value stack (unused).
#
# Returns whatever tokenizer_error returns.
def on_error(error_token_id, error_value, value_stack)
  # Use the non-mutating downcase: token_to_str may hand back a string owned
  # by a shared token-name table, which must not be altered in place (and may
  # be frozen). to_s tolerates an id that has no name.
  token_name = token_to_str(error_token_id).to_s.downcase
  tokenizer_error(:token => @lexer.current_token_value,
                  :message => "Error parsing token #{token_name}",
                  :status => :fatal,
                  :syntax => true)
end

#parse(str) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 8

# Parses a filter string into a list of expressions.
#
# Sets up a fresh lexer and expression counter, runs do_parse, and passes a
# non-nil result through validate_expressions.
#
# Returns the parse result, or nil when do_parse produced nothing.
def parse(str)
  @lexer = Sparkql::Lexer.new(str)
  @expression_count = 0
  parsed = do_parse
  validate_expressions(parsed) unless parsed.nil?
  parsed
end

#tokenize_conjunction(exp1, conj, exp2) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 64

# Joins two expression lists with a conjunction.
#
# The conjunction and the lexer's current level are recorded on the first
# expression of exp2 (mutating it in place); the result is a new array
# holding exp1's expressions followed by exp2's.
def tokenize_conjunction(exp1, conj, exp2)
  head = exp2.first
  head[:conjunction] = conj
  head[:conjunction_level] = @lexer.level
  exp1 + exp2
end

#tokenize_expression(field, op, val) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 25

# Builds the expression hash for a single `field op value` clause.
#
# field - a field name, or a hash of :type => :function describing a function
#         applied to a field (its first argument becomes the field).
# op    - the operator token; resolved through get_operator when val is given.
# val   - the value token hash (merged into the expression), or nil.
#
# Reports a tokenizer error for unsupported field functions and when no
# operator could be resolved. Increments @expression_count.
#
# Returns a one-element array containing the expression hash.
def tokenize_expression(field, op, val)
  operator = val.nil? ? nil : get_operator(val, op)
  field_args = {}
  # Function support for fields is stapled in here. The function information
  # is remapped to the expression
  if field.is_a?(Hash) && field[:type] == :function
    function = Sparkql::FunctionResolver::SUPPORTED_FUNCTIONS[field[:value].to_sym]
    if function.nil?
      tokenizer_error(:token => field[:value],
        :message => "Unsupported function type", :status => :fatal )
    else
      # A cast takes its resulting type from its last argument; every other
      # function uses the declared return type.
      is_cast = field[:value] == 'cast'
      field_args = {
        :field_function => field[:value],
        :args => field[:args],
        :field_function_type => is_cast ? field[:args].last.to_sym : function[:return_type]
      }
    end
    field = field[:args].first
  end

  expression = {
    :field => field,
    :operator => operator,
    :conjunction => 'And',
    :conjunction_level => 0,
    :level => @lexer.level,
    :block_group => (@lexer.level == 0 ? 0 : @lexer.block_group_identifier),
    :custom_field => !field.nil? && field.start_with?('"')
  }
  expression.merge!(field_args)
  # Value keys come first; expression keys win on collision.
  expression = val.merge(expression) unless val.nil?
  expression[:condition] ||= expression[:value]

  validate_level_depth(expression)
  if operator.nil?
    tokenizer_error(:token => op, :expression => expression,
      :message => "Operator not supported for this type and value string", :status => :fatal )
  end

  @expression_count += 1
  [expression]
end

#tokenize_field_arg(field) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 147

# Wraps a field name in a token hash of type :field.
def tokenize_field_arg(field)
  { :type => :field, :value => field }
end

#tokenize_function(name, f_args) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 154

# Resolves a function call into its value token.
#
# name   - the function name.
# f_args - a single argument token or an array of argument tokens. Each
#          token's :value is replaced in place with its escaped form.
#
# Steps the lexer down one level and decrements its block group identifier,
# then validates and calls the function through Sparkql::FunctionResolver.
#
# Returns nil after reporting a tokenizer error when the resolver fails
# validation, nil when the resolver call yields nil, and otherwise the
# resolver's result merged with a :condition string of the form
# "name(arg1,arg2)" built from the unescaped argument values.
def tokenize_function(name, f_args)
  @lexer.leveldown
  @lexer.block_group_identifier -= 1

  args = f_args.instance_of?(Array) ? f_args : [f_args]
  validate_multiple_arguments(args)

  # Remember each raw value for the condition string (needs to be the pure
  # string value) before swapping in the escaped one.
  raw_values = args.map do |arg|
    raw = arg[:value]
    arg[:value] = escape_value(arg)
    raw
  end

  resolver = Sparkql::FunctionResolver.new(name, args)
  resolver.validate
  if resolver.errors?
    tokenizer_error(:token => @lexer.last_field,
                    :message => "Error parsing function #{resolver.errors.join(',')}",
                    :status => :fatal,
                    :syntax => true)
    return nil
  end

  result = resolver.call
  return result if result.nil?

  result.merge(:condition => "#{name}(#{raw_values.join(',')})")
end

#tokenize_function_args(lit1, lit2) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 141

# Appends lit2 to the argument list started by lit1.
#
# When lit1 is already an array it is extended in place and returned;
# otherwise a new two-element array is created.
def tokenize_function_args(lit1, lit2)
  args = lit1.kind_of?(Array) ? lit1 : [lit1]
  args.push(lit2)
end

#tokenize_group(expressions) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 88

# Closes a parenthesised group: steps the lexer down one level and hands the
# group's expressions back unchanged.
def tokenize_group(expressions)
  expressions.tap { @lexer.leveldown }
end

#tokenize_list(list) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 93

# Finalises a list token: validates its values and, when no :condition is
# present yet, sets :condition to the token's :value. Returns the same hash.
def tokenize_list(list)
  validate_multiple_values(list[:value])
  list[:condition] = list[:value] unless list[:condition]
  list
end

#tokenize_literal_negation(number_token) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 99

# Negates a numeric literal token in place.
#
# number_token - token hash with :type and a :value holding the literal's
#                string form.
#
# Integers are negated arithmetically. Decimals have their sign flipped
# textually rather than via Float: a Float round trip renders small values in
# scientific notation ("0.00001" became "-1.0e-05") and drops digits from
# long literals.
#
# For any other type a fatal tokenizer error is reported and the token is
# returned unchanged.
#
# Returns the same token hash.
def tokenize_literal_negation(number_token)
  negated = case number_token[:type]
  when :integer
    (-1 * number_token[:value].to_i).to_s
  when :decimal
    literal = number_token[:value].to_s
    literal.start_with?('-') ? literal[1..-1] : "-#{literal}"
  else
    tokenizer_error(:token => @lexer.current_token_value,
                    :expression => number_token,
                    :message => "Negation is only allowed for integer and floats",
                    :status => :fatal,
                    :syntax => true)
    return number_token
  end
  number_token[:value] = negated

  number_token
end

#tokenize_multiple(lit1, lit2) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 118

# Combines two literal tokens into a single multi-value token.
#
# When the types differ, coercible_types picks the common type; if there is
# none, a fatal tokenizer error is reported and lit1's type is kept.
#
# Returns a new hash with the resolved :type, the accumulated :value array
# (lit1's array is extended in place when it already is one), :multiple set
# to "true", and a comma-joined :condition string.
def tokenize_multiple(lit1, lit2)
  left_type = lit1[:type]
  right_type = lit2[:type]
  final_type = left_type
  unless left_type == right_type
    final_type = coercible_types(left_type, right_type)
    if final_type.nil?
      tokenizer_error(:token => @lexer.last_field,
                      :message => "Type mismatch in field list.",
                      :status => :fatal,
                      :syntax => true)
      final_type = left_type
    end
  end

  left_condition = lit1[:condition] || lit1[:value]
  right_condition = lit2[:condition] || lit2[:value]
  values = Array(lit1[:value]).push(lit2[:value])

  {
    :type => final_type,
    :value => values,
    :multiple => "true",
    :condition => left_condition + "," + right_condition
  }
end

#tokenize_unary_conjunction(conj, exp) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 70

# Attaches a unary operator to the first expression of exp (in place) and
# returns exp.
#
# Handles the case when a SparkQL filter string begins with a unary operator
# and is nested, such as:
#   Not (Not Field Eq 1)
# When the first expression already carries a unary and the lexer is at
# level 0, the outer unary is recorded as a conjunction instead. With any
# other expression this would be the case, so that should make processing
# consistent.
def tokenize_unary_conjunction(conj, exp)
  head = exp.first
  outer = head[:unary] && @lexer.level == 0
  key, level_key = outer ? [:conjunction, :conjunction_level] : [:unary, :unary_level]
  head[key] = conj
  head[level_key] = @lexer.level
  exp
end

#validate_expressions(results) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 198

# Enforces the max_expressions limit on a parsed filter.
#
# When results holds more expressions than allowed, a fatal constraint error
# is reported against the first expression over the limit and the excess
# expressions are removed from results in place.
def validate_expressions(results)
  limit = max_expressions
  return unless results.size > limit

  overflow = results[limit]
  compile_error(:token => overflow[:field], :expression => overflow,
        :message => "You have exceeded the maximum expression count.  Please limit to no more than #{limit} expressions in a filter.",
        :status => :fatal, :syntax => false, :constraint => true )
  results.slice!(limit..-1)
end

#validate_level_depth(expression) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 190

# Reports a fatal constraint error for the given expression when the lexer's
# current nesting level is greater than max_level_depth.
def validate_level_depth(expression)
  return unless @lexer.level > max_level_depth

  compile_error(:token => "(", :expression => expression,
        :message => "You have exceeded the maximum nesting level.  Please nest no more than #{max_level_depth} levels deep.",
        :status => :fatal, :syntax => false, :constraint => true )
end

#validate_multiple_arguments(args) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 217

# Enforces the max_values limit on a function's argument list.
#
# When there are more arguments than allowed, a fatal constraint error is
# reported against the first argument over the limit and the excess is
# removed. The removal affects the caller's object only when args is already
# an array, since Array() returns the same object in that case.
def validate_multiple_arguments(args)
  args = Array(args)
  limit = max_values
  return unless args.size > limit

  compile_error(:token => args[limit],
        :message => "You have exceeded the maximum parameter count.  Please limit to #{limit} parameters to a single function.",
        :status => :fatal, :syntax => false, :constraint => true )
  args.slice!(limit..-1)
end

#validate_multiple_values(values) ⇒ `Object`

# File 'lib/sparkql/parser_tools.rb', line 207

# Enforces the max_values limit on the values of a single expression.
#
# When there are more values than allowed, a fatal constraint error is
# reported against the first value over the limit and the excess is removed.
# The removal affects the caller's object only when values is already an
# array, since Array() returns the same object in that case.
def validate_multiple_values(values)
  values = Array(values)
  limit = max_values
  return unless values.size > limit

  compile_error(:token => values[limit],
        :message => "You have exceeded the maximum value count.  Please limit to #{limit} values in a single expression.",
        :status => :fatal, :syntax => false, :constraint => true )
  values.slice!(limit..-1)
end

Module: Sparkql::ParserTools

Overview

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#coercible_types(type1, type2) ⇒ Object

#next_token ⇒ Object

#on_error(error_token_id, error_value, value_stack) ⇒ Object

#parse(str) ⇒ Object

#tokenize_conjunction(exp1, conj, exp2) ⇒ Object

#tokenize_expression(field, op, val) ⇒ Object

#tokenize_field_arg(field) ⇒ Object

#tokenize_function(name, f_args) ⇒ Object

#tokenize_function_args(lit1, lit2) ⇒ Object

#tokenize_group(expressions) ⇒ Object

#tokenize_list(list) ⇒ Object

#tokenize_literal_negation(number_token) ⇒ Object

#tokenize_multiple(lit1, lit2) ⇒ Object

#tokenize_unary_conjunction(conj, exp) ⇒ Object

#validate_expressions(results) ⇒ Object

#validate_level_depth(expression) ⇒ Object

#validate_multiple_arguments(args) ⇒ Object

#validate_multiple_values(values) ⇒ Object