Class: PgQuery::ParserResult

Inherits:
Object
  • Object
show all
Defined in:
lib/pg_query/parse.rb,
lib/pg_query/deparse.rb,
lib/pg_query/truncate.rb,
lib/pg_query/param_refs.rb,
lib/pg_query/treewalker.rb,
lib/pg_query/fingerprint.rb,
lib/pg_query/filter_columns.rb

Defined Under Namespace

Classes: FingerprintSubHash, PossibleTruncation

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(query, tree, warnings = []) ⇒ ParserResult

Returns a new instance of ParserResult.



31
32
33
34
35
36
37
38
39
# File 'lib/pg_query/parse.rb', line 31

# Stores the given query, parse tree and warnings on the new instance and
# leaves the lazily loaded object lists unset.
#
# @param query stored as +@query+
# @param tree stored as +@tree+
# @param warnings stored as +@warnings+ (a fresh empty Array when omitted)
def initialize(query, tree, warnings = [])
  @query, @tree, @warnings = query, tree, warnings

  # nil marks "not loaded yet" for the readers that call load_objects!.
  @tables = @aliases = @cte_names = @functions = nil
end

Instance Attribute Details

#query ⇒ Object (readonly)

Returns the value of attribute query.



27
28
29
# File 'lib/pg_query/parse.rb', line 27

# Reader for +@query+, which #initialize sets from its +query+ argument.
def query
  @query
end

#tree ⇒ Object (readonly)

Returns the value of attribute tree.



28
29
30
# File 'lib/pg_query/parse.rb', line 28

# Reader for +@tree+, which #initialize sets from its +tree+ argument.
def tree
  @tree
end

#warnings ⇒ Object (readonly)

Returns the value of attribute warnings.



29
30
31
# File 'lib/pg_query/parse.rb', line 29

# Reader for +@warnings+, which #initialize sets from its +warnings+ argument
# (an empty Array when the argument is omitted).
def warnings
  @warnings
end

Instance Method Details

#aliases ⇒ Object



81
82
83
84
# File 'lib/pg_query/parse.rb', line 81

# Returns +@aliases+, calling load_objects! first when it is still nil.
def aliases
  return @aliases unless @aliases.nil?

  load_objects!
  @aliases
end

#call_functions ⇒ Object



72
73
74
# File 'lib/pg_query/parse.rb', line 72

# Returns the :function values of all functions_with_details entries whose
# :type is :call, with duplicates removed.
def call_functions
  called = functions_with_details.select { |details| details[:type] == :call }
  called.map { |details| details[:function] }.uniq
end

#cte_names ⇒ Object



76
77
78
79
# File 'lib/pg_query/parse.rb', line 76

# Returns +@cte_names+, calling load_objects! first when it is still nil.
def cte_names
  return @cte_names unless @cte_names.nil?

  load_objects!
  @cte_names
end

#ddl_functions ⇒ Object



68
69
70
# File 'lib/pg_query/parse.rb', line 68

# Returns the :function values of all functions_with_details entries whose
# :type is :ddl, with duplicates removed.
def ddl_functions
  defined = functions_with_details.select { |details| details[:type] == :ddl }
  defined.map { |details| details[:function] }.uniq
end

#ddl_tables ⇒ Object



57
58
59
# File 'lib/pg_query/parse.rb', line 57

# Returns the :name values of all tables_with_details entries whose :type is
# :ddl, with duplicates removed.
def ddl_tables
  ddl_entries = tables_with_details.select { |details| details[:type] == :ddl }
  ddl_entries.map { |details| details[:name] }.uniq
end

#deparse ⇒ Object



3
4
5
# File 'lib/pg_query/deparse.rb', line 3

# Passes this result's parse tree (+@tree+) to PgQuery.deparse and returns
# whatever that call returns.
def deparse
  PgQuery.deparse(@tree)
end

#dml_tables ⇒ Object



53
54
55
# File 'lib/pg_query/parse.rb', line 53

# Returns the :name values of all tables_with_details entries whose :type is
# :dml, with duplicates removed.
def dml_tables
  dml_entries = tables_with_details.select { |details| details[:type] == :dml }
  dml_entries.map { |details| details[:name] }.uniq
end

#dup_tree ⇒ Object



41
42
43
# File 'lib/pg_query/parse.rb', line 41

# Returns a copy of +@tree+ produced by encoding it with ParseResult.encode
# and decoding the encoded form again with ParseResult.decode.
def dup_tree
  encoded = ParseResult.encode(@tree)
  ParseResult.decode(encoded)
end

#filter_columns ⇒ Object

Returns a list of columns that the query filters by - this excludes the target list, but includes things like JOIN condition and WHERE clause.

Note: This also traverses into sub-selects.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
# File 'lib/pg_query/filter_columns.rb', line 7

# Returns a list of columns that the query filters by - this excludes the
# target list, but includes things like JOIN condition and WHERE clause.
#
# Note: This also traverses into sub-selects (FROM sub-selects, CTEs, set
# operation arms and sub-links).
#
# Works through two queues in lockstep: +statements+ still to be inspected
# and +condition_items+ (expression nodes) still to be unpacked. Each loop
# pass takes at most one entry from each queue and stops once both are empty.
#
# @return [Array<Array>] unique [table, column] pairs. The table is replaced
#   by its entry in @aliases when one exists, and is nil when the column
#   reference has a single field.
def filter_columns # rubocop:disable Metrics/CyclomaticComplexity
  load_objects! if @aliases.nil?

  # Get condition items from the parsetree
  statements = @tree.stmts.dup.to_a.map(&:stmt)
  condition_items = []
  filter_columns = []
  loop do
    statement = statements.shift
    if statement
      case statement.node
      when :list
        statements += statement.list.items
      when :raw_stmt
        statements << statement.raw_stmt.stmt
      when :select_stmt
        case statement.select_stmt.op
        when :SETOP_NONE
          if statement.select_stmt.from_clause
            # FROM subselects. Nodes are matched on their populated field,
            # like everywhere else in this method; a lookup by the name
            # 'RangeSubselect' never matches a node field, which would leave
            # these sub-selects untraversed.
            statement.select_stmt.from_clause.each do |item|
              next unless item.node == :range_subselect
              statements << item.range_subselect.subquery
            end

            # JOIN ON conditions
            condition_items += conditions_from_join_clauses(statement.select_stmt.from_clause)
          end

          # WHERE clause
          condition_items << statement.select_stmt.where_clause if statement.select_stmt.where_clause

          # CTEs
          if statement.select_stmt.with_clause
            statement.select_stmt.with_clause.ctes.each do |item|
              statements << item.common_table_expr.ctequery if item.node == :common_table_expr
            end
          end
        when :SETOP_UNION, :SETOP_EXCEPT, :SETOP_INTERSECT
          # Both arms of a set operation are wrapped in a Node and queued as statements
          statements << PgQuery::Node.new(select_stmt: statement.select_stmt.larg) if statement.select_stmt.larg
          statements << PgQuery::Node.new(select_stmt: statement.select_stmt.rarg) if statement.select_stmt.rarg
        end
      when :update_stmt
        condition_items << statement.update_stmt.where_clause if statement.update_stmt.where_clause
      when :delete_stmt
        condition_items << statement.delete_stmt.where_clause if statement.delete_stmt.where_clause
      when :index_stmt
        condition_items << statement.index_stmt.where_clause if statement.index_stmt.where_clause
      end
    end

    # Process both JOIN and WHERE conditions here
    next_item = condition_items.shift
    if next_item
      case next_item.node
      when :a_expr
        condition_items << next_item.a_expr.lexpr if next_item.a_expr.lexpr
        condition_items << next_item.a_expr.rexpr if next_item.a_expr.rexpr
      when :bool_expr
        condition_items += next_item.bool_expr.args
      when :coalesce_expr
        condition_items += next_item.coalesce_expr.args
      when :row_expr
        condition_items += next_item.row_expr.args
      when :column_ref
        # Fields are reversed so the last field is the column and the one
        # before it (if any) is the table; further qualifiers are dropped.
        column, table = next_item.column_ref.fields.map { |f| f.string.sval }.reverse
        filter_columns << [@aliases[table] || table, column]
      when :null_test
        condition_items << next_item.null_test.arg
      when :boolean_test
        condition_items << next_item.boolean_test.arg
      when :func_call
        # FIXME: This should actually be extracted as a funccall and be compared with those indices
        condition_items += next_item.func_call.args if next_item.func_call.args
      when :sub_link
        condition_items << next_item.sub_link.testexpr
        statements << next_item.sub_link.subselect
      end
    end

    break if statements.empty? && condition_items.empty?
  end

  filter_columns.uniq
end

#fingerprint ⇒ Object



5
6
7
8
9
10
# File 'lib/pg_query/fingerprint.rb', line 5

# Computes the fingerprint of this result: fingerprint_tree fills a fresh
# FingerprintSubHash, its joined parts are hashed with PgQuery.hash_xxh3_64
# (passing FINGERPRINT_VERSION as second argument), and the hash is returned
# as a zero-padded, 16-character hexadecimal string.
def fingerprint
  sub_hash = FingerprintSubHash.new
  fingerprint_tree(sub_hash)

  joined_parts = sub_hash.parts.join
  digest = PgQuery.hash_xxh3_64(joined_parts, FINGERPRINT_VERSION)
  format('%016x', digest)
end

#functions ⇒ Object

Returns function names, ignoring their argument types. This may be insufficient if you need to disambiguate two functions with the same name but different argument types.



64
65
66
# File 'lib/pg_query/parse.rb', line 64

# Returns function names, ignoring their argument types: the :function value
# of every functions_with_details entry, with duplicates removed.
def functions
  names = functions_with_details.map { |details| details[:function] }
  names.uniq
end

#functions_with_details ⇒ Object



91
92
93
94
# File 'lib/pg_query/parse.rb', line 91

# Returns +@functions+, calling load_objects! first when it is still nil.
def functions_with_details
  return @functions unless @functions.nil?

  load_objects!
  @functions
end

#param_refs ⇒ Object

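Returns one entry per parameter reference found in the parse tree, with its location and length, plus the type name when the reference is wrapped in a type cast. Entries are sorted by location.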



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/pg_query/param_refs.rb', line 3

# Collects the parameter references found while walking the parse tree.
#
# Each entry is a Hash with the string keys 'location' and 'length'; entries
# that come from a type cast around a parameter reference also carry
# 'typename' (the cast's type name parts). Entries are sorted by 'location'.
def param_refs # rubocop:disable Metrics/CyclomaticComplexity
  found = []

  treewalker_with_location! @tree do |_, _, node, path|
    case node
    when PgQuery::ParamRef
      # Ignore param refs inside type casts, as these are already handled
      next if path[-3..-1] == %i[type_cast arg param_ref]

      found << { 'location' => node.location,
                 'length' => param_ref_length(node) }
    when PgQuery::TypeCast
      cast_arg = node.arg
      next unless cast_arg && node.type_name

      ref = cast_arg.param_ref
      type_name = node.type_name
      next unless ref && type_name

      start = ref.location
      type_start = type_name.location
      span = param_ref_length(ref)

      if start == -1
        # No location on the param ref itself: fall back to the type's location
        start = type_start
      elsif type_start < start
        # The type starts first, so widen the span to begin at the type
        span += start - type_start
        start = type_start
      end

      found << { 'location' => start, 'length' => span, 'typename' => type_name.names.map { |n| n.string.sval } }
    end
  end

  found.sort_by! { |entry| entry['location'] }
  found
end

#select_tables ⇒ Object



49
50
51
# File 'lib/pg_query/parse.rb', line 49

# Returns the :name values of all tables_with_details entries whose :type is
# :select, with duplicates removed.
def select_tables
  selected = tables_with_details.select { |details| details[:type] == :select }
  selected.map { |details| details[:name] }.uniq
end

#tables ⇒ Object



45
46
47
# File 'lib/pg_query/parse.rb', line 45

# Returns the :name value of every tables_with_details entry, with duplicates
# removed.
def tables
  names = tables_with_details.map { |details| details[:name] }
  names.uniq
end

#tables_with_details ⇒ Object



86
87
88
89
# File 'lib/pg_query/parse.rb', line 86

# Returns +@tables+, calling load_objects! first when it is still nil.
def tables_with_details
  return @tables unless @tables.nil?

  load_objects!
  @tables
end

#truncate(max_length) ⇒ Object

Truncates the query string to be below the specified length, first trying to omit less important parts of the query, and only then cutting off the end.



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
# File 'lib/pg_query/truncate.rb', line 8

# Truncates the query string to be below the specified length, first trying to
# omit less important parts of the query, and only then cutting off the end.
#
# Parts are replaced one at a time on a copy of the tree (deepest location
# first, then longest), re-deparsing after each replacement and returning as
# soon as the result fits.
#
# @param max_length [Integer] maximum size of the returned string
# @return [String] the deparsed query when it already fits, otherwise a
#   shortened form containing '...'. For max_length below 3 the result is
#   '...' itself, which is longer than requested.
# @raise [ArgumentError] if a possible truncation has an unexpected node type
def truncate(max_length) # rubocop:disable Metrics/CyclomaticComplexity
  output = deparse

  # Early exit if we're already below the max length
  return output if output.size <= max_length

  truncations = find_possible_truncations

  # Truncate the deepest possible truncation that is the longest first
  truncations.sort_by! { |t| [-t.location.size, -t.length] }

  # Identifier spliced into the tree in place of an omitted part. It must be
  # the same character the gsub calls below look for (as a quoted "…"),
  # otherwise the placeholder is never turned into a plain '...'.
  placeholder = '…'

  tree = dup_tree
  truncations.each do |truncation|
    # Replacing fewer than 3 characters with '...' would not shorten anything
    next if truncation.length < 3

    find_tree_location(tree, truncation.location) do |node, _k|
      dummy_column_ref = PgQuery::Node.new(column_ref: PgQuery::ColumnRef.new(fields: [PgQuery::Node.new(string: PgQuery::String.new(sval: placeholder))]))
      case truncation.node_type
      when :target_list
        # Only UPDATE / ON CONFLICT targets get a name; it stays nil otherwise
        res_target_name = placeholder if node.is_a?(PgQuery::UpdateStmt) || node.is_a?(PgQuery::OnConflictClause)
        node.target_list.replace(
          [
            PgQuery::Node.new(res_target: PgQuery::ResTarget.new(name: res_target_name, val: dummy_column_ref))
          ]
        )
      when :where_clause
        node.where_clause = dummy_column_ref
      when :values_lists
        node.values_lists.replace(
          [
            PgQuery::Node.new(list: PgQuery::List.new(items: [dummy_column_ref]))
          ]
        )
      when :ctequery
        node.ctequery = PgQuery::Node.new(select_stmt: PgQuery::SelectStmt.new(where_clause: dummy_column_ref, op: :SETOP_NONE))
      when :cols
        node.cols.replace([PgQuery::Node.from(PgQuery::ResTarget.new(name: placeholder))]) if node.is_a?(PgQuery::InsertStmt)
      else
        raise ArgumentError, format('Unexpected truncation node type: %s', truncation.node_type)
      end
    end

    output = PgQuery.deparse(tree).gsub('SELECT WHERE "…"', '...').gsub('"…"', '...')
    return output if output.size <= max_length
  end

  # We couldn't do a proper smart truncation, so we need a hard cut-off.
  # The kept prefix is clamped at zero characters: a negative range end would
  # count from the end of the string and keep (almost) all of it.
  output[0, [max_length - 3, 0].max] + '...'
end

#walk!(&block) ⇒ Object

Walks the parse tree and calls the passed block for each contained node

If you pass a block with 1 argument, you will get each node. If you pass a block with 4 arguments, you will get each parent_node, parent_field, node and location.

If sufficient for the use case, the 1 argument block approach is recommended, since it’s faster.

Location uniquely identifies a given node within the parse tree. This is a stable identifier across multiple parser runs, assuming the same pg_query release and no modifications to the parse tree.



12
13
14
15
16
17
18
19
20
21
22
# File 'lib/pg_query/treewalker.rb', line 12

# Walks the parse tree and calls the passed block for each contained node.
#
# A block with an arity of exactly 1 receives each node (via treewalker!).
# Any other block receives parent_node, parent_field, node and location (via
# treewalker_with_location!).
def walk!(&block)
  return treewalker!(@tree) { |node| yield(node) } if block.arity == 1

  treewalker_with_location!(@tree) do |parent, field, node, location|
    yield(parent, field, node, location)
  end
end