Class: EmergeCLI::Reaper::AstParser

Inherits:
Object
  • Object
show all
Defined in:
lib/reaper/ast_parser.rb

Overview

Parses the AST of a given file using Tree-sitter and lets us find usages of a type or delete types. It has significant limitations because it only looks at a single file at a time, but it gets us most of the way there.

Constant Summary collapse

# Declaration node type symbols, keyed by language name ('swift', 'kotlin', 'java').
# Both the hash and each per-language array are frozen, so a value fetched
# from this table cannot be mutated in place by a caller.
DECLARATION_NODE_TYPES =
{
  'swift' => %i[class_declaration protocol_declaration].freeze,
  'kotlin' => %i[class_declaration protocol_declaration interface_declaration object_declaration].freeze,
  'java' => %i[class_declaration protocol_declaration interface_declaration].freeze
}.freeze
# Identifier node type symbols, keyed by language name ('swift', 'kotlin', 'java').
# Both the hash and each per-language array are frozen, so a value fetched
# from this table cannot be mutated in place by a caller.
IDENTIFIER_NODE_TYPES =
{
  'swift' => %i[simple_identifier qualified_name identifier type_identifier].freeze,
  'kotlin' => %i[simple_identifier qualified_name identifier type_identifier].freeze,
  'java' => %i[simple_identifier qualified_name identifier type_identifier].freeze
}.freeze
# Comment and import node type symbols, keyed by language name ('swift', 'kotlin', 'java').
# Both the hash and each per-language array are frozen, so a value fetched
# from this table cannot be mutated in place by a caller.
COMMENT_AND_IMPORT_NODE_TYPES =
{
  'swift' => %i[comment import_declaration].freeze,
  'kotlin' => %i[comment import_header].freeze,
  'java' => %i[comment import_declaration].freeze
}.freeze

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(language) ⇒ AstParser

Returns a new instance of AstParser.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/reaper/ast_parser.rb', line 29

# Builds a parser and loads the Tree-sitter grammar library for +language+.
#
# The library file name is derived from the language plus the OS and CPU
# family detected in RUBY_PLATFORM, and is looked up under the relative
# 'parsers' directory.
#
# @param language [String] one of 'swift', 'kotlin' or 'java'
# @raise [RuntimeError] when the platform, architecture or language is not supported
def initialize(language)
  @parser = TreeSitter::Parser.new
  @language = language
  @current_file_contents = nil

  os_name =
    if RUBY_PLATFORM.match?(/darwin/)
      'darwin'
    elsif RUBY_PLATFORM.match?(/linux/)
      'linux'
    else
      raise "Unsupported platform: #{RUBY_PLATFORM}"
    end

  cpu_name =
    if RUBY_PLATFORM.match?(/x86_64|amd64/)
      'x86_64'
    elsif RUBY_PLATFORM.match?(/arm64|aarch64/)
      'arm64'
    else
      raise "Unsupported architecture: #{RUBY_PLATFORM}"
    end

  library_suffix = os_name == 'darwin' ? 'dylib' : 'so'
  library_name = "libtree-sitter-#{language}-#{os_name}-#{cpu_name}.#{library_suffix}"
  library_path = File.join('parsers', library_name)

  # Pick the canonical grammar name; anything outside this list is rejected.
  grammar = %w[swift kotlin java].find { |known| known == language }
  raise "Unsupported language: #{language}" unless grammar

  @parser.language = TreeSitter::Language.load(grammar, library_path)
end

Instance Attribute Details

#language ⇒ Object (readonly)

Returns the value of attribute language.



27
28
29
# File 'lib/reaper/ast_parser.rb', line 27

# Reader for @language, the value the constructor stores from its
# +language+ argument.
#
# @return [Object] the stored language
def language
  @language
end

#parser ⇒ Object (readonly)

Returns the value of attribute parser.



27
28
29
# File 'lib/reaper/ast_parser.rb', line 27

# Reader for @parser, the TreeSitter::Parser instance created by the
# constructor.
#
# @return [Object] the stored parser
def parser
  @parser
end

Instance Method Details

#delete_type(file_contents:, type_name:) ⇒ Object

Deletes a type from the given file contents. Returns the modified file contents, or nil when nothing but comments and imports would remain after the deletion. TODO(telkins): Look into the tree-sitter query API to see if it simplifies this.



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/reaper/ast_parser.rb', line 71

# Removes every declaration whose fully qualified name equals +type_name+,
# plus every extension whose first :user_type child resolves to it, from
# +file_contents+.
#
# @param file_contents [String] source text to edit
# @param type_name [String] name compared against fully_qualified_type_name
# @return [String, nil] the edited source with the original trailing-newline
#   state kept, or nil when only comments and imports would be left
def delete_type(file_contents:, type_name:)
  @current_file_contents = file_contents
  root = @parser.parse_string(nil, file_contents).root_node
  pending = [TreeSitter::TreeCursor.new(root).current_node]
  removal_ranges = []

  # Breadth-first walk over the named nodes of the tree.
  while (current = pending.shift)
    Logger.debug "Processing node: #{current.type} #{node_text(current)}"

    declared = declaration_node_types.include?(current.type) && find_type_identifier(current)
    remove_node(current, removal_ranges) if declared && fully_qualified_type_name(declared) == type_name

    if extension?(current)
      first_user_type = current.select { |candidate| candidate.type == :user_type }.first
      if first_user_type && fully_qualified_type_name(first_user_type) == type_name
        remove_node(current, removal_ranges)
      end
    end

    current.each_named { |child| pending << child }
  end

  lines = file_contents.split("\n")
  removal_ranges.each do |range|
    first_row = range[:start]
    last_row = range[:end]
    Logger.debug "Removing lines #{first_row} to #{last_row}"
    first_row.upto(last_row) { |row| lines[row] = nil }

    # Also drop the line right after the removed span, but only when it is blank.
    following = last_row + 1
    lines[following] = nil if following < lines.length && lines[following]&.match?(/^\s*$/)
  end

  remaining = lines.compact.join("\n")
  reparsed_root = @parser.parse_string(nil, remaining).root_node
  return if only_comments_and_imports?(TreeSitter::TreeCursor.new(reparsed_root))

  # Keep the trailing newline only if the input had one.
  trimmed = remaining.rstrip
  return trimmed unless file_contents.end_with?("\n")

  "#{trimmed}\n"
end

#delete_usage(file_contents:, type_name:) ⇒ Object



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/reaper/ast_parser.rb', line 147

# Scans +file_contents+ for identifier nodes whose text equals +type_name+
# and removes the parent node chosen by find_removable_parent for each hit.
#
# @param file_contents [String] source text to edit
# @param type_name [String] identifier text to look for
# @return [Object] +file_contents+ itself when nothing is removable,
#   otherwise whatever remove_nodes_from_content returns
def delete_usage(file_contents:, type_name:)
  @current_file_contents = file_contents
  root = @parser.parse_string(nil, file_contents).root_node
  pending = [TreeSitter::TreeCursor.new(root).current_node]
  removals = []

  Logger.debug "Starting to scan for usages of #{type_name}"

  # Breadth-first walk over every child node (named or not).
  while (current = pending.shift)
    if identifier_node_types.include?(current.type) && node_text(current) == type_name
      Logger.debug "Found usage of #{type_name} in node type: #{current.type}"
      if (target = find_removable_parent(current))
        Logger.debug "Will remove parent node of type: #{target.type}"
        Logger.debug "Node text to remove: #{node_text(target)}"
        removals << target
      else
        Logger.debug 'No suitable parent node found for removal'
      end
    end

    current.each { |child| pending << child }
  end

  if removals.empty?
    file_contents
  else
    Logger.debug "Found #{removals.length} nodes to remove"
    remove_nodes_from_content(file_contents, removals)
  end
end

#find_usages(file_contents:, type_name:) ⇒ Object

Finds all usages of a given type in a file. TODO(telkins): Look into the tree-sitter query API to see if it simplifies this.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/reaper/ast_parser.rb', line 121

# Collects every place in +file_contents+ where +type_name+ appears.
#
# A non-root node whose parent is a declaration node and whose fully
# qualified name equals +type_name+ is reported as a 'declaration'.
# Otherwise an identifier node whose text equals +type_name+ is reported as
# an 'identifier'.
#
# @param file_contents [String] source text to scan
# @param type_name [String] name to look for
# @return [Array<Hash>] hashes with :line (the node's start row) and
#   :usage_type, in breadth-first order
def find_usages(file_contents:, type_name:)
  @current_file_contents = file_contents
  tree = @parser.parse_string(nil, file_contents)
  root = tree.root_node
  pending = [TreeSitter::TreeCursor.new(root).current_node]
  found = []

  while (current = pending.shift)
    is_identifier = identifier_node_types.include?(current.type)
    # The root has no enclosing declaration, so it never counts as one.
    in_declaration = current == root ? false : declaration_node_types.include?(current.parent&.type)

    usage_type =
      if in_declaration && fully_qualified_type_name(current) == type_name
        'declaration'
      elsif is_identifier && node_text(current) == type_name
        'identifier'
      end
    found << { line: current.start_point.row, usage_type: usage_type } if usage_type

    current.each { |child| pending << child }
  end

  found
end