Class: RubyIndexer::Index

Inherits:

Object

Object
RubyIndexer::Index

show all

Extended by:: T::Sig

Defined in:: lib/ruby_indexer/lib/ruby_indexer/index.rb

Defined Under Namespace

Classes: NonExistingNamespaceError, UnresolvableAliasError

Constant Summary collapse

ENTRY_SIMILARITY_THRESHOLD = The minimum Jaro-Winkler similarity score for an entry to be considered a match for a given fuzzy search query

0.7

Instance Attribute Summary collapse

#configuration ⇒ Object readonly

Returns the value of attribute configuration.

Instance Method Summary collapse

Constructor Details

#initialize ⇒ `Index`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 18

def initialize
  # Holds all entries in the index using the following format:
  # {
  #  "Foo" => [#<Entry::Class>, #<Entry::Class>],
  #  "Foo::Bar" => [#<Entry::Class>],
  # }
  @entries = T.let({}, T::Hash[String, T::Array[Entry]])

  # Holds all entries in the index using a prefix tree for searching based on prefixes to provide autocompletion
  @entries_tree = T.let(PrefixTree[T::Array[Entry]].new, PrefixTree[T::Array[Entry]])

  # Holds references to where entries where discovered so that we can easily delete them
  # {
  #  "/my/project/foo.rb" => [#<Entry::Class>, #<Entry::Class>],
  #  "/my/project/bar.rb" => [#<Entry::Class>],
  # }
  @files_to_entries = T.let({}, T::Hash[String, T::Array[Entry]])

  # Holds all require paths for every indexed item so that we can provide autocomplete for requires
  @require_paths_tree = T.let(PrefixTree[IndexablePath].new, PrefixTree[IndexablePath])

  # Holds the linearized ancestors list for every namespace
  @ancestors = T.let({}, T::Hash[String, T::Array[String]])

  # List of classes that are enhancing the index
  @enhancements = T.let([], T::Array[Enhancement])

  # Map of module name to included hooks that have to be executed when we include the given module
  @included_hooks = T.let(
    {},
    T::Hash[String, T::Array[T.proc.params(index: Index, base: Entry::Namespace).void]],
  )

  @configuration = T.let(RubyIndexer::Configuration.new, Configuration)
end

Instance Attribute Details

#configuration ⇒ `Object` (readonly)

Returns the value of attribute configuration.



15
16
17

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 15

def configuration
  @configuration
end

Instance Method Details

#[](fully_qualified_name) ⇒ `Object`



104
105
106

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 104

def [](fully_qualified_name)
  @entries[fully_qualified_name.delete_prefix("::")]
end

#add(entry, skip_prefix_tree: false) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 95

def add(entry, skip_prefix_tree: false)
  name = entry.name

  (@entries[name] ||= []) << entry
  (@files_to_entries[entry.file_path] ||= []) << entry
  @entries_tree.insert(name, T.must(@entries[name])) unless skip_prefix_tree
end

#delete(indexable) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 67

def delete(indexable)
  # For each constant discovered in `path`, delete the associated entry from the index. If there are no entries
  # left, delete the constant from the index.
  @files_to_entries[indexable.full_path]&.each do |entry|
    name = entry.name
    entries = @entries[name]
    next unless entries

    # Delete the specific entry from the list for this name
    entries.delete(entry)

    # If all entries were deleted, then remove the name from the hash and from the prefix tree. Otherwise, update
    # the prefix tree with the current entries
    if entries.empty?
      @entries.delete(name)
      @entries_tree.delete(name)
    else
      @entries_tree.insert(name, entries)
    end
  end

  @files_to_entries.delete(indexable.full_path)

  require_path = indexable.require_path
  @require_paths_tree.delete(require_path) if require_path
end

#empty? ⇒ `Boolean`



575
576
577

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 575

def empty?
  @entries.empty?
end

#existing_or_new_singleton_class(name) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 595

def existing_or_new_singleton_class(name)
  *_namespace, unqualified_name = name.split("::")
  full_singleton_name = "#{name}::<Class:#{unqualified_name}>"
  singleton = T.cast(self[full_singleton_name]&.first, T.nilable(Entry::SingletonClass))

  unless singleton
    attached_ancestor = T.must(self[name]&.first)

    singleton = Entry::SingletonClass.new(
      [full_singleton_name],
      attached_ancestor.file_path,
      attached_ancestor.location,
      attached_ancestor.name_location,
      [],
      nil,
    )
    add(singleton, skip_prefix_tree: true)
  end

  singleton
end

#first_unqualified_const(name) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 125

def first_unqualified_const(name)
  _name, entries = @entries.find do |const_name, _entries|
    const_name.end_with?(name)
  end

  T.cast(
    entries,
    T.nilable(T::Array[T.any(
      Entry::Namespace,
      Entry::ConstantAlias,
      Entry::UnresolvedConstantAlias,
      Entry::Constant,
    )]),
  )
end

#follow_aliased_namespace(name, seen_names = []) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 366

def follow_aliased_namespace(name, seen_names = [])
  return name if @entries[name]

  parts = name.split("::")
  real_parts = []

  (parts.length - 1).downto(0).each do |i|
    current_name = T.must(parts[0..i]).join("::")
    entry = @entries[current_name]&.first

    case entry
    when Entry::ConstantAlias
      target = entry.target
      return follow_aliased_namespace("#{target}::#{real_parts.join("::")}", seen_names)
    when Entry::UnresolvedConstantAlias
      resolved = resolve_alias(entry, seen_names)

      if resolved.is_a?(Entry::UnresolvedConstantAlias)
        raise UnresolvableAliasError, "The constant #{resolved.name} is an alias to a non existing constant"
      end

      target = resolved.target
      return follow_aliased_namespace("#{target}::#{real_parts.join("::")}", seen_names)
    else
      real_parts.unshift(T.must(parts[i]))
    end
  end

  real_parts.join("::")
end

#fuzzy_search(query) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 174

def fuzzy_search(query)
  unless query
    entries = @entries.filter_map do |_name, entries|
      next if entries.first.is_a?(Entry::SingletonClass)

      entries
    end

    return entries.flatten
  end

  normalized_query = query.gsub("::", "").downcase

  results = @entries.filter_map do |name, entries|
    next if entries.first.is_a?(Entry::SingletonClass)

    similarity = DidYouMean::JaroWinkler.distance(name.gsub("::", "").downcase, normalized_query)
    [entries, -similarity] if similarity > ENTRY_SIMILARITY_THRESHOLD
  end
  results.sort_by!(&:last)
  results.flat_map(&:first)
end

#handle_change(indexable) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 547

def handle_change(indexable)
  original_entries = @files_to_entries[indexable.full_path]

  delete(indexable)
  index_single(indexable)

  updated_entries = @files_to_entries[indexable.full_path]

  return unless original_entries && updated_entries

  # A change in one ancestor may impact several different others, which could be including that ancestor through
  # indirect means like including a module that than includes the ancestor. Trying to figure out exactly which
  # ancestors need to be deleted is too expensive. Therefore, if any of the namespace entries has a change to their
  # ancestor hash, we clear all ancestors and start linearizing lazily again from scratch
  original_map = T.cast(
    original_entries.select { |e| e.is_a?(Entry::Namespace) },
    T::Array[Entry::Namespace],
  ).to_h { |e| [e.name, e.ancestor_hash] }

  updated_map = T.cast(
    updated_entries.select { |e| e.is_a?(Entry::Namespace) },
    T::Array[Entry::Namespace],
  ).to_h { |e| [e.name, e.ancestor_hash] }

  @ancestors.clear if original_map.any? { |name, hash| updated_map[name] != hash }
end

#index_all(indexable_paths: @configuration.indexables, &block) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 304

def index_all(indexable_paths: @configuration.indexables, &block)
  RBSIndexer.new(self).index_ruby_core
  # Calculate how many paths are worth 1% of progress
  progress_step = (indexable_paths.length / 100.0).ceil

  indexable_paths.each_with_index do |path, index|
    if block && index % progress_step == 0
      progress = (index / progress_step) + 1
      break unless block.call(progress)
    end

    index_single(path)
  end
end

#index_single(indexable_path, source = nil) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 320

def index_single(indexable_path, source = nil)
  content = source || File.read(indexable_path.full_path)
  dispatcher = Prism::Dispatcher.new

  result = Prism.parse(content)
  listener = DeclarationListener.new(
    self,
    dispatcher,
    result,
    indexable_path.full_path,
    enhancements: @enhancements,
  )
  dispatcher.dispatch(result.value)

  indexing_errors = listener.indexing_errors.uniq

  require_path = indexable_path.require_path
  @require_paths_tree.insert(require_path, indexable_path) if require_path

  if indexing_errors.any?
    indexing_errors.each do |error|
      $stderr.puts error
    end
  end
rescue Errno::EISDIR, Errno::ENOENT
  # If `path` is a directory, just ignore it and continue indexing. If the file doesn't exist, then we also ignore
  # it
rescue SystemStackError => e
  if e.backtrace&.first&.include?("prism")
    $stderr.puts "Prism error indexing #{indexable_path.full_path}: #{e.message}"
  else
    raise
  end
end

#indexed?(name) ⇒ `Boolean`



585
586
587

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 585

def indexed?(name)
  @entries.key?(name)
end

#instance_variable_completion_candidates(name, owner_name) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 535

def instance_variable_completion_candidates(name, owner_name)
  entries = T.cast(prefix_search(name).flatten, T::Array[Entry::InstanceVariable])
  ancestors = linearized_ancestors_of(owner_name)

  variables = entries.select { |e| ancestors.any?(e.owner&.name) }
  variables.uniq!(&:name)
  variables
end

#length ⇒ `Object`



590
591
592

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 590

def length
  @entries.count
end

#linearized_ancestors_of(fully_qualified_name) ⇒ `Object`

Raises:

(NonExistingNamespaceError)

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 444

def linearized_ancestors_of(fully_qualified_name)
  # If we already computed the ancestors for this namespace, return it straight away
  cached_ancestors = @ancestors[fully_qualified_name]
  return cached_ancestors if cached_ancestors

  parts = fully_qualified_name.split("::")
  singleton_levels = 0

  parts.reverse_each do |part|
    break unless part.include?("<Class:")

    singleton_levels += 1
    parts.pop
  end

  attached_class_name = parts.join("::")

  # If we don't have an entry for `name`, raise
  entries = self[fully_qualified_name]

  if singleton_levels > 0 && !entries && indexed?(attached_class_name)
    entries = [existing_or_new_singleton_class(attached_class_name)]
  end

  raise NonExistingNamespaceError, "No entry found for #{fully_qualified_name}" unless entries

  ancestors = [fully_qualified_name]

  # Cache the linearized ancestors array eagerly. This is important because we might have circular dependencies and
  # this will prevent us from falling into an infinite recursion loop. Because we mutate the ancestors array later,
  # the cache will reflect the final result
  @ancestors[fully_qualified_name] = ancestors

  # If none of the entries for `name` are namespaces, raise
  namespaces = entries.filter_map do |entry|
    case entry
    when Entry::Namespace
      entry
    when Entry::ConstantAlias
      self[entry.target]&.grep(Entry::Namespace)
    end
  end.flatten

  raise NonExistingNamespaceError,
    "None of the entries for #{fully_qualified_name} are modules or classes" if namespaces.empty?

  # The original nesting where we discovered this namespace, so that we resolve the correct names of the
  # included/prepended/extended modules and parent classes
  nesting = T.must(namespaces.first).nesting.flat_map { |n| n.split("::") }

  if nesting.any?
    singleton_levels.times do
      nesting << "<Class:#{T.must(nesting.last)}>"
    end
  end

  # We only need to run included hooks when linearizing singleton classes. Included hooks are typically used to add
  # new singleton methods or to extend a module through an include. There's no need to support instance methods, the
  # inclusion of another module or the prepending of another module, because those features are already a part of
  # Ruby and can be used directly without any metaprogramming
  run_included_hooks(attached_class_name, nesting) if singleton_levels > 0

  linearize_mixins(ancestors, namespaces, nesting)
  linearize_superclass(
    ancestors,
    attached_class_name,
    fully_qualified_name,
    namespaces,
    nesting,
    singleton_levels,
  )

  ancestors
end

#method_completion_candidates(name, receiver_name) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 203

def method_completion_candidates(name, receiver_name)
  ancestors = linearized_ancestors_of(receiver_name)

  candidates = name ? prefix_search(name).flatten : @entries.values.flatten
  completion_items = candidates.each_with_object({}) do |entry, hash|
    unless entry.is_a?(Entry::Member) || entry.is_a?(Entry::MethodAlias) ||
        entry.is_a?(Entry::UnresolvedMethodAlias)
      next
    end

    entry_name = entry.name
    ancestor_index = ancestors.index(entry.owner&.name)
    existing_entry, existing_entry_index = hash[entry_name]

    # Conditions for matching a method completion candidate:
    # 1. If an ancestor_index was found, it means that this method is owned by the receiver. The exact index is
    # where in the ancestor chain the method was found. For example, if the ancestors are ["A", "B", "C"] and we
    # found the method declared in `B`, then the ancestors index is 1
    #
    # 2. We already established that this method is owned by the receiver. Now, check if we already added a
    # completion candidate for this method name. If not, then we just go and add it (the left hand side of the or)
    #
    # 3. If we had already found a method entry for the same name, then we need to check if the current entry that
    # we are comparing appears first in the hierarchy or not. For example, imagine we have the method `open` defined
    # in both `File` and its parent `IO`. If we first find the method `open` in `IO`, it will be inserted into the
    # hash. Then, when we find the entry for `open` owned by `File`, we need to replace `IO.open` by `File.open`,
    # since `File.open` appears first in the hierarchy chain and is therefore the correct method being invoked. The
    # last part of the conditional checks if the current entry was found earlier in the hierarchy chain, in which
    # case we must update the existing entry to avoid showing the wrong method declaration for overridden methods
    next unless ancestor_index && (!existing_entry || ancestor_index < existing_entry_index)

    if entry.is_a?(Entry::UnresolvedMethodAlias)
      resolved_alias = resolve_method_alias(entry, receiver_name)
      hash[entry_name] = [resolved_alias, ancestor_index] if resolved_alias.is_a?(Entry::MethodAlias)
    else
      hash[entry_name] = [entry, ancestor_index]
    end
  end

  completion_items.values.map!(&:first)
end

#names ⇒ `Object`



580
581
582

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 580

def names
  @entries.keys
end

#prefix_search(query, nesting = nil) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 155

def prefix_search(query, nesting = nil)
  unless nesting
    results = @entries_tree.search(query)
    results.uniq!
    return results
  end

  results = nesting.length.downto(0).flat_map do |i|
    prefix = T.must(nesting[0...i]).join("::")
    namespaced_query = prefix.empty? ? query : "#{prefix}::#{query}"
    @entries_tree.search(namespaced_query)
  end

  results.uniq!
  results
end

#register_enhancement(enhancement) ⇒ `Object`



56
57
58

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 56

def register_enhancement(enhancement)
  @enhancements << enhancement
end

#register_included_hook(module_name, &hook) ⇒ `Object`



62
63
64

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 62

def register_included_hook(module_name, &hook)
  (@included_hooks[module_name] ||= []) << hook
end

#resolve(name, nesting, seen_names = []) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 263

def resolve(name, nesting, seen_names = [])
  # If we have a top level reference, then we just search for it straight away ignoring the nesting
  if name.start_with?("::")
    entries = direct_or_aliased_constant(name.delete_prefix("::"), seen_names)
    return entries if entries
  end

  # Non qualified reference path
  full_name = nesting.any? ? "#{nesting.join("::")}::#{name}" : name

  # When the name is not qualified with any namespaces, Ruby will take several steps to try to the resolve the
  # constant. First, it will try to find the constant in the exact namespace where the reference was found
  entries = direct_or_aliased_constant(full_name, seen_names)
  return entries if entries

  # If the constant is not found yet, then Ruby will try to find the constant in the enclosing lexical scopes,
  # unwrapping each level one by one. Important note: the top level is not included because that's the fallback of
  # the algorithm after every other possibility has been exhausted
  entries = lookup_enclosing_scopes(name, nesting, seen_names)
  return entries if entries

  # If the constant does not exist in any enclosing scopes, then Ruby will search for it in the ancestors of the
  # specific namespace where the reference was found
  entries = lookup_ancestor_chain(name, nesting, seen_names)
  return entries if entries

  # Finally, as a fallback, Ruby will search for the constant in the top level namespace
  direct_or_aliased_constant(name, seen_names)
rescue UnresolvableAliasError
  nil
end

#resolve_instance_variable(variable_name, owner_name) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 522

def resolve_instance_variable(variable_name, owner_name)
  entries = T.cast(self[variable_name], T.nilable(T::Array[Entry::InstanceVariable]))
  return unless entries

  ancestors = linearized_ancestors_of(owner_name)
  return if ancestors.empty?

  entries.select { |e| ancestors.include?(e.owner&.name) }
end

#resolve_method(method_name, receiver_name, inherited_only: false) ⇒ `Object`

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 406

def resolve_method(method_name, receiver_name, inherited_only: false)
  method_entries = self[method_name]
  return unless method_entries

  ancestors = linearized_ancestors_of(receiver_name.delete_prefix("::"))
  ancestors.each do |ancestor|
    next if inherited_only && ancestor == receiver_name

    found = method_entries.filter_map do |entry|
      case entry
      when Entry::Member, Entry::MethodAlias
        entry if entry.owner&.name == ancestor
      when Entry::UnresolvedMethodAlias
        # Resolve aliases lazily as we find them
        if entry.owner&.name == ancestor
          resolved_alias = resolve_method_alias(entry, receiver_name)
          resolved_alias if resolved_alias.is_a?(Entry::MethodAlias)
        end
      end
    end

    return found if found.any?
  end

  nil
rescue NonExistingNamespaceError
  nil
end

#search_require_paths(query) ⇒ `Object`



109
110
111

# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 109

def search_require_paths(query)
  @require_paths_tree.search(query)
end

Class: RubyIndexer::Index

Defined Under Namespace

Constant Summary collapse

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Index

Instance Attribute Details

#configuration ⇒ Object (readonly)

Instance Method Details

#[](fully_qualified_name) ⇒ Object

#add(entry, skip_prefix_tree: false) ⇒ Object

#delete(indexable) ⇒ Object

#empty? ⇒ Boolean

#existing_or_new_singleton_class(name) ⇒ Object

#first_unqualified_const(name) ⇒ Object

#follow_aliased_namespace(name, seen_names = []) ⇒ Object

#fuzzy_search(query) ⇒ Object

#handle_change(indexable) ⇒ Object

#index_all(indexable_paths: @configuration.indexables, &block) ⇒ Object

#index_single(indexable_path, source = nil) ⇒ Object

#indexed?(name) ⇒ Boolean

#instance_variable_completion_candidates(name, owner_name) ⇒ Object

#length ⇒ Object

#linearized_ancestors_of(fully_qualified_name) ⇒ Object

#method_completion_candidates(name, receiver_name) ⇒ Object

#names ⇒ Object

#prefix_search(query, nesting = nil) ⇒ Object

#register_enhancement(enhancement) ⇒ Object

#register_included_hook(module_name, &hook) ⇒ Object

#resolve(name, nesting, seen_names = []) ⇒ Object

#resolve_instance_variable(variable_name, owner_name) ⇒ Object

#resolve_method(method_name, receiver_name, inherited_only: false) ⇒ Object

#search_require_paths(query) ⇒ Object