Class: RubyIndexer::Index

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/ruby_indexer/lib/ruby_indexer/index.rb

Defined Under Namespace

Classes: NonExistingNamespaceError, UnresolvableAliasError

Constant Summary collapse

ENTRY_SIMILARITY_THRESHOLD =

The minimum Jaro-Winkler similarity score for an entry to be considered a match for a given fuzzy search query

0.7

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initializeIndex



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 18

def initialize
  # Holds all entries in the index using the following format:
  # {
  #  "Foo" => [#<Entry::Class>, #<Entry::Class>],
  #  "Foo::Bar" => [#<Entry::Class>],
  # }
  @entries = T.let({}, T::Hash[String, T::Array[Entry]])

  # Holds all entries in the index using a prefix tree for searching based on prefixes to provide autocompletion
  @entries_tree = T.let(PrefixTree[T::Array[Entry]].new, PrefixTree[T::Array[Entry]])

  # Holds references to where entries where discovered so that we can easily delete them
  # {
  #  "/my/project/foo.rb" => [#<Entry::Class>, #<Entry::Class>],
  #  "/my/project/bar.rb" => [#<Entry::Class>],
  # }
  @files_to_entries = T.let({}, T::Hash[String, T::Array[Entry]])

  # Holds all require paths for every indexed item so that we can provide autocomplete for requires
  @require_paths_tree = T.let(PrefixTree[IndexablePath].new, PrefixTree[IndexablePath])

  # Holds the linearized ancestors list for every namespace
  @ancestors = T.let({}, T::Hash[String, T::Array[String]])

  # List of classes that are enhancing the index
  @enhancements = T.let([], T::Array[Enhancement])

  # Map of module name to included hooks that have to be executed when we include the given module
  @included_hooks = T.let(
    {},
    T::Hash[String, T::Array[T.proc.params(index: Index, base: Entry::Namespace).void]],
  )

  @configuration = T.let(RubyIndexer::Configuration.new, Configuration)
end

Instance Attribute Details

#configurationObject (readonly)

Returns the value of attribute configuration.



15
16
17
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 15

def configuration
  @configuration
end

Instance Method Details

#[](fully_qualified_name) ⇒ Object



104
105
106
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 104

def [](fully_qualified_name)
  @entries[fully_qualified_name.delete_prefix("::")]
end

#add(entry, skip_prefix_tree: false) ⇒ Object



95
96
97
98
99
100
101
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 95

def add(entry, skip_prefix_tree: false)
  name = entry.name

  (@entries[name] ||= []) << entry
  (@files_to_entries[entry.file_path] ||= []) << entry
  @entries_tree.insert(name, T.must(@entries[name])) unless skip_prefix_tree
end

#delete(indexable) ⇒ Object



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 67

def delete(indexable)
  # For each constant discovered in `path`, delete the associated entry from the index. If there are no entries
  # left, delete the constant from the index.
  @files_to_entries[indexable.full_path]&.each do |entry|
    name = entry.name
    entries = @entries[name]
    next unless entries

    # Delete the specific entry from the list for this name
    entries.delete(entry)

    # If all entries were deleted, then remove the name from the hash and from the prefix tree. Otherwise, update
    # the prefix tree with the current entries
    if entries.empty?
      @entries.delete(name)
      @entries_tree.delete(name)
    else
      @entries_tree.insert(name, entries)
    end
  end

  @files_to_entries.delete(indexable.full_path)

  require_path = indexable.require_path
  @require_paths_tree.delete(require_path) if require_path
end

#empty?Boolean



575
576
577
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 575

def empty?
  @entries.empty?
end

#existing_or_new_singleton_class(name) ⇒ Object



595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 595

def existing_or_new_singleton_class(name)
  *_namespace, unqualified_name = name.split("::")
  full_singleton_name = "#{name}::<Class:#{unqualified_name}>"
  singleton = T.cast(self[full_singleton_name]&.first, T.nilable(Entry::SingletonClass))

  unless singleton
    attached_ancestor = T.must(self[name]&.first)

    singleton = Entry::SingletonClass.new(
      [full_singleton_name],
      attached_ancestor.file_path,
      attached_ancestor.location,
      attached_ancestor.name_location,
      [],
      nil,
    )
    add(singleton, skip_prefix_tree: true)
  end

  singleton
end

#first_unqualified_const(name) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 125

def first_unqualified_const(name)
  _name, entries = @entries.find do |const_name, _entries|
    const_name.end_with?(name)
  end

  T.cast(
    entries,
    T.nilable(T::Array[T.any(
      Entry::Namespace,
      Entry::ConstantAlias,
      Entry::UnresolvedConstantAlias,
      Entry::Constant,
    )]),
  )
end

#follow_aliased_namespace(name, seen_names = []) ⇒ Object



366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 366

def follow_aliased_namespace(name, seen_names = [])
  return name if @entries[name]

  parts = name.split("::")
  real_parts = []

  (parts.length - 1).downto(0).each do |i|
    current_name = T.must(parts[0..i]).join("::")
    entry = @entries[current_name]&.first

    case entry
    when Entry::ConstantAlias
      target = entry.target
      return follow_aliased_namespace("#{target}::#{real_parts.join("::")}", seen_names)
    when Entry::UnresolvedConstantAlias
      resolved = resolve_alias(entry, seen_names)

      if resolved.is_a?(Entry::UnresolvedConstantAlias)
        raise UnresolvableAliasError, "The constant #{resolved.name} is an alias to a non existing constant"
      end

      target = resolved.target
      return follow_aliased_namespace("#{target}::#{real_parts.join("::")}", seen_names)
    else
      real_parts.unshift(T.must(parts[i]))
    end
  end

  real_parts.join("::")
end

#fuzzy_search(query) ⇒ Object



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 174

def fuzzy_search(query)
  unless query
    entries = @entries.filter_map do |_name, entries|
      next if entries.first.is_a?(Entry::SingletonClass)

      entries
    end

    return entries.flatten
  end

  normalized_query = query.gsub("::", "").downcase

  results = @entries.filter_map do |name, entries|
    next if entries.first.is_a?(Entry::SingletonClass)

    similarity = DidYouMean::JaroWinkler.distance(name.gsub("::", "").downcase, normalized_query)
    [entries, -similarity] if similarity > ENTRY_SIMILARITY_THRESHOLD
  end
  results.sort_by!(&:last)
  results.flat_map(&:first)
end

#handle_change(indexable) ⇒ Object



547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 547

def handle_change(indexable)
  original_entries = @files_to_entries[indexable.full_path]

  delete(indexable)
  index_single(indexable)

  updated_entries = @files_to_entries[indexable.full_path]

  return unless original_entries && updated_entries

  # A change in one ancestor may impact several different others, which could be including that ancestor through
  # indirect means like including a module that than includes the ancestor. Trying to figure out exactly which
  # ancestors need to be deleted is too expensive. Therefore, if any of the namespace entries has a change to their
  # ancestor hash, we clear all ancestors and start linearizing lazily again from scratch
  original_map = T.cast(
    original_entries.select { |e| e.is_a?(Entry::Namespace) },
    T::Array[Entry::Namespace],
  ).to_h { |e| [e.name, e.ancestor_hash] }

  updated_map = T.cast(
    updated_entries.select { |e| e.is_a?(Entry::Namespace) },
    T::Array[Entry::Namespace],
  ).to_h { |e| [e.name, e.ancestor_hash] }

  @ancestors.clear if original_map.any? { |name, hash| updated_map[name] != hash }
end

#index_all(indexable_paths: @configuration.indexables, &block) ⇒ Object



304
305
306
307
308
309
310
311
312
313
314
315
316
317
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 304

def index_all(indexable_paths: @configuration.indexables, &block)
  RBSIndexer.new(self).index_ruby_core
  # Calculate how many paths are worth 1% of progress
  progress_step = (indexable_paths.length / 100.0).ceil

  indexable_paths.each_with_index do |path, index|
    if block && index % progress_step == 0
      progress = (index / progress_step) + 1
      break unless block.call(progress)
    end

    index_single(path)
  end
end

#index_single(indexable_path, source = nil) ⇒ Object



320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 320

def index_single(indexable_path, source = nil)
  content = source || File.read(indexable_path.full_path)
  dispatcher = Prism::Dispatcher.new

  result = Prism.parse(content)
  listener = DeclarationListener.new(
    self,
    dispatcher,
    result,
    indexable_path.full_path,
    enhancements: @enhancements,
  )
  dispatcher.dispatch(result.value)

  indexing_errors = listener.indexing_errors.uniq

  require_path = indexable_path.require_path
  @require_paths_tree.insert(require_path, indexable_path) if require_path

  if indexing_errors.any?
    indexing_errors.each do |error|
      $stderr.puts error
    end
  end
rescue Errno::EISDIR, Errno::ENOENT
  # If `path` is a directory, just ignore it and continue indexing. If the file doesn't exist, then we also ignore
  # it
rescue SystemStackError => e
  if e.backtrace&.first&.include?("prism")
    $stderr.puts "Prism error indexing #{indexable_path.full_path}: #{e.message}"
  else
    raise
  end
end

#indexed?(name) ⇒ Boolean



585
586
587
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 585

def indexed?(name)
  @entries.key?(name)
end

#instance_variable_completion_candidates(name, owner_name) ⇒ Object



535
536
537
538
539
540
541
542
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 535

def instance_variable_completion_candidates(name, owner_name)
  entries = T.cast(prefix_search(name).flatten, T::Array[Entry::InstanceVariable])
  ancestors = linearized_ancestors_of(owner_name)

  variables = entries.select { |e| ancestors.any?(e.owner&.name) }
  variables.uniq!(&:name)
  variables
end

#lengthObject



590
591
592
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 590

def length
  @entries.count
end

#linearized_ancestors_of(fully_qualified_name) ⇒ Object



444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 444

def linearized_ancestors_of(fully_qualified_name)
  # If we already computed the ancestors for this namespace, return it straight away
  cached_ancestors = @ancestors[fully_qualified_name]
  return cached_ancestors if cached_ancestors

  parts = fully_qualified_name.split("::")
  singleton_levels = 0

  parts.reverse_each do |part|
    break unless part.include?("<Class:")

    singleton_levels += 1
    parts.pop
  end

  attached_class_name = parts.join("::")

  # If we don't have an entry for `name`, raise
  entries = self[fully_qualified_name]

  if singleton_levels > 0 && !entries && indexed?(attached_class_name)
    entries = [existing_or_new_singleton_class(attached_class_name)]
  end

  raise NonExistingNamespaceError, "No entry found for #{fully_qualified_name}" unless entries

  ancestors = [fully_qualified_name]

  # Cache the linearized ancestors array eagerly. This is important because we might have circular dependencies and
  # this will prevent us from falling into an infinite recursion loop. Because we mutate the ancestors array later,
  # the cache will reflect the final result
  @ancestors[fully_qualified_name] = ancestors

  # If none of the entries for `name` are namespaces, raise
  namespaces = entries.filter_map do |entry|
    case entry
    when Entry::Namespace
      entry
    when Entry::ConstantAlias
      self[entry.target]&.grep(Entry::Namespace)
    end
  end.flatten

  raise NonExistingNamespaceError,
    "None of the entries for #{fully_qualified_name} are modules or classes" if namespaces.empty?

  # The original nesting where we discovered this namespace, so that we resolve the correct names of the
  # included/prepended/extended modules and parent classes
  nesting = T.must(namespaces.first).nesting.flat_map { |n| n.split("::") }

  if nesting.any?
    singleton_levels.times do
      nesting << "<Class:#{T.must(nesting.last)}>"
    end
  end

  # We only need to run included hooks when linearizing singleton classes. Included hooks are typically used to add
  # new singleton methods or to extend a module through an include. There's no need to support instance methods, the
  # inclusion of another module or the prepending of another module, because those features are already a part of
  # Ruby and can be used directly without any metaprogramming
  run_included_hooks(attached_class_name, nesting) if singleton_levels > 0

  linearize_mixins(ancestors, namespaces, nesting)
  linearize_superclass(
    ancestors,
    attached_class_name,
    fully_qualified_name,
    namespaces,
    nesting,
    singleton_levels,
  )

  ancestors
end

#method_completion_candidates(name, receiver_name) ⇒ Object



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 203

def method_completion_candidates(name, receiver_name)
  ancestors = linearized_ancestors_of(receiver_name)

  candidates = name ? prefix_search(name).flatten : @entries.values.flatten
  completion_items = candidates.each_with_object({}) do |entry, hash|
    unless entry.is_a?(Entry::Member) || entry.is_a?(Entry::MethodAlias) ||
        entry.is_a?(Entry::UnresolvedMethodAlias)
      next
    end

    entry_name = entry.name
    ancestor_index = ancestors.index(entry.owner&.name)
    existing_entry, existing_entry_index = hash[entry_name]

    # Conditions for matching a method completion candidate:
    # 1. If an ancestor_index was found, it means that this method is owned by the receiver. The exact index is
    # where in the ancestor chain the method was found. For example, if the ancestors are ["A", "B", "C"] and we
    # found the method declared in `B`, then the ancestors index is 1
    #
    # 2. We already established that this method is owned by the receiver. Now, check if we already added a
    # completion candidate for this method name. If not, then we just go and add it (the left hand side of the or)
    #
    # 3. If we had already found a method entry for the same name, then we need to check if the current entry that
    # we are comparing appears first in the hierarchy or not. For example, imagine we have the method `open` defined
    # in both `File` and its parent `IO`. If we first find the method `open` in `IO`, it will be inserted into the
    # hash. Then, when we find the entry for `open` owned by `File`, we need to replace `IO.open` by `File.open`,
    # since `File.open` appears first in the hierarchy chain and is therefore the correct method being invoked. The
    # last part of the conditional checks if the current entry was found earlier in the hierarchy chain, in which
    # case we must update the existing entry to avoid showing the wrong method declaration for overridden methods
    next unless ancestor_index && (!existing_entry || ancestor_index < existing_entry_index)

    if entry.is_a?(Entry::UnresolvedMethodAlias)
      resolved_alias = resolve_method_alias(entry, receiver_name)
      hash[entry_name] = [resolved_alias, ancestor_index] if resolved_alias.is_a?(Entry::MethodAlias)
    else
      hash[entry_name] = [entry, ancestor_index]
    end
  end

  completion_items.values.map!(&:first)
end

#namesObject



580
581
582
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 580

def names
  @entries.keys
end

#prefix_search(query, nesting = nil) ⇒ Object



155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 155

def prefix_search(query, nesting = nil)
  unless nesting
    results = @entries_tree.search(query)
    results.uniq!
    return results
  end

  results = nesting.length.downto(0).flat_map do |i|
    prefix = T.must(nesting[0...i]).join("::")
    namespaced_query = prefix.empty? ? query : "#{prefix}::#{query}"
    @entries_tree.search(namespaced_query)
  end

  results.uniq!
  results
end

#register_enhancement(enhancement) ⇒ Object



56
57
58
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 56

def register_enhancement(enhancement)
  @enhancements << enhancement
end

#register_included_hook(module_name, &hook) ⇒ Object



62
63
64
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 62

def register_included_hook(module_name, &hook)
  (@included_hooks[module_name] ||= []) << hook
end

#resolve(name, nesting, seen_names = []) ⇒ Object



263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 263

def resolve(name, nesting, seen_names = [])
  # If we have a top level reference, then we just search for it straight away ignoring the nesting
  if name.start_with?("::")
    entries = direct_or_aliased_constant(name.delete_prefix("::"), seen_names)
    return entries if entries
  end

  # Non qualified reference path
  full_name = nesting.any? ? "#{nesting.join("::")}::#{name}" : name

  # When the name is not qualified with any namespaces, Ruby will take several steps to try to the resolve the
  # constant. First, it will try to find the constant in the exact namespace where the reference was found
  entries = direct_or_aliased_constant(full_name, seen_names)
  return entries if entries

  # If the constant is not found yet, then Ruby will try to find the constant in the enclosing lexical scopes,
  # unwrapping each level one by one. Important note: the top level is not included because that's the fallback of
  # the algorithm after every other possibility has been exhausted
  entries = lookup_enclosing_scopes(name, nesting, seen_names)
  return entries if entries

  # If the constant does not exist in any enclosing scopes, then Ruby will search for it in the ancestors of the
  # specific namespace where the reference was found
  entries = lookup_ancestor_chain(name, nesting, seen_names)
  return entries if entries

  # Finally, as a fallback, Ruby will search for the constant in the top level namespace
  direct_or_aliased_constant(name, seen_names)
rescue UnresolvableAliasError
  nil
end

#resolve_instance_variable(variable_name, owner_name) ⇒ Object



522
523
524
525
526
527
528
529
530
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 522

def resolve_instance_variable(variable_name, owner_name)
  entries = T.cast(self[variable_name], T.nilable(T::Array[Entry::InstanceVariable]))
  return unless entries

  ancestors = linearized_ancestors_of(owner_name)
  return if ancestors.empty?

  entries.select { |e| ancestors.include?(e.owner&.name) }
end

#resolve_method(method_name, receiver_name, inherited_only: false) ⇒ Object



406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 406

def resolve_method(method_name, receiver_name, inherited_only: false)
  method_entries = self[method_name]
  return unless method_entries

  ancestors = linearized_ancestors_of(receiver_name.delete_prefix("::"))
  ancestors.each do |ancestor|
    next if inherited_only && ancestor == receiver_name

    found = method_entries.filter_map do |entry|
      case entry
      when Entry::Member, Entry::MethodAlias
        entry if entry.owner&.name == ancestor
      when Entry::UnresolvedMethodAlias
        # Resolve aliases lazily as we find them
        if entry.owner&.name == ancestor
          resolved_alias = resolve_method_alias(entry, receiver_name)
          resolved_alias if resolved_alias.is_a?(Entry::MethodAlias)
        end
      end
    end

    return found if found.any?
  end

  nil
rescue NonExistingNamespaceError
  nil
end

#search_require_paths(query) ⇒ Object



109
110
111
# File 'lib/ruby_indexer/lib/ruby_indexer/index.rb', line 109

def search_require_paths(query)
  @require_paths_tree.search(query)
end