Class: Dependabot::Linguist::Repository

Inherits:
Object
  • Object
show all
Defined in:
lib/dependabot/linguist/repository.rb

Overview

Repository wraps a Linguist::Repository to discover the “linguist languages” present in a repository, maps them to Dependabot ecosystems, and finally verifies that each ecosystem is valid in the directories where linguist found the languages it considered relevant to that ecosystem.

Instance Method Summary collapse

Constructor Details

#initialize(repo_path, repo_name, ignore_linguist: 0, verbose: false) ⇒ Repository

Returns a new instance of Repository.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/dependabot/linguist/repository.rb', line 18

# Opens the git repository at repo_path. When no repository can be opened
# there, https://github.com/<repo_name>.git is cloned instead. A
# Linguist::Repository is then built for the commit that HEAD targets.
#
# @param repo_path [String, nil] local path; nil means the current directory
# @param repo_name [String] name interpolated into the GitHub clone URL
# @param ignore_linguist [Integer] clamped into the range 0..2
# @param verbose [Boolean] stored in @verbose
def initialize(repo_path, repo_name, ignore_linguist: 0, verbose: false)
  # A nil repo_path means the current working directory is the path.
  @repo_path = repo_path.nil? ? "." : repo_path.chomp.delete_suffix("/")
  @repo_name = repo_name
  begin
    @repo = Rugged::Repository.new(@repo_path)
  rescue Rugged::RepositoryError, Rugged::OSError
    # The folder is missing, or it exists without a `.git/`; try to clone
    # the public repository instead.
    clone_url = "https://github.com/#{@repo_name}.git"
    puts "Repository #{@repo_name} not found at #{@repo_path}; falling back to cloning public url"
    # Only clone straight into the given path when it is an empty
    # directory; otherwise use a temporary repository path.
    @repo_path = "./tmp/#{@repo_name}" unless Dir.empty?(@repo_path)
    puts "Cloning #{clone_url} into #{@repo_path}"
    @repo = Rugged::Repository.clone_at(clone_url, @repo_path)
  end
  @ignore_linguist = ignore_linguist.clamp(0, 2)
  @verbose = verbose
  @linguist = ::Linguist::Repository.new(@repo, @repo.head.target_id)
end

Instance Method Details

#all_directoriesObject

Get ALL directories for the repo path.



162
163
164
165
166
# File 'lib/dependabot/linguist/repository.rb', line 162

# Lists every directory below the repo path as a "/"-prefixed path relative
# to the repo path, with "/" itself always first. The result is memoized.
#
# @return [Array<String>] e.g. ["/", "/lib", "/lib/dependabot"]
def all_directories
  @all_directories ||= begin
    # Globbing relative to `base:` (instead of interpolating @repo_path into
    # the pattern) stops glob metacharacters in the repo path, such as
    # "[", "{", "*" or "?", from being interpreted as part of the pattern.
    subdirectories = Dir.glob("**/*/", base: @repo_path).map do |relative_path|
      "/#{relative_path.delete_suffix("/")}"
    end
    # "**/*/" never yields the base path itself, so add it explicitly.
    ["/"] | subdirectories
  end
end

#all_ecosystem_classesObject



185
186
187
# File 'lib/dependabot/linguist/repository.rb', line 185

# Maps every package ecosystem listed in
# PACKAGE_ECOSYSTEM_TO_FILE_FETCHERS_REGISTRY_KEY to the file fetcher class
# that Dependabot::FileFetchers returns for its registry key. Memoized.
#
# @return [Hash] package ecosystem => file fetcher class
def all_ecosystem_classes
  # transform_values yields only the value (the registry key), so the block
  # takes a single argument and returns the fetcher class itself.
  @all_ecosystem_classes ||= PACKAGE_ECOSYSTEM_TO_FILE_FETCHERS_REGISTRY_KEY.transform_values do |registry_key|
    Dependabot::FileFetchers.for_package_manager(registry_key)
  end
end

#all_sourcesObject

Get ALL sources from ALL directories for the repo path.



169
170
171
# File 'lib/dependabot/linguist/repository.rb', line 169

# Builds one Dependabot::Source (provider "github", repo @repo_name) for
# every entry of all_directories. Memoized.
def all_sources
  return @all_sources if @all_sources

  @all_sources = all_directories.map do |dir|
    Dependabot::Source.new(provider: "github", repo: @repo_name, directory: dir)
  end
end

#directories_per_ecosystem_validated_by_dependabotObject

directories_per_ecosystem_validated_by_dependabot maps each identified present ecosystem to a list of the directories that linguist found files for, that were then validated by running the file_fetcher files on them.



192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/dependabot/linguist/repository.rb', line 192

# Maps each package ecosystem to the directories in which its Dependabot
# file fetcher found at least one dependency file. Ecosystems without any
# such directory are dropped and the result is sorted by ecosystem.
#
# Which sources and fetcher classes are tried depends on @ignore_linguist:
#   0 (or anything but 1 / 2) - linguist picks both the directories and the
#                               fetcher classes
#   1 - every fetcher class is run against every linguist directory
#   2 - every fetcher class is run against every directory of the repo
#
# The result is memoized only once the whole scan has completed, so an
# unexpected (non-Dependabot) error part-way through does not leave a
# partial result cached for later calls.
#
# @return [Hash] package ecosystem => list of directories
def directories_per_ecosystem_validated_by_dependabot
  cached = @directories_per_ecosystem_validated_by_dependabot
  return cached unless cached.nil?

  enable_options = { kubernetes_updates: true }
  case @ignore_linguist
  when 1
    # Rely on linguist to block "vendored" files from the sources, but run
    # all dependabot classes.
    shared_sources = linguist_sources.values
    ecosystem_classes = all_ecosystem_classes
  when 2
    # Don't use linguist at all.
    shared_sources = all_sources
    ecosystem_classes = all_ecosystem_classes
  else
    # Rely on linguist to find sources and to pick dependabot classes; the
    # sources are then looked up per ecosystem below.
    shared_sources = nil
    ecosystem_classes = file_fetcher_class_per_package_ecosystem
  end

  validated = {}
  ecosystem_classes.each do |package_ecosystem, file_fetcher_class|
    validated[package_ecosystem] = []
    puts "Spawning class instances for #{package_ecosystem}, in repo #{@repo_path}, class #{file_fetcher_class}" if @verbose
    sources = shared_sources ||
              directories_per_package_ecosystem[package_ecosystem].map { |directory| linguist_sources[directory] }
    sources.each do |source|
      fetcher = file_fetcher_class.new(source: source, credentials: [], repo_contents_path: @repo_path, options: enable_options)
      begin
        file_names = fetcher.files.map(&:name)
        next if file_names.empty?

        validated[package_ecosystem] |= [source.directory]
        puts "-- Dependency files FOUND for package-ecosystem #{package_ecosystem} at #{source.directory}; #{file_names}" if @verbose
      rescue Dependabot::DependabotError => e
        # Most of these will be Dependabot::DependencyFileNotFound
        # or Dependabot::PathDependenciesNotReachable
        puts "-- Caught a DependabotError, #{e.class}, for package-ecosystem #{package_ecosystem} at #{source.directory}: #{e.message}" if @verbose
      end
    end
  end
  @directories_per_ecosystem_validated_by_dependabot = validated.reject { |_, directories| directories.empty? }.sort.to_h
end

#directories_per_linguist_languageObject

directories_per_linguist_language inverts the linguist_cache map to “<Language>” => [“<folder_path>”, …], a list of folders per language!



68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/dependabot/linguist/repository.rb', line 68

# Inverts linguist_cache into "<Language>" => ["<folder_path>", ...]: for
# each language, the unique folders (each prefixed with "/") that contain a
# file of that language. Memoized.
def directories_per_linguist_language
  @directories_per_linguist_language ||= linguist_cache.keys.each_with_object({}) do |file_path, folders_by_language|
    # The first element of each cache entry is the language.
    language = linguist_cache[file_path][0]
    # Everything before the last "/" is the folder; a file in the root has
    # no "/" at all, which leaves just the leading "/".
    folder = "/#{file_path.rpartition("/").first}"
    folders_by_language[language] = (folders_by_language[language] || []) | [folder]
  end
end

#directories_per_package_ecosystemObject

directories_per_package_ecosystem squashes the map of directories_per_package_manager according to the map of managers to ecosystems, as some managers share a common ecosystem name.



121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# File 'lib/dependabot/linguist/repository.rb', line 121

# Regroups directories_per_package_manager by package ecosystem: every
# manager is translated through
# Dependabot::Linguist.package_managers_to_package_ecosystems and its
# directories are unioned into each resulting ecosystem. Memoized.
def directories_per_package_ecosystem
  return @directories_per_package_ecosystem unless @directories_per_package_ecosystem.nil?

  @directories_per_package_ecosystem = {}
  directories_per_package_manager.each_with_object(@directories_per_package_ecosystem) do |(manager, directories), by_ecosystem|
    ecosystems = Dependabot::Linguist.package_managers_to_package_ecosystems([manager])
    ecosystems.each do |ecosystem|
      # Union keeps each directory once even when managers share an ecosystem.
      by_ecosystem[ecosystem] = (by_ecosystem[ecosystem] || []) | directories
    end
  end
end

#directories_per_package_managerObject

directories_per_package_manager splits and merges the results of directories_per_linguist_language; split across each package manager that is relevant to the language, and then merges the list of file paths for that language into the list of file paths for each package manager!



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/dependabot/linguist/repository.rb', line 88

# Maps each package manager to the directories that may hold its files.
# Every language in directories_per_linguist_language is translated through
# Dependabot::Linguist.linguist_languages_to_package_managers and its
# directories are unioned into each resulting manager. GitHub Actions is
# then handled separately, based on the YAML files linguist found. Memoized.
#
# @return [Hash] package manager => list of "/"-prefixed directories
def directories_per_package_manager
  @directories_per_package_manager ||= {}.tap do |this|
    directories_per_linguist_language.each do |linguist_language, source_directories|
      Dependabot::Linguist.linguist_languages_to_package_managers([linguist_language]).each do |dependabot_package_manager|
        this[dependabot_package_manager] = (this[dependabot_package_manager] || []) | source_directories
      end
    end
    # GitHub Actions must be added separately:
    # if any yaml exists in the workflows folder, it needs to be added at "/"
    if (directories_per_linguist_language["YAML"] || []).include?("/.github/workflows")
      this[PackageManagers::GITHUB_ACTIONS] = ["/"]
    end
    # Because actions are handled like this, every action.yml / action.yaml
    # file also marks its own directory.
    (files_per_linguist_language["YAML"] || []).each do |source_file_path|
      # File paths aren't cleaned from linguist, so prepend the '/' here.
      # Splitting on the last "/" (instead of matching non-whitespace runs)
      # keeps directories whose names contain spaces intact.
      directory, _separator, file_name = "/#{source_file_path}".rpartition("/")
      next unless file_name.match?(/\Aaction\.ya?ml\z/)

      # An empty directory means the file sits in the root dir.
      action_directory = directory.empty? ? "/" : directory
      this[PackageManagers::GITHUB_ACTIONS] = (this[PackageManagers::GITHUB_ACTIONS] || []) | [action_directory]
    end
  end
end

#file_fetcher_class_per_package_ecosystemObject

file_fetcher_class_per_package_ecosystem maps ecosystem names to the class objects for each dependabot file fetcher class that’s relevant based on the list of ecosystems found by linguist languages.



140
141
142
143
144
145
146
147
148
149
150
151
152
# File 'lib/dependabot/linguist/repository.rb', line 140

# Maps every ecosystem key of directories_per_package_ecosystem to the file
# fetcher class Dependabot::FileFetchers returns for that ecosystem's
# registry key. Memoized.
def file_fetcher_class_per_package_ecosystem
  return @file_fetcher_class_per_package_ecosystem unless @file_fetcher_class_per_package_ecosystem.nil?

  @file_fetcher_class_per_package_ecosystem = {}
  directories_per_package_ecosystem.keys.each_with_object(@file_fetcher_class_per_package_ecosystem) do |ecosystem, fetchers|
    # The ecosystem name is translated to the registry key first.
    registry_key = Dependabot::Linguist::PACKAGE_ECOSYSTEM_TO_FILE_FETCHERS_REGISTRY_KEY[ecosystem]
    fetchers[ecosystem] = Dependabot::FileFetchers.for_package_manager(registry_key)
  end
end

#files_per_linguist_languageObject

files_per_linguist_language inverts the linguist_cache map to “<Language>” => [“<file_path>”, …], a list of files per language! Note that these file paths are not cleaned: unlike the folder paths in each of the “directories per *” maps, they are not prepended with a ‘/’.



59
60
61
62
63
64
# File 'lib/dependabot/linguist/repository.rb', line 59

# Inverts linguist_cache into "<Language>" => ["<file_path>", ...], using
# the first element of each cache entry as the language. File paths are
# kept exactly as they appear in the cache. Memoized.
def files_per_linguist_language
  @files_per_linguist_language ||= linguist_cache.keys.each_with_object({}) do |file_path, files_by_language|
    language = linguist_cache[file_path][0]
    (files_by_language[language] ||= []) << file_path
  end
end

#linguist_cacheObject

linguist_cache, linguist.cache, is a map of “<file_path>” => [“<Language>”, <loc>] for any files found for any language looked for.



47
48
49
# File 'lib/dependabot/linguist/repository.rb', line 47

# Returns @linguist.cache, keeping the first truthy result in
# @linguist_cache so later calls do not ask linguist again.
def linguist_cache
  return @linguist_cache if @linguist_cache

  @linguist_cache = @linguist.cache
end

#linguist_directoriesObject

Get the list of all directories identified by linguist, that had their language mapped to a relevant dependabot ecosystem.



175
176
177
# File 'lib/dependabot/linguist/repository.rb', line 175

# Collects every directory listed in directories_per_package_ecosystem into
# one list, each directory appearing once, in first-seen order. Memoized.
def linguist_directories
  @linguist_directories ||= directories_per_package_ecosystem.values.reduce([]) do |seen, directories|
    seen | directories
  end
end

#linguist_languagesObject

Wraps Linguist::Repository.new(~).languages



40
41
42
# File 'lib/dependabot/linguist/repository.rb', line 40

# Returns @linguist.languages, keeping the first truthy result in
# @linguist_languages so later calls do not ask linguist again.
def linguist_languages
  return @linguist_languages if @linguist_languages

  @linguist_languages = @linguist.languages
end

#linguist_sourcesObject

Get the list of all sources from all directories identified by linguist, that had their language mapped to a relevant dependabot ecosystem.



181
182
183
# File 'lib/dependabot/linguist/repository.rb', line 181

# Maps every entry of linguist_directories to a Dependabot::Source
# (provider "github", repo @repo_name) for that directory. Memoized.
def linguist_sources
  @linguist_sources ||= linguist_directories.each_with_object({}) do |dir, sources_by_directory|
    sources_by_directory[dir] = Dependabot::Source.new(provider: "github", repo: @repo_name, directory: dir)
  end
end

#put_discovery_infoObject

Print out the lists of languages, managers, and ecosystems found here.



155
156
157
158
159
# File 'lib/dependabot/linguist/repository.rb', line 155

# Prints three lines to standard output: the languages, package managers
# and package ecosystems discovered, taken from the keys of the respective
# "directories per ..." maps.
def put_discovery_info
  languages = directories_per_linguist_language.keys
  puts "List of languages: #{languages}"
  package_managers = directories_per_package_manager.keys
  puts "List of package managers: #{package_managers}"
  package_ecosystems = directories_per_package_ecosystem.keys
  puts "List of package ecosystems: #{package_ecosystems}"
end