Class: Amp::Repositories::Verification::Verifier

Inherits:
Object
  • Object
show all
Defined in:
lib/amp/repository/verification.rb

Overview

Handles all logic for verifying a single repository and collecting the results.

Public interface: initialize with a repository and run #verify.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(repo) ⇒ Verifier

Creates a new Verifier. The Verifier can verify a Mercurial repository.

Parameters:

  • repo (Repository)

    the repository this verifier will examine



39
40
41
42
43
44
45
46
# File 'lib/amp/repository/verification.rb', line 39

# Builds a verifier bound to a single repository.
#
# Keeps a reference to the repository itself, starts a VerificationResult
# with all five counters at zero, prepares an empty table of bad revisions,
# and captures the repository's changelog and manifest for later checks.
#
# @param repo the repository to examine; must respond to #changelog and
#   #manifest
def initialize(repo)
  @repository, @result = repo, VerificationResult.new(0, 0, 0, 0, 0)
  @bad_revisions = {}

  # Grab both revlogs up front; every verification pass reads them.
  @changelog, @manifest = repo.changelog, repo.manifest
end

Instance Attribute Details

#changelog ⇒ Object (readonly)

Returns the value of attribute changelog.



33
34
35
# File 'lib/amp/repository/verification.rb', line 33

# Reader for the changelog this verifier checks. The instance variable is
# assigned from the repository's #changelog in the constructor.
#
# @return the object stored in @changelog
def changelog
  @changelog
end

#manifest ⇒ Object (readonly)

Returns the value of attribute manifest.



33
34
35
# File 'lib/amp/repository/verification.rb', line 33

# Reader for the manifest this verifier checks. The instance variable is
# assigned from the repository's #manifest in the constructor.
#
# @return the object stored in @manifest
def manifest
  @manifest
end

#repository ⇒ Object Also known as: repo

Returns the value of attribute repository.



30
31
32
# File 'lib/amp/repository/verification.rb', line 30

# Reader for the repository handed to the constructor.
#
# @return the object stored in @repository
def repository
  @repository
end

Instance Method Details

#verify ⇒ VerificationResult

Runs a verification sweep on the repository this verifier is handling.

Returns:

  • (VerificationResult)

    the results of the verification, which includes error messages, warning counts, and so on.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/amp/repository/verification.rb', line 53

# Runs every verification pass in order and hands back the accumulated
# result object.
#
# Three bookkeeping tables are shared between the passes; each one creates
# its per-key container on first access:
#
# * manifest node ID => list of changelog revisions referring to it
# * filename         => list of changelog revisions in which it changed
# * filename         => { file node ID => revision index }
#
# @return the value of @result once all passes have finished
def verify
  linkrevs_by_manifest = Hash.new { |table, node| table[node] = [] }
  linkrevs_by_file     = Hash.new { |table, name| table[name] = [] }
  nodes_by_file        = Hash.new { |table, name| table[name] = {} }

  verify_changelog(linkrevs_by_manifest, linkrevs_by_file)
  verify_manifest(linkrevs_by_manifest, nodes_by_file)
  verify_crosscheck(linkrevs_by_manifest, linkrevs_by_file, nodes_by_file)

  UI.status("checking files")
  # The store is scanned first (argument evaluation), then the file logs are
  # checked against what the scan found.
  verify_files(linkrevs_by_file, nodes_by_file, verify_store)

  @result
end

#verify_changelog(manifest_linkrevs, file_linkrevs) ⇒ Object

Verifies the changelog. Updates acceptable file_linkrevs and manifest_linkrevs along the way, since the changelog knows which files have been changed when, and which manifest entries go with which changelog entries.

Parameters:

  • manifest_linkrevs (Hash)

    the mapping between manifest node IDs and changelog revision numbers

  • file_linkrevs (Hash)

    a mapping between filenames and a list of changelog revision numbers where the file was modified, added, or deleted.



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/amp/repository/verification.rb', line 84

# Verifies the changelog and seeds the cross-reference tables used by the
# later passes.
#
# For every changelog index the entry is checked with #check_entry and then
# read. The first element of the entry is used as the manifest node ID and
# element 3 as the list of touched filenames; the changelog index is
# appended to the corresponding lists in the two tables.
#
# A failure while reading one changeset is reported through #exception and
# the loop carries on with the next index. Only StandardError is rescued, so
# Interrupt and SystemExit still abort the run instead of being logged as a
# corrupt changeset.
#
# @param manifest_linkrevs [Hash] manifest node ID => list of changelog
#   revision numbers (appended to here)
# @param file_linkrevs [Hash] filename => list of changelog revision numbers
#   (appended to here)
def verify_changelog(manifest_linkrevs, file_linkrevs)
  Amp::UI.status("checking changelog...")
  check_revlog(@changelog, "changelog")
  seen = {}
  # can't use the nice #each because it assumes functioning changelog and whatnot
  @changelog.size.times do |idx|
    node = @changelog.node_id_for_index idx
    # For the changelog, the only acceptable link revision is the index itself.
    check_entry(@changelog, idx, node, seen, [idx], "changelog")
    begin
      changelog_entry = @changelog.read(node)
      manifest_linkrevs[changelog_entry.first] << idx
      changelog_entry[3].each { |f| file_linkrevs[f] << idx }
    rescue StandardError => err
      exception(idx, "unpacking changeset #{node.short_hex}:", err, "changelog")
    end
  end
  @result.changesets = @changelog.size
end

#verify_crosscheck(manifest_linkrevs, file_linkrevs, file_node_ids) ⇒ Object

Crosschecks the changelog against the manifest and vice-versa. There should be no remaining unmatched manifest node IDs, nor any files missing from file_node_ids. A few other checks, too.

Parameters:

  • manifest_linkrevs (Hash)

    the mapping between manifest node IDs and changelog revision numbers

  • file_linkrevs (Hash)

    a mapping between filenames and a list of changelog revision numbers where the file was modified, added, or deleted.

  • file_node_ids (Hash)

    maps filenames to a mapping from file node IDs to global link revisions.



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/amp/repository/verification.rb', line 147

# Cross-checks what the changelog claims against what the manifest claims.
#
# When the manifest has entries:
# * every (link revision, manifest node) pair still left in
#   manifest_linkrevs is reported as a changeset referring to an unknown
#   manifest -- one error per link revision, in sorted order;
# * every file listed in file_linkrevs that has no node recorded in
#   file_node_ids is reported as "in changeset but not in manifest".
#
# When the changelog has entries, every file recorded in file_node_ids that
# is not a key of file_linkrevs is reported as "in manifest but not in
# changeset". The revision attached to that error is the smallest link
# revision of the file's known nodes, or nil if it cannot be determined.
#
# Both tables may create entries on plain [] access, so membership is tested
# with #key? / #fetch rather than by looking at the value.
#
# @param manifest_linkrevs [Hash] manifest node ID => list of changelog
#   revision numbers
# @param file_linkrevs [Hash] filename => list of changelog revision numbers
# @param file_node_ids [Hash] filename => { file node ID => revision index }
def verify_crosscheck(manifest_linkrevs, file_linkrevs, file_node_ids)
  Amp::UI.status("crosschecking files in changesets and manifests")

  # Check for node IDs found in the changelog, but not the manifest
  if @manifest.any?
    # Each manifest node maps to a *list* of link revisions; report each one.
    unknown_manifests = manifest_linkrevs.flat_map do |node, link_revs|
      link_revs.map { |link_rev| [link_rev, node] }
    end
    unknown_manifests.sort.each do |link_rev, node|
      error(link_rev, "changeset refers to unknown manifest #{node.short_hex}")
    end

    # check for any files we found in the changesets, but not in the manifest
    file_linkrevs.keys.sort.each do |file|
      # fetch: do not create an entry in file_node_ids just by asking
      next unless file_node_ids.fetch(file, {}).empty?
      error(file_linkrevs[file].first, "in changeset but not in manifest", file)
    end
  end

  # Check for node IDs found in the manifest, but not the changelog.
  if @changelog.any?
    file_node_ids.keys.sort.each do |file|
      next if file_linkrevs.key?(file)

      begin
        filelog = @repository.file(file)
        link_rev = file_node_ids[file].keys.map { |node|
          filelog.link_revision_for_index(filelog.revision_index_for_node(node))
        }.min
      rescue StandardError
        # Best effort only: still report the file, just without a revision.
        link_rev = nil
      end
      error(link_rev, "in manifest but not in changeset", file)
    end
  end
end

#verify_filelog(file, file_log, file_linkrevs, file_node_ids) ⇒ Object

Verifies a single file log. This is a complicated process - we need to cross-check a lot of data, which is why this has been extracted into its own method.

Parameters:

  • file (String)

    the name of the file we’re verifying

  • file_log (FileLog)

    the file log we’re verifying

  • file_linkrevs (Hash)

    a mapping between filenames and a list of changelog revision numbers where the file was modified, added, or deleted.

  • file_node_ids (Hash)

    maps filenames to a mapping from file node IDs to global link revisions.



239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
# File 'lib/amp/repository/verification.rb', line 239

# Verifies one file log, revision by revision.
#
# For each revision index of the file log this method:
# 1. bumps the revision counter on @result and runs #check_entry, whose
#    return value is used as the link revision in every message below;
# 2. checks that the node is known to file_node_ids (only enforced when the
#    manifest has entries) and otherwise removes it from that table, so that
#    whatever is left at the end was never found in the file log;
# 3. reads the revision and compares its size against the size recorded for
#    that index, double-checking against #decompress_revision before
#    reporting a mismatch;
# 4. if the revision records a copy/rename source, checks that the source
#    file log is non-empty and that the source node can be looked up in it.
#
# Failures inside steps 3 and 4 are reported through #exception and do not
# stop the sweep. Only StandardError is rescued, so Interrupt and SystemExit
# still abort the run.
#
# @param file [String] name of the file being verified
# @param file_log the file log to verify
# @param file_linkrevs [Hash] filename => list of changelog revision numbers
# @param file_node_ids [Hash] filename => { file node ID => revision index };
#   nodes found in the file log are deleted from it
def verify_filelog(file, file_log, file_linkrevs, file_node_ids)
  check_revlog(file_log, file)
  seen = {}
  file_log.index_size.times do |idx|
    @result.revisions += 1
    node = file_log.node_id_for_index(idx)
    link_rev = check_entry(file_log, idx, node, seen, file_linkrevs[file], file)

    # Make sure that one of the manifests referenced the node ID. If not, one of our
    # manifests is wrong!
    if file_node_ids[file]
      if @manifest.any? && !file_node_ids[file][node]
        error(link_rev, "#{node.short_hex} not found in manifests", file)
      else
        file_node_ids[file].delete node
      end
    end

    # Make sure the size of the uncompressed file is correct.
    begin
      text = file_log.read node
      rename_info = file_log.renamed? node
      expected_size = file_log.uncompressed_size_for_index(idx)
      if text.size != expected_size
        # Only complain if the raw decompressed revision disagrees as well.
        if file_log.decompress_revision(node).size != expected_size
          error(link_rev, "unpacked size is #{text.size}, #{expected_size} expected", file)
        end
      end
    rescue StandardError => err
      exception(link_rev, "unpacking #{node.short_hex}", err, file)
    end

    # Check if we screwed up renaming a file (like lost the source revlog or something)
    begin
      if rename_info && rename_info.any?
        filelog_src = @repository.file(rename_info.first)
        if filelog_src.index_size == 0
          error(link_rev, "empty or missing copy source revlog "+
                          "#{rename_info[0]}, #{rename_info[1].short_hex}", file)
        elsif rename_info[1] == RevlogSupport::Node::NULL_ID
          warn("#{file}@#{link_rev}: copy source revision is NULL_ID "+
               "#{rename_info[0]}:#{rename_info[1].short_hex}", file)
        else
          # Called only for its failure mode: a lookup error lands in the
          # rescue below and is reported against this revision.
          filelog_src.revision_index_for_node(rename_info[1])
        end
      end
    rescue StandardError => err
      exception(link_rev, "checking rename of #{node.short_hex}", err, file)
    end
  end

  # Final cross-check: anything still listed for this file was referenced by
  # a manifest but never showed up in the file log.
  if file_node_ids[file] && file_node_ids[file].any?
    file_node_ids[file].map { |node, manifest_idx|
      [@manifest.link_revision_for_index(manifest_idx), node]
    }.sort.each do |link_rev, node|
      error(link_rev, "#{node.short_hex} in manifests not found", file)
    end
  end
end

#verify_files(file_linkrevs, file_node_ids, store_files) ⇒ Object

Verifies the individual file logs one by one.

Parameters:

  • file_linkrevs (Hash)

    a mapping between filenames and a list of changelog revision numbers where the file was modified, added, or deleted.

  • file_node_ids (Hash)

    maps filenames to a mapping from file node IDs to global link revisions.

  • store_files (Hash)

    a mapping keeping track of which file logs are in the store



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# File 'lib/amp/repository/verification.rb', line 206

# Verifies every file log mentioned by either the changelog or the manifest.
#
# The set of files is the sorted union of the keys of both tables; its size
# is recorded on @result. For each file the file log is opened, each of the
# on-disk files it reports via #files is ticked off in store_files (reporting
# "missing revlog!" for any that are absent), and the log itself is handed
# to #verify_filelog.
#
# A file log that cannot be opened is reported as broken and skipped. Only
# StandardError is rescued there, so Interrupt and SystemExit still abort
# the run instead of being reported as a broken revlog.
#
# @param file_linkrevs [Hash] filename => list of changelog revision numbers;
#   the first one is used as the revision for messages about the file
# @param file_node_ids [Hash] filename => { file node ID => revision index }
# @param store_files [Hash] store filenames still unaccounted for; entries
#   are deleted as they are matched
def verify_files(file_linkrevs, file_node_ids, store_files)
  files = (file_node_ids.keys + file_linkrevs.keys).uniq.sort
  @result.files = files.size
  files.each do |file|
    link_rev = file_linkrevs[file].first

    begin
      file_log = @repository.file(file)
    rescue StandardError => err
      error(link_rev, "broken revlog! (#{err})", file)
      next
    end

    file_log.files.each do |ff|
      error(link_rev, "missing revlog!", ff) unless store_files.delete(ff)
    end

    verify_filelog(file, file_log, file_linkrevs, file_node_ids)
  end
end

#verify_manifest(manifest_linkrevs, file_node_ids) ⇒ Object

Verifies the manifest and its nodes. Also updates file_node_ids to store the node ID of files at given points in the manifest’s history.

Parameters:

  • manifest_linkrevs (Hash)

    the mapping between manifest node IDs and changelog revision numbers

  • file_node_ids (Hash)

    maps filenames to a mapping from file node IDs to global link revisions.



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/amp/repository/verification.rb', line 111

# Verifies the manifest and records which file nodes it mentions.
#
# For every manifest index the entry is checked with #check_entry (whose
# return value is used as the link revision for messages), and the node is
# removed from manifest_linkrevs so that only nodes never seen in the
# manifest remain there afterwards. The delta for the node is then read:
# an entry with an empty filename is reported, "/dev/null" is ignored, and
# every other (filename, file node) pair is recorded in file_node_ids with
# the manifest index at which it was first seen.
#
# A failure while reading a delta is reported through #exception against the
# same link revision used for the other messages of that entry. Only
# StandardError is rescued, so Interrupt and SystemExit still abort the run.
#
# @param manifest_linkrevs [Hash] manifest node ID => list of changelog
#   revision numbers; nodes present in the manifest are deleted from it
# @param file_node_ids [Hash] filename => { file node ID => manifest index };
#   filled in here, and expected to create the inner hash on first access
def verify_manifest(manifest_linkrevs, file_node_ids)
  Amp::UI.status("checking manifests...")
  check_revlog(@manifest, "manifest")
  seen = {}

  @manifest.size.times do |idx|
    node = @manifest.node_id_for_index idx
    link_rev = check_entry(@manifest, idx, node, seen, manifest_linkrevs[node], "manifest")
    manifest_linkrevs.delete node

    begin
      @manifest.read_delta(node).each do |filename, file_node|
        if filename.empty?
          error(link_rev, "file without name in manifest")
        elsif filename != "/dev/null"
          # Keep the first manifest index at which this file node appears.
          file_node_ids[filename][file_node] ||= idx
        end
      end
    rescue StandardError => err
      exception(link_rev, "reading manifest delta #{node.short_hex}", err, "manifest")
    end
  end
end

#verify_store ⇒ Hash<String => Boolean>

Verifies the store, and returns a hash with names of files that are OK

Returns:

  • (Hash<String => Boolean>)

    a hash whose keys are the decoded names of the non-empty data files found in the store, each mapped to true; entries whose name cannot be decoded are reported as errors and left out



186
187
188
189
190
191
192
193
194
195
196
# File 'lib/amp/repository/verification.rb', line 186

# Scans the repository store's data files and collects the usable ones.
#
# Each store entry provides a decoded name, the encoded on-disk name, and a
# size. An entry whose decoded name is nil or empty is reported through
# #error (with no revision) using the encoded name; any other entry is
# recorded only when its size is greater than zero.
#
# @return [Hash] decoded filename => true for every recorded entry
def verify_store
  readable = {}
  @repository.store.datafiles.each do |name, encoded_name, bytes|
    unless name.nil? || name.empty?
      readable[name] = true if bytes > 0
      next
    end
    error(nil, "can't decode filename from store: #{encoded_name}")
  end
  readable
end