14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
|
# File 'lib/loc_mods/cli.rb', line 14
# Reports records that share a location URL across the given XML files.
#
# Each path may be a directory (its immediate "*.xml" children are processed)
# or a single ".xml" file; anything else is skipped with a warning. Records
# are indexed by every URL found in their locations, and for each URL carried
# by more than one record every pair of those records is diffed via
# +print_differences+. All reporting goes to standard output.
#
# NOTE(review): +process_file+ is defined elsewhere; from its use here it is
# expected to append hashes of the form +{ file:, record: }+ to the array it
# is given — confirm against its definition.
#
# @param paths [Array<String>] directories and/or ".xml" file paths to scan
# @return [Hash] the URL => records index that was iterated for reporting
def detect_duplicates(*paths)
  all_records = []
  paths.each do |path|
    if File.directory?(path)
      Dir.glob(File.join(path, "*.xml")).each do |file|
        process_file(file, all_records)
      end
    elsif File.file?(path) && path.end_with?(".xml")
      process_file(path, all_records)
    else
      puts "Warning: Skipping invalid path: #{path}"
    end
  end

  records_by_url = {}
  all_records.each do |record|
    # A missing location list or a location without URLs is treated as
    # "no URLs" so the record gets the warning below instead of raising.
    locations = record[:record].location || []
    # uniq: a record repeating one URL in several locations must be indexed
    # once per URL, otherwise it would be reported as a duplicate of itself.
    urls = locations.flat_map { |loc| (loc.url || []).map(&:content) }.compact.uniq

    if urls.empty?
      puts "Warning: Record without URL found in file: #{record[:file]}"
      next
    end

    urls.each do |url|
      (records_by_url[url] ||= []) << record
    end
  end

  duplicate_count = 0
  records_by_url.each do |url, records|
    next unless records.size > 1

    duplicate_count += 1
    puts "Duplicate set ##{duplicate_count} found for URL: #{url}"
    # Compare every unordered pair of records sharing this URL.
    records.combination(2).each_with_index do |(record1, record2), index|
      puts " Comparison #{index + 1}:"
      puts " File 1: #{record1[:file]}"
      puts " File 2: #{record2[:file]}"
      print_differences(
        record1[:record],
        record2[:record],
        options[:show_unchanged],
        options[:highlight_diff],
        color_enabled?
      )
      puts "\n"
    end
  end
end
|