Class: Longleaf::MetadataDeserializer

Inherits:
Object
  • Object
show all
Extended by:
Logging
Defined in:
lib/longleaf/services/metadata_deserializer.rb

Overview

Service which deserializes metadata files into MetadataRecord objects

Class Method Summary collapse

Methods included from Logging

initialize_logger, initialize_logger, logger, logger

Class Method Details

.deserialize(file_path:, format: 'yaml', digest_algs: []) ⇒ Object

Deserialize a file into a MetadataRecord object

Parameters:

  • file_path (String)

    path of the file to read. Required.

  • format (String) (defaults to: 'yaml')

    format the file is stored in. Default is ‘yaml’.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
# File 'lib/longleaf/services/metadata_deserializer.rb', line 18

# Build a MetadataRecord from a serialized metadata file.
#
# @param file_path [String, File] location of the metadata file; a File is reduced to its path
# @param format [String] serialization format of the file. Only 'yaml' is accepted.
# @param digest_algs [Array<String>] digest algorithms handed on to the format loader
# @return [MetadataRecord] record populated from the data and services sections of the file
# @raise [ArgumentError] if format is anything other than 'yaml'
# @raise [MetadataError] if the value of a service entry is not a Hash
def self.deserialize(file_path:, format: 'yaml', digest_algs: [])
  file_path = file_path.path if file_path.is_a?(File)

  unless format == 'yaml'
    raise ArgumentError.new("Invalid deserialization format #{format} specified")
  end
  md = from_yaml(file_path, digest_algs)

  # Work on a copy so the parsed structure is left untouched
  data = {}.merge(md[MDF::DATA])

  # Reserved properties are pulled out of the general properties and
  # handed to the record as dedicated keyword arguments
  reserved = {
    registered: data.delete(MDFields::REGISTERED_TIMESTAMP),
    deregistered: data.delete(MDFields::DEREGISTERED_TIMESTAMP),
    checksums: data.delete(MDFields::CHECKSUMS),
    file_size: data.delete(MDFields::FILE_SIZE),
    last_modified: data.delete(MDFields::LAST_MODIFIED),
    physical_path: data.delete(MDFields::PHYSICAL_PATH)
  }

  service_records = {}
  md[MDF::SERVICES]&.each do |service_name, raw_props|
    unless raw_props.instance_of?(Hash)
      raise MetadataError.new("Value of service #{service_name} must be a hash")
    end

    # Same treatment per service: split reserved fields from free-form properties
    remaining = {}.merge(raw_props)
    stale = remaining.delete(MDFields::STALE_REPLICAS)
    stamp = remaining.delete(MDFields::SERVICE_TIMESTAMP)
    needed = remaining.delete(MDFields::RUN_NEEDED)

    service_records[service_name] = ServiceRecord.new(properties: remaining,
                                                      stale_replicas: stale,
                                                      timestamp: stamp,
                                                      run_needed: needed)
  end

  MetadataRecord.new(properties: data, services: service_records, **reserved)
end

.from_yaml(file_path, digest_algs) ⇒ Object

Load and validate metadata from a YAML-encoded metadata file



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/longleaf/services/metadata_deserializer.rb', line 66

# Load and validate a YAML encoded metadata file.
#
# The file contents are checked with verify_digests, parsed with YAML.safe_load
# and then validated with MetadataValidator. A digest mismatch is not raised
# immediately: parsing and validation still run so that any problem they find
# can be appended to the checksum error as a likely cause.
#
# @param file_path [String] path of the metadata file to read
# @param digest_algs [Array<String>] digest algorithms passed through to verify_digests
# @return [Object] the parsed metadata structure returned by YAML.safe_load
# @raise [MetadataError] if the file cannot be parsed or fails validation, and no
#   digest mismatch was detected
# @raise [ChecksumMismatchError] if a digest did not match; the message also carries
#   any parse or validation problem that was found
def self.from_yaml(file_path, digest_algs)
  File.open(file_path, 'r:bom|utf-8') do |f|
    contents = f.read

    checksum_error = nil
    begin
      verify_digests(file_path, contents, digest_algs)
    rescue ChecksumMismatchError => err
      # Hold onto the checksum error, in case we can identify the underlying cause
      checksum_error = err
    end

    begin
      md = nil
      begin
        # Keyword form is required: the positional (classes, symbols, aliases)
        # signature was deprecated in Psych 3.1 and removed in Psych 4, where it
        # raises ArgumentError for every file.
        md = YAML.safe_load(contents, permitted_classes: [], permitted_symbols: [], aliases: true)
      rescue => err
        raise MetadataError.new("Failed to parse metadata file #{file_path}: #{err.message}")
      end

      validation_result = MetadataValidator.new(md).validate_config
      unless validation_result.valid?
        issues = validation_result.errors.join("\n")
        # The path is only needed when this error is reported on its own; the
        # checksum error message is used as the prefix otherwise.
        if checksum_error.nil?
          raise MetadataError.new("Invalid metadata file #{file_path}:\n#{issues}")
        else
          raise MetadataError.new(issues)
        end
      end

      # Either return the valid metadata, or raise the checksum error as is
      if checksum_error.nil?
        md
      else
        raise checksum_error
      end
    rescue MetadataError => err
      raise if checksum_error.nil?

      # Add underlying cause from the metadata error to the checksum mismatch error
      msg = checksum_error.message + "\nWith related issue(s):\n#{err.message}"
      raise ChecksumMismatchError.new(msg)
    end
  end
end

.verify_digests(file_path, contents, digest_algs) ⇒ Object



113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/longleaf/services/metadata_deserializer.rb', line 113

# Verify the contents of a metadata file against its sidecar digest files.
#
# For each algorithm, the expected digest is read from "<path>.<alg>". A missing
# digest file is logged as a warning and skipped rather than treated as a failure.
#
# @param file_path [String, #path] metadata file location, or an object exposing it via #path
# @param contents [String] contents of the metadata file to compute digests over
# @param digest_algs [Array<String>, nil] digest algorithm names; nil or empty skips verification
# @raise [ChecksumMismatchError] if a stored digest differs from the calculated one
def self.verify_digests(file_path, contents, digest_algs)
  return if digest_algs.nil? || digest_algs.empty?

  # The path does not depend on the algorithm, so resolve it once
  path = file_path.respond_to?(:path) ? file_path.path : file_path

  digest_algs.each do |alg|
    digest_path = "#{path}.#{alg}"
    unless File.exist?(digest_path)
      logger.warn("Missing expected #{alg} digest for #{path}")
      next
    end

    result = DigestHelper::start_digest(alg).hexdigest(contents)
    # File.read rather than IO.read: IO.read treats a leading "|" as a command to run.
    # Surrounding whitespace (e.g. a trailing newline) is not part of the digest.
    existing_digest = File.read(digest_path).strip

    if result == existing_digest
      logger.info("Metadata fixity check using algorithm '#{alg}' succeeded for file #{path}")
    else
      raise ChecksumMismatchError.new("Metadata digest of type #{alg} did not match the contents of #{path}:" \
                                      + " expected #{existing_digest}, calculated #{result}")
    end
  end
end