Class: Inferno::Terminology::Loader

Inherits:
Object
  • Object
show all
Defined in:
lib/inferno/terminology/loader.rb

Constant Summary collapse

# Value set URLs and code system URIs that are excluded when validators are
# built: create_validators skips any value set URL or code system name found
# in this list. The trailing comment on each entry records why it is skipped.
SKIP_SYS =
[
  'http://hl7.org/fhir/ValueSet/message-events', # has 0 codes
  'http://hl7.org/fhir/ValueSet/care-team-category', # has 0 codes
  'http://hl7.org/fhir/ValueSet/action-participant-role', # has 0 codes
  'http://hl7.org/fhir/ValueSet/example-filter', # has fake property acme-plasma
  'http://hl7.org/fhir/ValueSet/all-distance-units', # UCUM filter "canonical"
  'http://hl7.org/fhir/ValueSet/all-time-units', # UCUM filter "canonical"
  'http://hl7.org/fhir/ValueSet/example-intensional', # Unhandled filter parent =
  'http://hl7.org/fhir/ValueSet/use-context', # ValueSet contains an unknown ValueSet
  'http://hl7.org/fhir/ValueSet/media-modality', # ValueSet contains an unknown ValueSet
  'http://hl7.org/fhir/ValueSet/example-hierarchical', # Example valueset with fake codes
  # We don't perform code system validation, this code system is no longer
  # in UMLS, and this bloom filter was not being correctly generated anyway
  'http://unitsofmeasure.org'
].freeze

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.validators_repo ⇒ Object (readonly)

Returns the value of attribute validators_repo.



43
44
45
# File 'lib/inferno/terminology/loader.rb', line 43

# Reader for the validators_repo attribute.
#
# @return [Object] the current value of @validators_repo (nil until it is assigned)
def validators_repo = @validators_repo

.value_sets_repo ⇒ Object (readonly)

Returns the value of attribute value_sets_repo.



43
44
45
# File 'lib/inferno/terminology/loader.rb', line 43

# Reader for the value_sets_repo attribute.
#
# @return [Object] the current value of @value_sets_repo (nil until it is assigned)
def value_sets_repo = @value_sets_repo

Class Method Details

.add_alternative_code_system_names(code_systems) ⇒ Object



55
56
57
58
59
60
61
62
63
64
# File 'lib/inferno/terminology/loader.rb', line 55

# Appends the alternative identifiers (OIDs and URL variants) of certain code
# systems to the given list when the primary identifier is present, then
# removes duplicates. The list is modified in place.
#
# @param code_systems [Array<String>] code system identifiers; mutated
# @return [Array<String>, nil] the result of `uniq!`: the array when duplicates
#   were removed, nil when there were none
def add_alternative_code_system_names(code_systems)
  # Checked in order; an entry appended by an earlier row is visible to later rows.
  alternative_names = [
    ['http://www.cms.gov/Medicare/Coding/HCPCSReleaseCodeSets', ['urn:oid:2.16.840.1.113883.6.285']],
    ['http://ada.org/cdt', ['urn:oid:2.16.840.1.113883.6.13']],
    ['http://www.ada.org/cdt', ['http://ada.org/cdt', 'urn:oid:2.16.840.1.113883.6.13']],
    ['http://nucc.org/provider-taxonomy', ['urn:oid:2.16.840.1.113883.6.101']]
  ]

  alternative_names.each do |known_name, extra_names|
    next unless code_systems.include?(known_name)

    extra_names.each { |extra_name| code_systems << extra_name }
  end

  code_systems.uniq!
end

.add_value_set_from_file(vs_file) ⇒ Object



307
308
309
310
311
312
# File 'lib/inferno/terminology/loader.rb', line 307

# Reads a value set from a file and stores it in the value set repository.
#
# @param vs_file [String] path handed to ValueSet#read_value_set
# @return [ValueSet] the value set that was read and inserted
def add_value_set_from_file(vs_file)
  ValueSet.new(@db).tap do |value_set|
    value_set.read_value_set(vs_file)
    value_sets_repo.insert(value_set)
  end
end

.bloom_file_name(codesystem) ⇒ Object



326
327
328
329
330
331
332
# File 'lib/inferno/terminology/loader.rb', line 326

# Derives a file-name-safe base name for a code system identifier.
#
# Any '|' is first replaced with '_'. When the identifier parses as a URI with
# a host and a port, the name is host + path with every '.' and '/' replaced
# by '_'. Otherwise every non-word character of the identifier is replaced.
#
# @param codesystem [String] code system identifier, optionally with '|'
# @return [String] the derived base name (no extension)
# @raise [URI::InvalidURIError] if the identifier cannot be parsed as a URI
def bloom_file_name(codesystem)
  sanitized = codesystem.tr('|', '_')
  parsed = URI(sanitized)

  if parsed.host && parsed.port
    (parsed.host + parsed.path).gsub(%r{[./]}, '_')
  else
    sanitized.gsub(/\W/, '_')
  end
end

.create_code_system_metadata(system_urls, root_dir) ⇒ Object



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
# File 'lib/inferno/terminology/loader.rb', line 161

# Records version and restriction-level information for code systems in the
# YAML file "#{root_dir}/metadata.yml".
#
# NOTE(review): several identifiers in this listing are blank: the method name
# after `def`, the assignment targets, and the method called on `vs` just
# before `.dup`. They appear to have been lost when the source was extracted.
# The documented signature for this method is
# create_code_system_metadata(system_urls, root_dir). Restore the missing
# names from the original file before relying on this block.
#
# @param system_urls [Enumerable<String>] code system URLs to look up
#   (presumably the URLs of the generated code system validators; confirm
#   against the caller)
# @param root_dir [String] directory containing metadata.yml
def (system_urls, root_dir)
  vs = ValueSet.new(@db)
   = "#{root_dir}/metadata.yml"
  # Start from the existing YAML file when there is one, otherwise from an empty Hash.
   =
    if File.file? 
      YAML.load_file()
    else
      {}
    end
  system_urls.each do |url|
    # URLs for which ValueSet#umls_abbreviation returns nil are skipped.
    abbreviation = vs.umls_abbreviation(url)
    next if abbreviation.nil?

    # Both queries read the mrsab table, restricted to rows for this
    # abbreviation with SABIN='Y'.
    versions = @db.execute("SELECT SVER FROM mrsab WHERE RSAB='#{abbreviation}' AND SABIN='Y'").flatten
    restriction_level = @db.execute(
      "SELECT SRL FROM mrsab WHERE RSAB='#{abbreviation}' AND SABIN='Y'"
    ).flatten.first
    # NOTE(review): presumably this prefers the entry already stored for `url`,
    # then a copy of a default supplied by `vs`, then an empty Hash. Confirm
    # once the missing names are restored.
     = [url] || vs.(url).dup || {}
    [:versions] ||= []
    [:versions].concat(versions).uniq!
    [:restriction_level] = restriction_level

    [url] = 
  end

  File.write(, .to_yaml)
end

.create_validators(type: :bloom, minimum_binding_strength: 'example', include_umls: true, delete_existing: true) ⇒ Object

Creates the valueset validators, based on the passed in parameters and the value_sets_repo

Parameters:

  • type (Symbol) (defaults to: :bloom)

    the type of validators to create, either :bloom or :csv

  • minimum_binding_strength (String) (defaults to: 'example')

    the lowest binding strength for which we should build validators

  • include_umls (Boolean) (defaults to: true)

    a flag to determine if we should build validators that require UMLS

  • delete_existing (Boolean) (defaults to: true)

    a flag to determine whether any existing validators of `type` should be deleted before the creation tasks run. Defaults to `true`. If `false`, the existing validators will be read in and combined with the validators created in this step.



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/inferno/terminology/loader.rb', line 79

# Creates the valueset validators, based on the passed in parameters and the
# value_sets_repo. One validator file is written per value set and per code
# system referenced by those value sets, under
# resources/terminology/validators/<type>, followed by a manifest.yml that
# lists them. Finally the code system metadata is updated for every code
# system that produced a validator.
#
# @param type [Symbol] the type of validators to create, either :bloom or :csv
# @param minimum_binding_strength [String] the lowest binding strength for
#   which we should build validators
# @param include_umls [Boolean] a flag to determine if we should build
#   validators that require UMLS
# @param delete_existing [Boolean] a flag to determine whether any existing
#   validators of `type` should be deleted before the creation tasks run. If
#   `false`, the existing validators will be read in and combined with the
#   validators created in this step.
def create_validators(
  type: :bloom,
  minimum_binding_strength: 'example',
  include_umls: true,
  delete_existing: true
)
  # Strengths are ordered weakest to strongest; keep the requested minimum
  # and everything stronger.
  strengths = ['example', 'preferred', 'extensible', 'required'].drop_while do |s|
    s != minimum_binding_strength
  end
  umls_code_systems = Set.new(ValueSet::SAB.keys)
  root_dir = "resources/terminology/validators/#{type}"

  FileUtils.rm_r(root_dir, force: true) if delete_existing
  FileUtils.mkdir_p(root_dir)

  vs_validators = get_value_sets(strengths).map do |vs_url, vs|
    next if SKIP_SYS.include? vs_url
    next if !include_umls && !umls_code_systems.disjoint?(Set.new(vs.included_code_systems))

    Inferno.logger.debug "Processing #{vs_url}"
    begin
      # Parsed inside the begin block so that a malformed value set URL is
      # logged and skipped by the URI::InvalidURIError rescue below, the same
      # way the code system branch handles it.
      vs_uri = URI(vs.url)
      filename = "#{root_dir}/#{(vs_uri.host + vs_uri.path).gsub(%r{[./]}, '_')}"

      # Save the validator to file, and get the "new" count of number of codes
      new_count = save_to_file(vs.value_set, filename, type)
      code_systems = vs.all_included_code_systems
      Inferno.logger.debug "  #{new_count} codes"
      next if new_count.zero?

      add_alternative_code_system_names(code_systems)
      {
        url: vs_url,
        file: name_by_type(File.basename(filename), type),
        count: new_count,
        type: type.to_s,
        code_systems:
      }
    rescue UnknownCodeSystemException,
           FilterOperationException,
           UnknownValueSetException,
           URI::InvalidURIError => e
      Inferno.logger.warn "#{e.message} for ValueSet: #{vs_url}"
      next
    end
  end
  vs_validators.compact!

  # Every code system referenced by a generated value set validator also gets
  # a validator of its own.
  code_systems = vs_validators.flat_map { |validator| validator[:code_systems] }.uniq
  vs = ValueSet.new(@db)

  cs_validators = code_systems.map do |cs_name|
    next if SKIP_SYS.include? cs_name
    next if !include_umls && umls_code_systems.include?(cs_name)

    Inferno.logger.debug "Processing #{cs_name}"
    begin
      cs = vs.code_system_set(cs_name)
      filename = "#{root_dir}/#{bloom_file_name(cs_name)}"
      new_count = save_to_file(cs, filename, type)

      {
        url: cs_name,
        file: name_by_type(File.basename(filename), type),
        count: new_count,
        type: type.to_s,
        code_systems: cs_name
      }
    rescue UnknownCodeSystemException,
           FilterOperationException,
           UnknownValueSetException,
           URI::InvalidURIError => e
      Inferno.logger.warn "#{e.message} for CodeSystem #{cs_name}"
      next
    end
  end.compact
  validators = vs_validators + cs_validators

  # Write manifest for loading later
  File.write("#{root_dir}/manifest.yml", validators.to_yaml)

  create_code_system_metadata(cs_validators.map { |validator| validator[:url] }, root_dir)
end

.get_value_sets(strengths) ⇒ Object

NOTE: resources/value_sets.yml controls which value sets get loaded. It is currently manually generated from the US Core metadata.



216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
# File 'lib/inferno/terminology/loader.rb', line 216

# Looks up the value sets whose bindings have one of the given strengths.
#
# NOTE: resources/value_sets.yml controls which value sets get loaded. It is
# currently manually generated from the US Core metadata.
#
# @param strengths [Array<String>] binding strengths to include
# @return [Hash] the result of value_sets_repo.select_by_url for the expected
#   URLs (its keys are compared against the expected URLs to report gaps)
def get_value_sets(strengths)
  matching_entries = value_sets_to_load.select { |entry| strengths.include? entry[:strength] }
  expected_vs_urls = matching_entries.map { |entry| entry[:system] }.compact.uniq

  available_value_sets = value_sets_repo.select_by_url(expected_vs_urls)

  # Log every value set we expected but could not find; logging only, so the
  # rake task keeps going.
  unknown_urls = expected_vs_urls - available_value_sets.keys
  unknown_urls.each do |unknown_url|
    Inferno.logger.error "Inferno doesn't know about valueset #{unknown_url}"
  end

  available_value_sets
end

.load_validators(directory = 'resources/terminology/validators/bloom') ⇒ Object



314
315
316
317
318
319
320
321
322
323
324
# File 'lib/inferno/terminology/loader.rb', line 314

# Loads the validators listed in a directory's manifest.yml into the
# validators repository. Does nothing when the manifest file is missing.
#
# Each manifest entry gets a :bloom_filter key holding the bloom filter
# deserialized from the file named by the entry's :file key, and is then
# wrapped in a Validator and inserted into validators_repo.
#
# NOTE(review): assumes every manifest entry is a Hash with a :file key, which
# is the shape create_validators writes; confirm for hand-edited manifests.
#
# @param directory [String] directory containing manifest.yml and the
#   validator files it names
# @return [Array, nil] the manifest entries, or nil when there is no manifest
def load_validators(directory = 'resources/terminology/validators/bloom')
  manifest_file = "#{directory}/manifest.yml"
  return unless File.file? manifest_file

  manifest = YAML.load_file(manifest_file)
  manifest.each do |validator_attributes|
    validator_attributes[:bloom_filter] =
      Bloomer::Scalable.from_msgpack(File.read("#{directory}/#{validator_attributes[:file]}"))
    validators_repo.insert(Validator.new(validator_attributes))
  end
end

.load_value_sets_from_directory(directory, include_subdirectories = false) ⇒ Object

rubocop:disable Style/OptionalBooleanParameter



45
46
47
48
49
50
51
52
53
# File 'lib/inferno/terminology/loader.rb', line 45

# Loads every ValueSet resource found in the JSON files of a directory.
# Files whose top-level 'resourceType' is not 'ValueSet' are ignored.
#
# @param directory [String] directory to scan for *.json files
# @param include_subdirectories [Boolean] when true, '/**/' is appended to the
#   directory before globbing so nested directories are scanned as well
# @return [Array<String>] the JSON file paths that were examined
def load_value_sets_from_directory(directory, include_subdirectories = false) # rubocop:disable Style/OptionalBooleanParameter
  search_root = include_subdirectories ? directory + '/**/' : directory

  Dir["#{search_root}/*.json"].each do |json_path|
    resource = JSON.parse(File.read(json_path))
    add_value_set_from_file(json_path) if resource['resourceType'] == 'ValueSet'
  end
end

.missing_validators ⇒ Object



334
335
336
337
338
339
340
341
342
# File 'lib/inferno/terminology/loader.rb', line 334

# Lists the value set URLs bound with 'required', 'extensible' or 'preferred'
# strength for which validators_repo has no URL. The result is memoized in
# @missing_validators.
#
# @return [Array] value set URLs with no matching validator URL
def missing_validators
  @missing_validators ||= begin
    bound_value_sets =
      value_sets_repo.select_by_binding_strength(['required', 'extensible', 'preferred'])
    expected_urls = bound_value_sets.map(&:value_set_url).compact
    expected_urls - validators_repo.all_urls
  end
end

.name_by_type(filename, type) ⇒ Object



251
252
253
254
255
256
257
258
259
260
# File 'lib/inferno/terminology/loader.rb', line 251

# Appends the file extension that belongs to a validator type.
#
# @param filename [String] file name without extension
# @param type [Symbol] :bloom (".msgpack") or :csv (".csv")
# @return [String] the file name with its extension
# @raise [RuntimeError] for any other type
def name_by_type(filename, type)
  extension = { bloom: 'msgpack', csv: 'csv' }.fetch(type) { raise 'Unknown Validator Type!' }
  "#{filename}.#{extension}"
end

.register_umls_db(database) ⇒ Object



302
303
304
305
# File 'lib/inferno/terminology/loader.rb', line 302

# Opens the SQLite database at the given path and keeps the handle in @db,
# creating the parent directory first if it does not exist.
#
# @param database [String] path to the SQLite database file
# @return [SQLite3::Database] the opened database
def register_umls_db(database)
  parent_directory = File.dirname(database)
  FileUtils.mkdir_p(parent_directory)
  @db = SQLite3::Database.new(database)
end

.save_bloom_to_file(codings, filename) ⇒ Object

Saves the valueset bloomfilter to a msgpack file

Parameters:

  • filename (String)

    the name of the file



265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
# File 'lib/inferno/terminology/loader.rb', line 265

# Saves the valueset bloomfilter to a msgpack file.
#
# If the file already exists, its bloom filter is loaded and the codings are
# added to it; otherwise a new filter sized for the codings is created. Each
# coding is stored as "<system>|<code>".
#
# The file is read and written in binary mode through calls that close the
# file themselves, so the serialized filter is fully on disk when this method
# returns.
#
# @param codings [Enumerable<Hash>] codings with :system and :code keys; must
#   respond to #length
# @param filename [String] the name of the file
# @return [Integer] the count reported by the bloom filter after the additions
def save_bloom_to_file(codings, filename)
  # If the file already exists, load it in
  bloom_filter =
    if File.file? filename
      Bloomer::Scalable.from_msgpack(File.binread(filename))
    else
      Bloomer::Scalable.create_with_sufficient_size(codings.length)
    end

  codings.each do |coding|
    bloom_filter.add_without_duplication("#{coding[:system]}|#{coding[:code]}")
  end

  File.binwrite(filename, bloom_filter.to_msgpack)

  bloom_filter.count
end

.save_csv_to_file(codings, filename) ⇒ Object

Saves the valueset to a csv

Parameters:

  • filename (String)

    the name of the file



284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
# File 'lib/inferno/terminology/loader.rb', line 284

# Saves the valueset to a csv, one "system,code" row per coding.
#
# Rows already present in the file are merged into `codings` before writing,
# so the given set is extended in place and the file is rewritten with the
# combined contents.
#
# @param codings [Set<Hash>] codings with :system and :code keys; mutated
# @param filename [String] the name of the file
# @return [Integer] number of codings after merging
def save_csv_to_file(codings, filename)
  # Bring in whatever an earlier run already wrote to this file.
  existing_rows = File.file?(filename) ? CSV.read(filename) : []
  previously_saved = Set.new(existing_rows) { |system, code| { code: code, system: system } }
  codings.merge(previously_saved)

  CSV.open(filename, 'wb') do |csv|
    codings.each { |coding| csv << [coding[:system], coding[:code]] }
  end

  codings.length
end

.save_new_value_set_list ⇒ Object

Run this method in an inferno console to update the list of value set bindings. This is not done automatically during the build because Inferno isn’t loaded during the build process.



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
# File 'lib/inferno/terminology/loader.rb', line 197

# Rewrites resources/value_sets.yml, the list of value set bindings.
#
# Run this method in an inferno console to update the list of value set
# bindings. This is not done automatically during the build because Inferno
# isn't loaded during the build process.
#
# NOTE(review): several identifiers in this listing are blank: the two
# assignment targets, the method called on each USCoreTestSuite, the block
# parameters, and the receiver of `.to_yaml`. They appear to have been lost
# when the source was extracted. Restore them from the original file before
# relying on this block.
def save_new_value_set_list
  # Concatenates something read from each of the five US Core test suite
  # versions below (the accessor name is missing from this listing).
   =
    USCoreTestKit::USCoreV311::USCoreTestSuite. +
    USCoreTestKit::USCoreV400::USCoreTestSuite. +
    USCoreTestKit::USCoreV501::USCoreTestSuite. +
    USCoreTestKit::USCoreV610::USCoreTestSuite. +
    USCoreTestKit::USCoreV700::USCoreTestSuite.

  # Each binding is merged with a profile_url, then only bindings whose
  # :strength is 'required' are kept, and duplicates are dropped.
   =
    
      .flat_map { || .bindings.map { |bind| bind.merge(profile_url: .profile_url) } }
      .select { || [:strength] == 'required' }
      .uniq

  File.write(File.join('resources', 'value_sets.yml'), .to_yaml)
end

.save_to_file(codeset, filename, type) ⇒ Object

Chooses which filetype to save the validator as, based on the type variable passed in



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# File 'lib/inferno/terminology/loader.rb', line 235

# Chooses which filetype to save the validator as, based on the type variable
# passed in, and delegates to the matching writer.
#
# @param codeset [#blank?] the codes to save; nothing is written when blank
# @param filename [String] target file name without extension
# @param type [Symbol] :bloom or :csv
# @return [Integer] 0 when the codeset is blank, otherwise whatever the
#   selected writer returns
# @raise [RuntimeError] when the codeset is not blank and the type is unknown
def save_to_file(codeset, filename, type)
  if codeset.blank?
    Inferno.logger.debug "Unable to save #{filename} because it contains no codes"
    return 0
  end

  # The type is checked before the target name is built, so an unknown type
  # fails here without calling name_by_type.
  if type == :bloom
    save_bloom_to_file(codeset, name_by_type(filename, type))
  elsif type == :csv
    save_csv_to_file(codeset, name_by_type(filename, type))
  else
    raise 'Unknown Validator Type!'
  end
end

.value_sets_to_load ⇒ Object



189
190
191
192
# File 'lib/inferno/terminology/loader.rb', line 189

# Returns the contents of resources/value_sets.yml. The file is parsed on the
# first call and the result is memoized in @value_sets_to_load.
#
# @return [Object] the parsed YAML document
def value_sets_to_load
  return @value_sets_to_load if @value_sets_to_load

  @value_sets_to_load = YAML.load_file(File.join('resources', 'value_sets.yml'))
end