Class: Gtin2atc::Builder

Inherits:
Object
  • Object
show all
Defined in:
lib/gtin2atc/builder.rb

Constant Summary collapse

# XML declaration line (including its trailing newline). The extractor
# methods remove it with String#sub from a downloaded document before handing
# the text to PharmaEntry.parse / PreparationsEntry.parse.
Strip_For_Sax_Machine =
'<?xml version="1.0" encoding="utf-8"?>'+"\n"

Instance Method Summary collapse

Constructor Details

#initialize(opts) ⇒ Builder

Returns a new instance of Builder.



11
12
13
14
15
16
17
18
19
20
# File 'lib/gtin2atc/builder.rb', line 11

# Sets up a builder from an options hash.
#
# @param opts [Hash] options; the keys read here are :log (forwarded to
#   Util.set_logging), :compare and :full
#
# After construction @do_compare mirrors opts[:compare], @gen_reports is
# truthy only when both :compare and :full are set, and the three data
# tables start out empty.
def initialize(opts)
  Util.set_logging(opts[:log])
  @do_compare = opts[:compare]
  # Must be `&&`: `and` binds looser than `=`, so the previous
  # `@gen_reports = opts[:compare] and opts[:full]` stored opts[:compare]
  # alone and silently ignored opts[:full].
  @gen_reports = opts[:compare] && opts[:full]
  Util.debug_msg "Builder: opts are #{opts} @do_compare is #{@do_compare}"
  @data_swissmedic = {}
  @data_bag = {}
  @data_swissindex = {}
  @bag_entries_without_gtin = 0
end

Instance Method Details

#bag_xml_extractor ⇒ Object



78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
# File 'lib/gtin2atc/builder.rb', line 78

# Downloads the BAG preparations XML and indexes every pack by its GTIN.
#
# Side effects: stores the downloader in @bag and resets
# @bag_entries_without_gtin, which afterwards holds the number of packs that
# carried no GTIN.
#
# @return [Hash{Integer => Hash}] one hash per pack with the keys
#   :atc_code, :gtin and :name
def bag_xml_extractor
  data = {}
  @bag = BagDownloader.new
  xml = @bag.download
  Util.debug_msg "bag_xml_extractor xml is #{xml.size} bytes long"

  result = PreparationsEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  @bag_entries_without_gtin = 0
  result.Preparations.Preparation.each do |seq|
    atc_code = seq.AtcCode || ''
    seq.Packs.Pack.each do |pac|
      gtin = pac.GTIN
      if gtin
        gtin = gtin.to_i
        # A fresh hash per pack: previously one hash per preparation was
        # mutated and stored under every GTIN, so all packs of a preparation
        # ended up showing the gtin and name of the last pack.
        item = {
          :atc_code => atc_code,
          :gtin     => gtin,
          :name     => seq.NameDe + " " +  pac.DescriptionDe,
        }
        data[gtin] = item
        Util.debug_msg "run_bag_extractor add #{item}" if $VERBOSE
      else
        @bag_entries_without_gtin += 1
        Util.debug_msg "run_bag_extractor skip phar #{seq.NameDe}: #{seq.DescriptionDe} without gtin."
      end
    end
  end
  Util.debug_msg "bag_xml_extractor extracted #{data.size} items. Skipped #{@bag_entries_without_gtin} entries without gtin"
  data
end

#calc_checksum(str) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/gtin2atc/builder.rb', line 21

# Computes a check digit over the first twelve characters of +str+.
#
# Surrounding whitespace is stripped first. Characters at even positions
# (0, 2, ...) are weighted 1 and those at odd positions 3; missing or
# non-numeric characters count as 0 (String#to_i / nil.to_i).
#
# @param str [String] the digits to protect
# @return [String] a single character "0".."9"
def calc_checksum(str)
  digits = str.strip.split(//u)
  weighted_sum = (0...12).inject(0) do |acc, pos|
    weight = pos.odd? ? 3 : 1
    acc + weight * digits[pos].to_i
  end
  remainder = weighted_sum % 10
  ((10 - remainder) % 10).to_s
end

#check_bag ⇒ Object

require ‘pry’;



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/gtin2atc/builder.rb', line 147

# Compares the ATC code of every BAG entry with swissindex and swissmedic and
# publishes the outcome through Util.info and #report.
#
# Reads @data_bag, @data_swissindex, @data_swissmedic, @bag and
# @bag_entries_without_gtin. An entry whose ATC code is identical in all
# three sources is counted once as matching; every other entry is classified
# independently against swissindex and against swissmedic (missing, same,
# longer, shorter or different code).
def check_bag
  matching_atc_codes = []

  not_in_swissmedic = []
  match_in_swissmedic = []
  shorter_in_swissmedic = []
  longer_in_swissmedic = []
  different_atc_in_swissmedic = []

  not_in_swissindex = []
  match_in_swissindex = []
  shorter_in_swissindex = []
  longer_in_swissindex = []
  different_atc_in_swissindex = []
  j = 0
  @data_bag.each do |gtin, item|
    atc_code = item[:atc_code]
    j += 1
    Util.debug_msg "#{gtin}: j #{j} checking #{atc_code} in #{item}"
    swissmedic = @data_swissmedic[gtin]
    swissindex = @data_swissindex[gtin]
    if swissmedic && swissindex &&
       atc_code == swissmedic[:atc_code] &&
       atc_code == swissindex[:atc_code]
      matching_atc_codes << "#{gtin}: matching_atc_codes swissindex #{item} #{swissmedic[:atc_code]} and #{swissindex[:atc_code]}"
      next
    end

    if !swissindex
      not_in_swissindex << "#{gtin}: Not in swissindex #{item}"
    elsif atc_code == swissindex[:atc_code]
      match_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} matches swissindex  #{swissindex[:atc_code]}"
    elsif atc_code.length < swissindex[:atc_code].length
      longer_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} longer in swissindex  #{swissindex[:atc_code]}"
    elsif atc_code.length > swissindex[:atc_code].length
      shorter_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} shorter in swissindex  #{swissindex[:atc_code]}"
    else
      # Same length but different code. This used to be appended to
      # matching_atc_codes, inflating the match count and leaving the
      # "different" report permanently empty.
      different_atc_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} differs from swissindex  #{swissindex[:atc_code]}"
    end

    if !swissmedic
      not_in_swissmedic <<  "#{gtin}: Not in swissmedic #{item}"
    elsif atc_code == swissmedic[:atc_code]
      match_in_swissmedic << "swissmedic #{gtin}: ATC code #{atc_code} matches swissmedic  #{swissmedic[:atc_code]}"
    elsif atc_code.length < swissmedic[:atc_code].length
      longer_in_swissmedic << "swissmedic #{gtin}: ATC code #{atc_code} longer in swissmedic  #{swissmedic[:atc_code]}"
    elsif atc_code.length > swissmedic[:atc_code].length
      shorter_in_swissmedic << "swissmedic #{gtin}: ATC code #{atc_code} shorter in swissmedic  #{swissmedic[:atc_code]}"
    else
      different_atc_in_swissmedic << "swissmedic #{gtin}: ATC code #{atc_code} differs from swissmedic  #{swissmedic[:atc_code]}"
    end
  end
  # @bag_entries_without_gtin is an Integer counter; calling #size on it
  # would print the byte width of the integer instead of the count.
  Util.info  "Result of verifing data from bag (SL):
  bag-data fetched from #{@bag.origin}.
  bag had #{@data_bag.size} entries
  #{@bag_entries_without_gtin} entries had no GTIN field
  Not in swissmedic #{not_in_swissmedic.size}
  Not in swissindex #{not_in_swissindex.size}
"
  Util.info "Comparing ATC-Codes between bag and swissmedic"
  report('compare_bag_to_swissmedic', 'items had the same ATC code in bag, swissindex and swissmedic', matching_atc_codes)
  report('compare_bag_to_swissmedic', 'are the same in swissmedic and bag, but not in swissindex', match_in_swissmedic)
  report('compare_bag_to_swissmedic', 'are different in swissmedic and bag', different_atc_in_swissmedic)
  report('compare_bag_to_swissmedic', 'are shorter in swissmedic than in bag', shorter_in_swissmedic)
  report('compare_bag_to_swissmedic', 'are longer in swissmedic than in bag', longer_in_swissmedic)

  Util.info "Comparing ATC-Codes between bag and swissindex"
  report('compare_bag_to_swissindex', 'items had the same ATC code in bag, swissindex and swissmedic', matching_atc_codes)
  report('compare_bag_to_swissindex', 'are the same in swissindex and bag, but not in swissmedic', match_in_swissindex)
  report('compare_bag_to_swissindex', 'are different in swissindex and bag', different_atc_in_swissindex)
  report('compare_bag_to_swissindex', 'are shorter in swissindex than in bag', shorter_in_swissindex)
  report('compare_bag_to_swissindex', 'are longer in swissindex than in bag', longer_in_swissindex)
end

#check_swissmedic ⇒ Object



235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
# File 'lib/gtin2atc/builder.rb', line 235

# Checks every swissmedic entry against the BAG and swissindex tables and
# publishes the outcome through Util.info and #report.
#
# Reads @data_swissmedic, @data_bag, @data_swissindex, the three downloader
# objects (@swissmedic, @swissindex, @bag) for their #origin, and
# @bag_entries_without_gtin.
def check_swissmedic
  matching = []
  not_in_bag = []
  not_in_swissindex = []
  matching_atc_codes = []
  shorter_in_swissindex = []
  longer_in_swissindex = []
  different_atc = []
  @data_swissmedic.each do |gtin, item|
    atc_code = item[:atc_code]
    bag_item = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    # The items are hashes keyed by symbols. The former lookups with [1]
    # always yielded nil, and swissindex presence was never checked, so this
    # branch could never be taken and `matching` stayed empty.
    if bag_item && index_item &&
       atc_code.eql?(bag_item[:atc_code]) &&
       atc_code.eql?(index_item[:atc_code])
      matching << "#{gtin} #{atc_code} match in bag, swissmedic and swissindex"
      next
    end
    # Recorded before the swissindex early exit so that an entry missing from
    # both sources is still counted as missing from bag.
    not_in_bag << "#{gtin}: Not in bag #{item}" unless bag_item
    unless index_item
      not_in_swissindex << "Swissmedic #{gtin}: Not in swissindex #{item}"
      next
    end
    index_code = index_item[:atc_code]
    if atc_code == index_code
      matching_atc_codes << "swissindex #{gtin}: ATC code #{atc_code} matches swissindex  #{index_code}"
    elsif atc_code.length < index_code.length
      longer_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} longer  in swissindex  #{index_code}"
    elsif atc_code.length > index_code.length
      shorter_in_swissindex << "swissindex #{gtin}: ATC code #{atc_code} shorter in swissindex  #{index_code}"
    else
      different_atc << "swissindex #{gtin}: ATC code #{atc_code} differs from swissindex  #{index_code}"
    end
  end
  # @bag_entries_without_gtin is an Integer counter; #size on it would print
  # the byte width of the integer rather than the count.
  Util.info  "Result of verifing data from swissmedic:
  swissmedic had #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}
  swissindex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}
  bag #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}
  Matching #{matching.size} items.
  Not in bag #{not_in_bag.size}
  Not in swissindex #{not_in_swissindex.size}
  Comparing ATC-Codes between swissmedic and swissindex
"
  report('swissmedic', 'match in swissindex and swissmedic', matching_atc_codes)
  report('swissmedic', 'are different in swissindex and swissmedic', different_atc)
  report('swissmedic', 'are the same in swissindex and swissmedic', matching_atc_codes)
  report('swissmedic', 'are shorter in swissindex', shorter_in_swissindex)
  report('swissmedic', 'are longer in swissindex', longer_in_swissindex)
end

#compare ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
# File 'lib/gtin2atc/builder.rb', line 288

# Walks over the union of all GTINs known to bag, swissindex and swissmedic
# and sorts each one into exactly one bucket: matching in all three sources,
# missing from swissmedic, missing from swissindex, missing from bag, or
# present everywhere with differing ATC codes. Results go to Util.info and
# #report.
def compare
  all_gtin = @data_bag.merge(@data_swissindex).merge(@data_swissmedic).sort
  matching_atc_codes = []
  not_in_bag = []
  not_in_swissmedic = []
  not_in_swissindex = []
  different_atc = []
  all_gtin.each do |gtin, item|
    bag_item = @data_bag[gtin]
    index_item = @data_swissindex[gtin]
    medic_item = @data_swissmedic[gtin]
    # The second comparison used to repeat the swissindex check, so a
    # deviating swissmedic code was still reported as a full match.
    if bag_item && index_item && medic_item &&
       bag_item[:atc_code] == index_item[:atc_code] &&
       bag_item[:atc_code] == medic_item[:atc_code]
      matching_atc_codes << "#{gtin}: ATC-Code #{bag_item[:atc_code]} matches in bag, swissmedic and swissindex"
      next
    end
    unless medic_item
      not_in_swissmedic << "#{gtin}: Not in swissmedic #{item}"
      next
    end
    unless index_item
      not_in_swissindex << "#{gtin}: Not in swissindex #{item}"
      next
    end
    unless bag_item
      not_in_bag << "#{gtin}: Not in bag #{item}"
      next
    end
    # Include the swissmedic code: it may be the only one that deviates.
    different_atc << "#{gtin}: ATC code differs bag #{bag_item[:atc_code]} swissindex  #{index_item[:atc_code]} swissmedic #{medic_item[:atc_code]}"
  end
  # @bag_entries_without_gtin is an Integer counter; #size on it would print
  # the byte width of the integer rather than the count.
  Util.info  "Comparing all GTIN-codes:
  Found infos about #{all_gtin.size} entries
  bag #{@data_bag.size} entries. #{@bag_entries_without_gtin} entries had no GTIN field. Fetched from #{@bag.origin}
  swissindex #{@data_swissindex.size} entries. Fetched from #{@swissindex.origin}
  swissmedic #{@data_swissmedic.size} entries. Fetched from #{@swissmedic.origin}
"
  report('compare_all_gtins', 'items had the same ATC code in bag, swissindex and swissmedic', matching_atc_codes)
  report('compare_all_gtins', 'not in bag', not_in_bag)
  report('compare_all_gtins', 'not in swissindex', not_in_swissindex)
  report('compare_all_gtins', 'not in swissmedic', not_in_swissmedic)
  report('compare_all_gtins', 'ATC-Codes differed', different_atc)
end

#report(topic, msg, details) ⇒ Object



225
226
227
228
229
230
231
232
233
234
# File 'lib/gtin2atc/builder.rb', line 225

# Logs a one-line summary "<topic>: <count> <msg>" via Util.info and, when
# @gen_reports is set, also writes that summary followed by every detail line
# into a text file in the current directory. The file name is the summary
# plus ".txt" with each run of colons, spaces and commas replaced by "_".
#
# @param topic [String] prefix of the summary line
# @param msg [String] description appended after the count
# @param details [Array] one entry per reported item
# @return [nil, Array] nil when no file is written, otherwise +details+
def report(topic, msg, details)
  headline = "#{topic}: #{details.size} #{msg}"
  Util.info "   #{headline}"
  return unless @gen_reports
  target = "#{headline}.txt".gsub(/[: ,]+/, '_')
  File.open(target, 'w+') do |out|
    out.puts headline
    details.each { |line| out.puts line }
  end
end

#run(gtins_to_parse = []) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/gtin2atc/builder.rb', line 106

# Extracts the swissindex data into a CSV file and, in compare mode, also
# extracts bag and swissmedic data and runs the three consistency checks.
#
# All CSV files are written into the directory returned by Util.get_archive.
# Without @do_compare only gtin2atc.csv is produced and the method returns
# early.
#
# @param gtins_to_parse [Array<String>] GTINs or pharmacodes to export; an
#   empty list (or compare mode) exports every swissindex item
def run(gtins_to_parse=[])
  Util.debug_msg("run #{gtins_to_parse}")
  Util.debug_msg("@use_swissindex true")
  @data_swissindex = swissindex_xml_extractor
  output_name =  File.join(Util.get_archive, @do_compare ? 'gtin2atc_swissindex.csv' : 'gtin2atc.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'pharmacode', 'description']
    @data_swissindex.sort.each do |gtin, item|
      if @do_compare || gtins_to_parse.size == 0 ||
          gtins_to_parse.index(gtin.to_s) ||
          gtins_to_parse.index(item[:pharmacode])
        csvfile << [gtin, item[:atc_code], item[:pharmacode], item[:description]]
      end
    end
  end
  msg = "swissindex: Extracted #{gtins_to_parse.size} of #{@data_swissindex.size} items into #{output_name} for #{gtins_to_parse}"
  Util.debug_msg(msg)
  return unless @do_compare
  @data_bag = bag_xml_extractor
  output_name =  File.join(Util.get_archive, 'gtin2atc_bag.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_bag.sort.each do |gtin, item|
      # bag items store their text under :name, so reading only
      # :description left this column empty.
      csvfile << [gtin, item[:atc_code], item[:description] || item[:name]]
    end
  end
  Util.debug_msg "bag: Extracted #{gtins_to_parse.size} of #{@data_bag.size} items into #{output_name} for #{gtins_to_parse}"
  @data_swissmedic = swissmedic_xls_extractor
  output_name =  File.join(Util.get_archive, 'gtin2atc_swissmedic.csv')
  CSV.open(output_name,'w+') do |csvfile|
    csvfile << ["gtin", "ATC", 'description']
    @data_swissmedic.sort.each do |gtin, item|
      # Three fields to match the three-column header; swissmedic items have
      # no :pharmacode and keep their text under :name.
      csvfile << [gtin, item[:atc_code], item[:description] || item[:name]]
    end
  end
  Util.debug_msg "swissmedic: Extracted #{@data_swissmedic.size} items into #{output_name}"
  check_bag
  check_swissmedic
  compare
end

#swissindex_xml_extractor ⇒ Object



58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
# File 'lib/gtin2atc/builder.rb', line 58

# Downloads the swissindex pharma XML and indexes its items by GTIN.
#
# Side effect: stores the downloader in @swissindex.
#
# @return [Hash{Integer => Hash}] one hash per item with the keys :gtin,
#   :pharmacode, :atc_code and :description
def swissindex_xml_extractor
  @swissindex = SwissindexDownloader.new
  xml = @swissindex.download
  Util.debug_msg "swissindex_xml_extractor xml is #{xml.size} bytes long"
  data = {}
  result = PharmaEntry.parse(xml.sub(Strip_For_Sax_Machine, ''), :lazy => true)
  result.PHARMA.ITEM.each do |pac|
    gtin = pac.GTIN ? pac.GTIN.to_i : nil
    # Skip items without a usable GTIN. The old guard tested
    # `item[:gtin].to_i` on a freshly created hash; that is always 0, which
    # is truthy in Ruby, so nothing was skipped and GTIN-less items were all
    # stored under the key nil. A GTIN that converts to 0 is skipped as well.
    next if gtin.nil? || gtin.zero?
    data[gtin] = {
      :gtin        => gtin,
      :pharmacode  => pac.PHAR || '',
      :atc_code    => (code = pac.ATC) ? code.to_s : '',
      :description => pac.DSCR,
    }
  end
  Util.debug_msg "swissindex_xml_extractor extracted #{data.size} items"
  data
end

#swissmedic_xls_extractor ⇒ Object



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/gtin2atc/builder.rb', line 31

# Downloads the swissmedic spreadsheet and derives a GTIN for every data row.
#
# The GTIN is built from "7680", the value of column 0 zero-padded to five
# digits, the value of column 10 zero-padded to three digits, and the check
# digit from #calc_checksum. Column 5 supplies the ATC code and column 2 the
# name.
#
# Side effects: stores the downloader in @swissmedic and the first worksheet
# in @sheet.
#
# @return [Hash{Integer => Hash}] one hash per row with the keys :gtin,
#   :atc_code and :name
def swissmedic_xls_extractor
  @swissmedic = SwissmedicDownloader.new
  filename = @swissmedic.download
  Util.debug_msg "swissmedic_xls_extractor xml is #{filename}"
  data = {}
  @sheet = RubyXL::Parser.parse(File.expand_path(filename)).worksheets[0]
  i_5,i_3   = 0,10 # :swissmedic_numbers
  atc       = 5    # :atc_code
  name_col  = 2    # :name
  @sheet.each_with_index do |row, i|
    next if i <= 1 # the first two rows are skipped (presumably headers)
    # Guard against a nil row as well as missing number cells.
    next unless row && row[i_5] && row[i_3]
    no8 = sprintf('%05d',row[i_5].value.to_i) + sprintf('%03d',row[i_3].value.to_i)
    next if no8.to_i == 0
    ean_base12 = "7680#{no8}"
    gtin = (ean_base12.ljust(12, '0') + calc_checksum(ean_base12)).to_i
    data[gtin] = {
      :gtin     => gtin,
      :atc_code => row[atc] ? row[atc].value.to_s : '',
      # A missing name cell used to raise NoMethodError; treat it like a
      # missing ATC cell and fall back to an empty string.
      :name     => row[name_col] ? row[name_col].value.to_s : '',
    }
  end
  Util.debug_msg "swissmedic_xls_extractor extracted #{data.size} items"
  data
end