Module: CSVPP::Conversions

Included in:
Parser
Defined in:
lib/csvpp/conversions.rb

Constant Summary collapse

# Pattern for the inner part of an array type: a word-character element type
# (named capture `array_type`), a comma, optional whitespace, and a single
# non-word delimiter character (named capture `array_delimiter`).
ARRAY_TYPE_RGX =
/(?<array_type>\w+),\s*(?<array_delimiter>\W)/

Class Method Summary collapse

Class Method Details

.clean_decimal(str) ⇒ Object



142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'lib/csvpp/conversions.rb', line 142

# Normalises the textual form of a number so it can be handed to a numeric
# constructor. Non-String input is returned untouched.
#
# @param str [Object] raw value; only Strings are cleaned
# @return [Object] the cleaned String, or `str` itself when it is not a String
def clean_decimal(str)
  return str unless str.is_a?(String)

  # Thousand separators (', `) and any whitespace are dropped everywhere.
  without_separators = str.strip.gsub(/['`\s]?/, '')
  # A trailing run of letters/whitespace (e.g. a unit such as "mg") is cut off.
  without_suffix = without_separators.sub(/[\sa-zA-Z]*$/, '')
  # Zeros directly after a minus sign are removed: -003 => -3.
  normalized = without_suffix.sub(/^-0*(.+)$/, '-\1')

  # A value made up solely of zeros collapses to a single "0" ...
  return '0' if normalized =~ /^0+$/

  # ... otherwise all leading zeros are removed.
  normalized.gsub( /^0*/, '')
end

.convert(obj, to:, missings: [], **options) ⇒ Object

Returns the parsed value, read from `obj`, interpreted as the type given by `to`.

Parameters:

  • obj (Object)

    object to parse

  • to (String)

    a type, e.g. “int”

  • missings (Array) (defaults to: [])

    list of values that are treated as missing, e.g. ['NA', '-', -999]

  • options (Hash)

    options passed on to parsing methods for specific types

Returns:

  • the parsed value, read from `obj`, interpreted as the type given by `to`



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
# File 'lib/csvpp/conversions.rb', line 15

# Parses `obj` as the type named by `to` by dispatching to the matching
# `parse_<type>` method.
#
# Array types are written as `array<TYPE, DELIMITER>` (e.g. `array<int, ;>`);
# the element type and delimiter are extracted with ARRAY_TYPE_RGX and passed
# to `parse_array` as the `type:` and `delimiter:` options.
#
# @param obj [Object] object to parse
# @param to [String] a type, e.g. "int" or "array<int, ;>"
# @param missings [Array] values that are treated as missing
# @param options [Hash] options passed on to the type-specific parse method
# @return [Object, nil] the parsed value, or nil if `obj` is a missing value
# @raise [ArgumentError] if an array type does not have the form
#   `array<TYPE, DELIMITER>`
def convert(obj, to:, missings: [], **options)
  return nil if missing?(obj, missings)

  if to.start_with?('array')
    spec = to
    # Split only once so that "<" itself can be used as the delimiter.
    to, rest = spec.split('<', 2)
    # Remove only the closing bracket so that ">" can be used as the delimiter.
    match = rest.to_s.sub(/>\s*\z/, '').match(ARRAY_TYPE_RGX)
    unless match
      raise ArgumentError,
            "invalid array type #{spec.inspect}, expected e.g. \"array<int, ;>\""
    end

    options = options.merge(
      type: match[:array_type],
      delimiter: match[:array_delimiter]
    )
  end

  send("parse_#{to}", obj, **options)
end

.missing?(obj, missings) ⇒ Boolean

Returns:

  • (Boolean)


138
139
140
# File 'lib/csvpp/conversions.rb', line 138

# Tells whether `obj` counts as a missing value. Both sides are compared by
# their string representation, so e.g. -999 and "-999" are considered equal.
#
# @param obj [Object] value to check
# @param missings [Array] values that are treated as missing
# @return [Boolean]
def missing?(obj, missings)
  needle = obj.to_s
  missings.any? { |candidate| candidate.to_s == needle }
end

.parse_array(str, type:, delimiter:, **options) ⇒ Object



31
32
33
# File 'lib/csvpp/conversions.rb', line 31

# Splits `str` at `delimiter` and parses every entry with the `parse_<type>`
# method for the given element type.
#
# @param str [String] the delimited list
# @param type [String] element type, e.g. "int"
# @param delimiter [String] separator between the entries
# @param options [Hash] options passed on to the element parse method
# @return [Array] the parsed entries
def parse_array(str, type:, delimiter:, **options)
  # Forward the remaining options so that element parsers which take options
  # (e.g. true_values/false_values for booleans) actually receive them.
  str.split(delimiter).map { |entry| send("parse_#{type}", entry, **options) }
end

.parse_boolean(str, true_values: [], false_values: [], **options) ⇒ Object

Returns true or false, or nil if `str` doesn't match any value interpreted as `true` or `false`.

Parameters:

  • true_values (Array) (defaults to: [])

    list of values that are interpreted as `true`

  • false_values (Array) (defaults to: [])

    list of values that are interpreted as `false`

Returns:

  • true or false, or nil if `str` doesn't match any value interpreted as `true` or `false`



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
# File 'lib/csvpp/conversions.rb', line 114

# Interprets `str` as a boolean. The comparison ignores case and surrounding
# whitespace.
#
# @param str [Object] value to interpret
# @param true_values [Array] values interpreted as `true`; when empty,
#   "1", "t" and "true" are used
# @param false_values [Array] values interpreted as `false`; when empty,
#   "0", "f" and "false" are used
# @return [Boolean, nil] true or false, or nil if `str` matches neither list
def parse_boolean(str, true_values: [], false_values: [], **options)
  token = str.to_s.strip.downcase

  # The true list is checked first, so it wins if a value is in both lists.
  truthy = true_values.empty? ? ['1', 't', 'true'] : true_values.map { |v| v.to_s.downcase }
  return true if truthy.include?(token)

  falsy = false_values.empty? ? ['0', 'f', 'false'] : false_values.map { |v| v.to_s.downcase }
  falsy.include?(token) ? false : nil
end

.parse_chop(str, delimiter: ':', **options) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
# File 'lib/csvpp/conversions.rb', line 35

# Splits `str` at `delimiter` into up to three fields - code, laterality and
# date - and returns them as a Hash.
#
# @param str [String] the delimited value
# @param delimiter [String] separator between the fields
# @param options [Hash] accepted but not used here
# @return [Hash] with keys :code (String), :laterality (String, or nil when
#   absent or empty) and :date (result of parse_date, or nil when absent)
def parse_chop(str, delimiter: ':', **options)
  raw_code, raw_laterality, raw_date = str.split(delimiter)

  code_text = parse_string(raw_code)

  # A missing laterality field and an empty one both end up as nil.
  side = raw_laterality ? parse_string(raw_laterality) : nil
  side = nil if side && side.empty?

  # The date is only parsed when the field is present.
  day = raw_date ? parse_date(raw_date) : nil

  { code: code_text, laterality: side, date: day }
end

.parse_date(str, **options) ⇒ Object



106
107
108
# File 'lib/csvpp/conversions.rb', line 106

# Parses `str` as a date using Date.parse.
#
# Blank input (nil, empty or whitespace-only) yields nil, in line with the
# numeric parsers, instead of making Date.parse raise.
#
# @param str [Object] value to parse; converted with `to_s` first
# @param options [Hash] accepted but not used here
# @return [Date, nil] the parsed date, or nil for blank input
# @raise [ArgumentError] if the non-blank text is not a recognisable date
def parse_date(str, **options)
  text = str.to_s.strip
  return nil if text.empty?

  Date.parse(text)
end

.parse_decimal(str, **options) ⇒ Object



94
95
96
97
98
99
100
101
102
103
104
# File 'lib/csvpp/conversions.rb', line 94

# Parses `str` as a BigDecimal after cleaning it with clean_decimal.
#
# @param str [Object] value to parse
# @param options [Hash] accepted but not used here
# @return [BigDecimal, nil] the parsed number, or nil when the input or its
#   cleaned form is empty
def parse_decimal(str, **options)
  return nil if str.to_s.empty?

  digits = clean_decimal(str).to_s
  # Nothing numeric may be left after cleaning, e.g. for a bare unit.
  return nil if digits.empty?

  BigDecimal(digits)
end

.parse_float(str, **options) ⇒ Object



89
90
91
92
# File 'lib/csvpp/conversions.rb', line 89

# Parses `str` as a Float after cleaning it with clean_decimal.
#
# @param str [Object] value to parse
# @param options [Hash] accepted but not used here
# @return [Float, nil] the parsed number; nil for empty input or when the
#   conversion raises
def parse_float(str, **options)
  return nil if str.to_s.empty?

  begin
    Float(clean_decimal(str))
  rescue StandardError
    # Best effort: anything that cannot be converted counts as "no value".
    nil
  end
end

.parse_int(str, **options) ⇒ Object



73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
# File 'lib/csvpp/conversions.rb', line 73

# Parses `str` as an Integer.
#
# Strings are cleaned first: thousand separators and whitespace are removed,
# the decimal point and everything after it is cut off (truncation, no
# rounding), a trailing unit such as "mg" is dropped and leading zeros are
# stripped. Non-String input is passed to Integer() unchanged.
#
# @param str [Object] value to parse
# @param options [Hash] accepted but not used here
# @return [Integer, nil] the parsed number; nil for empty input or when the
#   conversion raises
def parse_int(str, **options)
  return nil if str.to_s.empty?

  cleaned = if str.is_a?(String)
              val = str.strip
                      .gsub(/['`\s]?/, '')      # remove thousand separators
                      .sub(/\.\d*/, '')         # remove decimal point and everything thereafter
                      .sub(/[\sa-zA-Z]*$/, '')  # remove trailing words like "mg"
                      .sub(/^-0*(.+)$/, '-\1')  # remove 0 after negative sign: -003 => -3
              val =~ /^0+$/ ? '0' : val.gsub(/^0*/, '') # remove leading zeros
            else
              str
            end

  begin
    # Strings are always read as base 10. Without an explicit base, Integer()
    # honours radix prefixes, so e.g. "+010" would be parsed as octal 8.
    # A base may only be given for String arguments.
    cleaned.is_a?(String) ? Integer(cleaned, 10) : Integer(cleaned)
  rescue StandardError
    # Best effort: anything that cannot be converted counts as "no value".
    nil
  end
end

.parse_medi(str, delimiter: ':', **options) ⇒ Object

See page 3 in documentation/Technisches_Begleitblatt_2017_d.pdf for more info on the medi data type.



51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/csvpp/conversions.rb', line 51

# Splits `str` at `delimiter` into up to five fields - ATC code, annex,
# application, dose and unit - and returns them as a Hash.
#
# @param str [String] the delimited value
# @param delimiter [String] separator between the fields
# @param options [Hash] accepted but not used here
# @return [Hash] with keys :atc_code, :annex, :application, :dose and :unit;
#   :annex is nil when absent or empty, :dose is the result of parse_decimal,
#   the remaining values are the results of parse_string
def parse_medi(str, delimiter: ':', **options)
  raw_atc, raw_annex, raw_application, raw_dose, raw_unit = str.split(delimiter)

  atc = parse_string(raw_atc)

  # A missing annex field and an empty one both end up as nil.
  annex_text = raw_annex ? parse_string(raw_annex) : nil
  annex_text = nil if annex_text && annex_text.empty?

  {
    atc_code: atc,
    annex: annex_text,
    application: parse_string(raw_application),
    dose: parse_decimal(raw_dose),
    unit: parse_string(raw_unit)
  }
end

.parse_string(str, **options) ⇒ Object



69
70
71
# File 'lib/csvpp/conversions.rb', line 69

# Converts `str` to a String and removes leading and trailing whitespace.
#
# @param str [Object] value to convert; nil becomes the empty string
# @param options [Hash] accepted but not used here
# @return [String] the stripped string representation of `str`
def parse_string(str, **options)
  text = str.to_s
  text.strip
end