Module: StructuredCsv::Csv2Yaml
- Defined in:
- lib/structured_csv/csv2yaml.rb
Constant Summary collapse
- CAST_DEFAULT_TYPE =
"string".freeze
Class Method Summary collapse
- .cast_type(value, type_in_string) ⇒ Object
- .convert(csv_filename) ⇒ Object
- .get_portion(csv, section_name) ⇒ Object
- .is_row_empty?(row) ⇒ Boolean
- .is_start_of_portion?(row, section_name) ⇒ Boolean
- .normalize_namespaces(hash) ⇒ Object
  Structure all child hashes if the key is namespaced.
- .parse_data(rows, data_meta) ⇒ Object
- .parse_metadata(rows) ⇒ Object
- .split_header_key_type(header_field) ⇒ Object
Class Method Details
.cast_type(value, type_in_string) ⇒ Object
78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
# File 'lib/structured_csv/csv2yaml.rb', line 78
#
# Converts a raw cell value to the Ruby type named by +type_in_string+.
#
# Supported type names (matched case-insensitively):
# * "boolean"  - true / false when the value reads "true" / "false"
#                (any case, optionally space-padded); nil otherwise.
# * "integer"  - String#to_i of the stripped value, so non-numeric text
#                becomes 0.
# * "string"   - the stripped value.
# * "array{T}" - the value split on ";", each element cast as T. An empty
#                element type ("array{}") falls back to CAST_DEFAULT_TYPE.
# Any other type name returns value.to_s untouched.
#
# @param value [String, nil] raw cell content
# @param type_in_string [String, nil] type name; nil means CAST_DEFAULT_TYPE
# @return [Object, nil] the converted value; nil when +value+ is nil
def self.cast_type(value, type_in_string)
  return if value.nil?

  type = (type_in_string || CAST_DEFAULT_TYPE).to_s.downcase

  case type
  when "boolean"
    case value
    when /\A *true *\z/i then true
    when /\A *false *\z/i then false
    end
  when "integer"
    value.to_s.strip.to_i
  when "string"
    value.to_s.strip
  when /\Aarray\{(.*)\}/
    # The capture is "" (never nil) for "array{}", so emptiness must be
    # tested explicitly for the default element type to apply.
    element_type = Regexp.last_match(1).to_s
    element_type = CAST_DEFAULT_TYPE if element_type.empty?

    value.to_s.split(";").map { |element| cast_type(element, element_type) }
  else
    value.to_s
  end
end
.convert(csv_filename) ⇒ Object
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 |
# File 'lib/structured_csv/csv2yaml.rb', line 199
#
# Loads a structured CSV file and converts its "METADATA" and "DATA"
# sections into a single Hash.
#
# NOTE(review): the local holding the METADATA portion and the name of the
# metadata parser were missing from this listing; they are restored here as
# +metadata_section+ and +parse_metadata+ (the latter per the method
# summary) -- confirm against the repository source.
#
# @param csv_filename [String] path handed to StructuredCsv::Common.load_csv
# @return [Hash] { "metadata" => ..., "data" => ... }
def self.convert(csv_filename)
  raw_data = StructuredCsv::Common.load_csv(csv_filename)

  metadata_section = get_portion(raw_data, "METADATA")
  data_section = get_portion(raw_data, "DATA")

  {
    "metadata" => parse_metadata(metadata_section[:rows]),
    "data" => parse_data(data_section[:rows], data_section[:meta])
  }
end
.get_portion(csv, section_name) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/structured_csv/csv2yaml.rb', line 6
#
# Locates the section of +csv+ that starts at the row whose first cell is
# +section_name+ and ends at the first empty row after it.
#
# The second cell of the section's start row may carry ";"-separated
# "key=value" options (e.g. "type=array;name=items"); they are returned
# under :meta with symbol keys. Keys and values are trimmed, only the first
# "=" separates key from value, and blank options are ignored. An option
# without "=" maps to nil.
#
# When the section is never found, :first_row is nil and :rows is the
# whole CSV. When no empty row follows the section, :last_row stays -1 and
# :rows runs to the end of the CSV. The terminating empty row, if any, is
# included in :rows.
#
# @param csv [Array<Array>] parsed CSV rows
# @param section_name [String] section marker to look for
# @return [Hash] with keys :first_row, :last_row, :rows and :meta
def self.get_portion(csv, section_name)
  first_row = nil
  last_row = -1
  meta = {}

  warn "section_name #{section_name}"

  csv.each_with_index do |row, index|
    if first_row.nil?
      next unless is_start_of_portion?(row, section_name)

      row[1].to_s.split(";").each do |opt|
        # Limit of 2 keeps any further "=" inside the value.
        key, val = opt.split("=", 2)
        next if key.nil? || key.strip.empty?

        meta[key.strip.to_sym] = val&.strip
      end

      # Section content begins on the row after the marker.
      first_row = index + 1
    elsif is_row_empty?(row)
      last_row = index
      break
    end
  end

  {
    first_row: first_row,
    last_row: last_row,
    rows: csv[(first_row || 0)..last_row],
    meta: meta
  }
end
.is_row_empty?(row) ⇒ Boolean
50 51 52 53 54 |
# File 'lib/structured_csv/csv2yaml.rb', line 50
#
# Tells whether a CSV row carries no content.
#
# A cell counts as empty when it is nil or a String that is blank after
# stripping whitespace. Any other value (including 0 and false) counts as
# content. A row with no cells at all is empty.
#
# @param row [Array] cells of one CSV row
# @return [Boolean] true when every cell is empty
def self.is_row_empty?(row)
  row.all? do |field|
    field.nil? || (field.is_a?(String) && field.strip.empty?)
  end
end
.is_start_of_portion?(row, section_name) ⇒ Boolean
44 45 46 47 48 |
# File 'lib/structured_csv/csv2yaml.rb', line 44
#
# Tells whether +row+ is the marker row of the section +section_name+,
# i.e. its first cell, converted to a String and stripped of surrounding
# whitespace, equals the section name.
#
# @param row [Array] cells of one CSV row
# @param section_name [#to_s] section marker, e.g. "DATA"
# @return [Boolean] false when the first cell is nil or does not match
def self.is_start_of_portion?(row, section_name)
  first_cell = row.first
  return false if first_cell.nil?

  # Convert before stripping so a non-String cell cannot raise.
  first_cell.to_s.strip == section_name.to_s
end
.normalize_namespaces(hash) ⇒ Object
Structure all child hashes if the key is namespaced, e.g. { "hello.me" => data } becomes
{ "hello" => { "me" => data } }
219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 |
# File 'lib/structured_csv/csv2yaml.rb', line 219
#
# Expands dot-namespaced keys into nested hashes, e.g.
# { "hello.me" => data } becomes { "hello" => { "me" => data } }.
#
# Keys are converted with #to_s, so every key of the result is a String.
# Entries are applied in iteration order. When a namespaced key needs a
# parent that already holds a non-Hash value, a warning is printed and the
# parent is replaced by a fresh Hash, rather than indexing into the scalar
# (which, for a String, would silently edit the string via String#[]=).
#
# @param hash [Hash] flat hash whose keys may contain "."
# @return [Hash] a new nested Hash with String keys
def self.normalize_namespaces(hash)
  hash.each_with_object({}) do |(key, value), result|
    *parents, leaf = key.to_s.split(".")

    level = parents.inject(result) do |node, component|
      unless node[component].is_a?(Hash)
        unless node[component].nil?
          warn "[WARNING] key [#{key}] replaces the non-hash value stored at [#{component}]"
        end
        node[component] = {}
      end
      node[component]
    end

    level[leaf] = value
  end
end
.parse_data(rows, data_meta) ⇒ Object
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 |
# File 'lib/structured_csv/csv2yaml.rb', line 126
#
# Converts the rows of a DATA section into a Hash or an Array of records.
#
# The first row is the header; each non-nil header cell is parsed with
# split_header_key_type into a name and a type, and remembers the column
# it came from, so a blank header cell does not shift later columns. Every
# following row becomes a Hash of name => cast value; nil values are
# dropped and namespaced names are expanded via normalize_namespaces. Empty
# rows and rows whose first cell is nil are skipped.
#
# Options read from +data_meta+:
# * :type - "hash" (default) keys each record by the value of the first
#           named column; "array" appends records in order. For any other
#           value no container is created and no records are stored.
# * :name - when present, the result is wrapped as { name => result }.
# The :key option is not consulted; the first named column is always the
# key.
#
# @param rows [Array<Array>] header row followed by data rows
# @param data_meta [Hash] section options with symbol keys
# @return [Hash, Array, nil] the parsed structure
def self.parse_data(rows, data_meta)
  data_name = data_meta[:name]
  data_type = data_meta[:type] || "hash"

  base_structure = case data_type
                   when "hash" then {}
                   when "array" then []
                   end

  # Pairs of [column index, { name:, type: }] for every named column.
  columns = []
  header_names = []

  rows.each_with_index do |row, index|
    if index.zero?
      columns = row.each_with_index.map do |field, col|
        [col, split_header_key_type(field)] unless field.nil?
      end.compact
      header_names = columns.map { |_col, spec| spec[:name] }
      next
    end

    # Skip all the empty rows
    next if is_row_empty?(row)

    # Skip if no key value
    next if row[0].nil?

    row_values = columns.map do |col, spec|
      cell = row[col]
      cell = cell.strip if cell.respond_to?(:strip)
      cast_type(cell, spec[:type])
    end

    key = row_values.first

    # Remove entries that point to nil
    record = header_names.zip(row_values).to_h.reject { |_name, val| val.nil? }

    case data_type
    when "hash"
      unless base_structure[key].nil?
        warn "[WARNING] there is already data inside key [#{key}] -- maybe you should set type=array?"
      end
      base_structure[key] = normalize_namespaces(record)
    when "array"
      base_structure << normalize_namespaces(record)
    end
  end

  base_structure = { data_name => base_structure } if data_name

  base_structure
end
.parse_metadata(rows) ⇒ Object
106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 |
# File 'lib/structured_csv/csv2yaml.rb', line 106
#
# Converts the rows of a METADATA section into a (possibly nested) Hash.
#
# Each row contributes one entry: the first cell is parsed with
# split_header_key_type into a name and a type, and the second cell is
# cast to that type with cast_type. Empty rows, and rows whose first cell
# is nil, are skipped. Namespaced names are expanded through
# normalize_namespaces.
#
# NOTE(review): the method name was missing from this listing and is
# restored from the method summary as +parse_metadata+.
#
# @param rows [Array<Array>] rows of the METADATA section
# @return [Hash] metadata keyed by name
def self.parse_metadata(rows)
  hash = {}

  rows.each do |row|
    # Skip all the empty rows
    next if is_row_empty?(row)

    # A row without a name cell cannot be stored under any key
    next if row.first.nil?

    name_type = split_header_key_type(row.first)
    hash[name_type[:name]] = cast_type(row[1], name_type[:type])
  end

  normalize_namespaces(hash)
end
.split_header_key_type(header_field) ⇒ Object
56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 |
# File 'lib/structured_csv/csv2yaml.rb', line 56
#
# Splits a header cell of the form "name[type]" into its name and type.
#
# Surrounding whitespace is ignored on the cell, the name and the type, so
# " age [integer] " yields name "age" and type "integer". A cell without a
# bracketed suffix, or with an empty one ("name[]"), gets
# CAST_DEFAULT_TYPE.
#
# @param header_field [#to_s] header cell content
# @return [Hash] { name: String, type: String }
def self.split_header_key_type(header_field)
  field = header_field.to_s.strip
  matched = field.match(/\A([^\[]*)\[(.*)\]\Z/)

  return { name: field, type: CAST_DEFAULT_TYPE } if matched.nil?

  field_type = matched[2].strip
  field_type = CAST_DEFAULT_TYPE if field_type.empty?

  {
    name: matched[1].strip,
    type: field_type
  }
end