Class: TSV::Parser
- Inherits: Object
- Inheritance chain: Object → TSV::Parser
- Defined in:
- lib/rbbt/tsv/parser.rb
Defined Under Namespace
Classes: END_PARSING, SKIP_LINE
Instance Attribute Summary collapse
-
#cast ⇒ Object
Returns the value of attribute cast.
-
#field_positions ⇒ Object
Returns the value of attribute field_positions.
-
#fields ⇒ Object
Returns the value of attribute fields.
-
#first_line ⇒ Object
Returns the value of attribute first_line.
-
#fix ⇒ Object
Returns the value of attribute fix.
-
#header_hash ⇒ Object
Returns the value of attribute header_hash.
-
#key_field ⇒ Object
Returns the value of attribute key_field.
-
#key_position ⇒ Object
Returns the value of attribute key_position.
-
#namespace ⇒ Object
Returns the value of attribute namespace.
-
#select ⇒ Object
Returns the value of attribute select.
-
#sep ⇒ Object
Returns the value of attribute sep.
-
#sep2 ⇒ Object
Returns the value of attribute sep2.
-
#serializer ⇒ Object
Returns the value of attribute serializer.
-
#straight ⇒ Object
Returns the value of attribute straight.
-
#take_all ⇒ Object
Returns the value of attribute take_all.
-
#type ⇒ Object
Returns the value of attribute type.
-
#zipped ⇒ Object
Returns the value of attribute zipped.
Instance Method Summary collapse
- #add_to_data_flat(data, keys, values) ⇒ Object
- #add_to_data_flat_merge(data, keys, values) ⇒ Object
- #add_to_data_merge(data, keys, values) ⇒ Object
- #add_to_data_merge_zipped(data, keys, values) ⇒ Object
- #add_to_data_no_merge_double(data, keys, values) ⇒ Object
- #add_to_data_no_merge_list(data, key, values) ⇒ Object
- #add_to_data_zipped(data, keys, values) ⇒ Object
- #all_fields ⇒ Object
- #cast? ⇒ Boolean
- #cast_values_double(values) ⇒ Object
- #cast_values_list(values) ⇒ Object
- #cast_values_single(value) ⇒ Object
- #chop_line(line) ⇒ Object
- #fix_fields(options) ⇒ Object
- #get_values_double(parts) ⇒ Object
- #get_values_flat(parts) ⇒ Object
- #get_values_flat_inverse(parts) ⇒ Object
- #get_values_list(parts) ⇒ Object
- #get_values_single(parts) ⇒ Object
- #get_values_single_from_flat(parts) ⇒ Object
-
#initialize(stream = nil, options = {}) ⇒ Parser
constructor
A new instance of Parser.
- #parse_header(stream) ⇒ Object
- #process(line) ⇒ Object
- #rescue_first_line ⇒ Object
- #setup(data) ⇒ Object
Constructor Details
#initialize(stream = nil, options = {}) ⇒ Parser
Returns a new instance of Parser.
317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 |
# File 'lib/rbbt/tsv/parser.rb', line 317
#
# Builds a parser for +stream+: reads the header, merges the options declared
# there with the caller's +options+ (the caller's win), and binds the
# per-instance +get_values+, +cast_values+ and +add_to_data+ aliases to the
# variants that match the resulting type / merge / zipped settings.
#
# NOTE(review): in this listing every identifier containing "options" had been
# stripped; they are restored here as +options+, +header_options+ and
# +Misc.process_options+ -- confirm against lib/rbbt/tsv/parser.rb.
#
# @param stream [#gets] source of TSV lines, consumed by parse_header
# @param options [Hash] parser settings (:header_hash, :sep, :type, :sep2,
#   :cast, :fix, :select, :zipped, :namespace, :merge, :key_field, :fields)
def initialize(stream = nil, options = {})
  # These two are needed before the header can be read.
  @header_hash = Misc.process_options(options, :header_hash) || "#"
  @sep = Misc.process_options(options, :sep) || "\t"

  header_options = parse_header(stream)
  options = header_options.merge options

  @type = Misc.process_options(options, :type) || :double
  @sep2 = Misc.process_options(options, :sep2) || "|"
  @cast = Misc.process_options options, :cast
  @cast = @cast.to_sym if String === @cast
  @fix = Misc.process_options(options, :fix)
  @select = Misc.process_options options, :select
  @zipped = Misc.process_options options, :zipped
  @namespace = Misc.process_options options, :namespace

  # merge defaults to the zipped setting, and to false when neither is given
  merge = Misc.process_options(options, :merge)
  merge = @zipped if merge.nil?
  merge = false if merge.nil?

  case @type
  when :double
    self.instance_eval do
      alias get_values get_values_double
      alias cast_values cast_values_double
    end
    case
    when (merge and not zipped)
      self.instance_eval do alias add_to_data add_to_data_merge end
    when (merge and zipped)
      self.instance_eval do alias add_to_data add_to_data_merge_zipped end
    when zipped
      self.instance_eval do alias add_to_data add_to_data_zipped end
    else
      self.instance_eval do alias add_to_data add_to_data_no_merge_double end
    end
  when :single
    # presumably the type declared in the file header (the :type entry has
    # already been consumed from +options+ above) -- TODO confirm
    if header_options[:type] == :flat
      self.instance_eval do
        alias get_values get_values_single_from_flat
        alias cast_values cast_values_single
        alias add_to_data add_to_data_no_merge_double
      end
    else
      self.instance_eval do
        alias get_values get_values_single
        alias cast_values cast_values_single
        alias add_to_data add_to_data_no_merge_list
      end
    end
  when :list
    self.instance_eval do
      alias get_values get_values_list
      alias cast_values cast_values_list
      alias add_to_data add_to_data_no_merge_list
    end
  when :flat
    @take_all = true if options[:fields].nil?
    self.instance_eval do
      alias get_values get_values_flat
      alias cast_values cast_values_double
    end
    if merge
      self.instance_eval do alias add_to_data add_to_data_flat_merge end
    else
      self.instance_eval do alias add_to_data add_to_data_flat end
    end
  end

  # fix_fields consumes :fields from options, so remember it first
  fields = options[:fields]
  fix_fields(options)

  # Any non-default parsing requirement rules out the straight path.
  @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
end
Instance Attribute Details
#cast ⇒ Object
Returns the value of attribute cast.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the cast setting consulted by the cast_values_* methods.
#
# @return [Object] the current value of @cast
def cast
  @cast
end
#field_positions ⇒ Object
Returns the value of attribute field_positions.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the column positions used by the get_values_* methods to pick
# value fields out of a split line.
#
# @return [Object] the current value of @field_positions
def field_positions
  @field_positions
end
#fields ⇒ Object
Returns the value of attribute fields.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the field names (set from the header line by parse_header and
# adjusted by fix_fields).
#
# @return [Object] the current value of @fields
def fields
  @fields
end
#first_line ⇒ Object
Returns the value of attribute first_line.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the first line left over after parse_header consumed the header.
#
# @return [Object] the current value of @first_line
def first_line
  @first_line
end
#fix ⇒ Object
Returns the value of attribute fix.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the line-fixing callable that #process applies when it is a Proc.
#
# @return [Object] the current value of @fix
def fix
  @fix
end
#header_hash ⇒ Object
Returns the value of attribute header_hash.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the prefix that marks header lines ("#" unless overridden in the
# constructor options).
#
# @return [Object] the current value of @header_hash
def header_hash
  @header_hash
end
#key_field ⇒ Object
Returns the value of attribute key_field.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the name of the key column.
#
# @return [Object] the current value of @key_field
def key_field
  @key_field
end
#key_position ⇒ Object
Returns the value of attribute key_position.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the column index of the key within a split line.
#
# @return [Object] the current value of @key_position
def key_position
  @key_position
end
#namespace ⇒ Object
Returns the value of attribute namespace.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the :namespace option captured by the constructor.
#
# @return [Object] the current value of @namespace
def namespace
  @namespace
end
#select ⇒ Object
Returns the value of attribute select.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the line filter; #process skips lines for which a Proc stored
# here returns a falsy value.
#
# @return [Object] the current value of @select
def select
  @select
end
#sep ⇒ Object
Returns the value of attribute sep.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the field separator used by #chop_line.
#
# @return [Object] the current value of @sep
def sep
  @sep
end
#sep2 ⇒ Object
Returns the value of attribute sep2.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the secondary separator that splits multiple values inside one
# field.
#
# @return [Object] the current value of @sep2
def sep2
  @sep2
end
#serializer ⇒ Object
Returns the value of attribute serializer.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the serializer attribute.
#
# @return [Object] the current value of @serializer
def serializer
  @serializer
end
#straight ⇒ Object
Returns the value of attribute straight.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the flag set by fix_fields and the constructor.
#
# @return [Object] the current value of @straight
def straight
  @straight
end
#take_all ⇒ Object
Returns the value of attribute take_all.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the flag that makes get_values_flat return every column as a
# value.
#
# @return [Object] the current value of @take_all
def take_all
  @take_all
end
#type ⇒ Object
Returns the value of attribute type.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the TSV type (:double, :single, :list or :flat in the
# constructor's dispatch).
#
# @return [Object] the current value of @type
def type
  @type
end
#zipped ⇒ Object
Returns the value of attribute zipped.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the :zipped option captured by the constructor.
#
# @return [Object] the current value of @zipped
def zipped
  @zipped
end
Instance Method Details
#add_to_data_flat(data, keys, values) ⇒ Object
149 150 151 152 153 |
# File 'lib/rbbt/tsv/parser.rb', line 149
#
# Stores the flattened +values+ under each key that +data+ does not hold yet;
# keys already present are left untouched.
#
# @param data [#include?, #[]=] destination store
# @param keys [Array] keys parsed from the line
# @param values [Array<Array>] value lists parsed from the line
# @return [Array] +keys+
def add_to_data_flat(data, keys, values)
  keys.each do |key|
    next if data.include? key
    data[key] = values.flatten
  end
end
#add_to_data_flat_merge(data, keys, values) ⇒ Object
155 156 157 158 159 160 161 162 163 |
# File 'lib/rbbt/tsv/parser.rb', line 155
#
# Stores the flattened +values+ under each key, appending to the list already
# stored when the key is present.
#
# @param data [#include?, #[], #[]=] destination store
# @param keys [Array] keys parsed from the line
# @param values [Array<Array>] value lists parsed from the line
# @return [Array] +keys+
def add_to_data_flat_merge(data, keys, values)
  keys.each do |key|
    # a fresh flattened copy per key, so entries never share one array
    flat = values.flatten
    data[key] = data.include?(key) ? data[key].concat(flat) : flat
  end
end
#add_to_data_merge(data, keys, values) ⇒ Object
171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 |
# File 'lib/rbbt/tsv/parser.rb', line 171
#
# Stores +values+ (one list per field) under every distinct key. When a key is
# already present, each incoming field list is appended to the stored list at
# the same position.
#
# @param data [#include?, #[], #[]=] destination store
# @param keys [Array] keys parsed from the line (duplicates are ignored)
# @param values [Array<Array>] one list of values per field
# @return [Array] the distinct keys
def add_to_data_merge(data, keys, values)
  stored_original = false

  keys.uniq.each do |key|
    if data.include? key
      merged = data[key]
      merged.each_with_index do |old, i|
        addition = values[i]
        # the incoming row may carry fewer fields than the stored entry
        next if addition.nil?
        old.concat addition
      end
      # written back explicitly rather than relying on in-place mutation
      data[key] = merged
    else
      # Only the first new key keeps the caller's arrays; further keys get
      # their own copies so a later merge into one key cannot leak into another.
      data[key] = stored_original ? values.collect { |list| list.dup } : values
      stored_original = true
    end
  end
end
#add_to_data_merge_zipped(data, keys, values) ⇒ Object
189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/rbbt/tsv/parser.rb', line 189
#
# Pairs the i-th key with the i-th value of every field and stores each pair
# under its key, appending to the stored entry when the key already exists.
#
# A single key combined with multi-valued fields is repeated once per value;
# a field whose length differs from the number of keys is replaced by its
# first value repeated for every key.
#
# @param data [#include?, #[], #[]=] destination store
# @param keys [Array] keys parsed from the line
# @param values [Array<Array>] one list of values per field (may be empty)
# @return [Object] whatever Misc.zip_fields returned
def add_to_data_merge_zipped(data, keys, values)
  num = keys.length
  first = values.first

  # +first+ is nil when the line carries no value fields at all
  if first and first.length > 1 and num == 1
    keys = keys * first.length
    num = keys.length
  end

  columns = values.collect { |v| v.length != num ? [v.first] * num : v }

  Misc.zip_fields([keys] + columns).each do |row|
    key = row.shift
    if data.include? key
      data[key] = data[key].zip(row).collect do |old, new|
        old.push new
        old
      end
    else
      data[key] = row.collect { |v| [v] }
    end
  end
end
#add_to_data_no_merge_double(data, keys, values) ⇒ Object
165 166 167 168 169 |
# File 'lib/rbbt/tsv/parser.rb', line 165
#
# Stores +values+ under each key that +data+ does not hold yet; the first
# entry seen for a key wins.
#
# @param data [#include?, #[]=] destination store
# @param keys [Array] keys parsed from the line
# @param values [Object] values parsed from the line
# @return [Array] +keys+
def add_to_data_no_merge_double(data, keys, values)
  keys.each do |key|
    next if data.include? key
    data[key] = values
  end
end
#add_to_data_no_merge_list(data, key, values) ⇒ Object
145 146 147 |
# File 'lib/rbbt/tsv/parser.rb', line 145
#
# Stores +values+ under +key+ unless +data+ already has that key.
#
# @param data [#include?, #[]=] destination store
# @param key [Object] key parsed from the line
# @param values [Object] values parsed from the line
# @return [Object, nil] +values+ when stored, nil when the key was present
def add_to_data_no_merge_list(data, key, values)
  return if data.include?(key)
  data[key] = values
end
#add_to_data_zipped(data, keys, values) ⇒ Object
212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 |
# File 'lib/rbbt/tsv/parser.rb', line 212
#
# Pairs the i-th key with the i-th value of every field and stores each pair
# under its key; keys already present in +data+ are skipped.
#
# A single key combined with multi-valued fields is repeated once per value;
# a field whose length differs from the number of keys is replaced by its
# first value repeated for every key.
#
# @param data [#include?, #[]=] destination store
# @param keys [Array] keys parsed from the line
# @param values [Array<Array>] one list of values per field (may be empty)
# @return [Object] whatever Misc.zip_fields returned
def add_to_data_zipped(data, keys, values)
  num = keys.length
  first = values.first

  # +first+ is nil when the line carries no value fields at all
  if first and first.length > 1 and num == 1
    keys = keys * first.length
    num = keys.length
  end

  columns = values.collect { |v| v.length != num ? [v.first] * num : v }

  Misc.zip_fields([keys] + columns).each do |row|
    key = row.shift
    next if data.include? key
    data[key] = row.collect { |v| [v] }
  end
end
#all_fields ⇒ Object
9 10 11 12 |
# File 'lib/rbbt/tsv/parser.rb', line 9
#
# Builds the list made of the key field followed by the fields and hands it
# to NamedArray.setup, using the list itself as the names.
#
# @return [Object] the result of NamedArray.setup
def all_fields
  names = [key_field].concat(fields)
  NamedArray.setup(names, names)
end
#cast? ⇒ Boolean
58 59 60 |
# File 'lib/rbbt/tsv/parser.rb', line 58
#
# Tells whether a cast has been configured.
#
# @return [Boolean] true when @cast is neither nil nor false
def cast?
  @cast ? true : false
end
#cast_values_double(values) ⇒ Object
248 249 250 251 252 253 254 255 |
# File 'lib/rbbt/tsv/parser.rb', line 248
#
# Applies the configured cast to every value of every list in +values+.
#
# @param values [Array<Array>] one list of values per field
# @return [Array<Array>, nil] the converted lists; nil when +cast+ is neither
#   a Symbol nor a Proc
def cast_values_double(values)
  case cast
  when Symbol
    values.collect { |list| list.collect { |v| v.send(cast) } }
  when Proc
    values.collect { |list| list.collect { |v| cast.call v } }
  end
end
#cast_values_list(values) ⇒ Object
239 240 241 242 243 244 245 246 |
# File 'lib/rbbt/tsv/parser.rb', line 239
#
# Applies the configured cast to every value in +values+.
#
# @param values [Array] one value per field
# @return [Array, nil] the converted values; nil when +cast+ is neither a
#   Symbol nor a Proc
def cast_values_list(values)
  case cast
  when Symbol
    values.collect { |v| v.send(cast) }
  when Proc
    values.collect { |v| cast.call v }
  end
end
#cast_values_single(value) ⇒ Object
230 231 232 233 234 235 236 237 |
# File 'lib/rbbt/tsv/parser.rb', line 230
#
# Applies the configured cast to a single value.
#
# @param value [Object] the value to convert
# @return [Object, nil] the converted value; nil when +cast+ is neither a
#   Symbol nor a Proc
def cast_values_single(value)
  case cast
  when Symbol
    value.send(cast)
  when Proc
    cast.call value
  end
end
#chop_line(line) ⇒ Object
62 63 64 |
# File 'lib/rbbt/tsv/parser.rb', line 62
#
# Splits a line into its fields on the field separator.
#
# @param line [String] a line without its line terminator
# @return [Array<String>] the fields, trailing empty ones included
def chop_line(line)
  # a negative limit keeps trailing empty fields
  keep_trailing_empty = -1
  line.split(@sep, keep_trailing_empty)
end
#fix_fields(options) ⇒ Object
261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 |
# File 'lib/rbbt/tsv/parser.rb', line 261
#
# Resolves the :key_field and :fields options against the header read from the
# file, setting @key_position / @field_positions and rewriting @key_field /
# @fields to describe the selected columns. Sets @straight to true (and does
# nothing else) when the request matches the file's own layout.
#
# Positions are indexes into a full line, so the header's key field is
# position 0 and the first header field is position 1.
#
# NOTE(review): the +options+ parameter and the +Misc.process_options+ calls
# had been stripped from this listing and are restored here -- confirm against
# lib/rbbt/tsv/parser.rb.
#
# @param options [Hash] options hash; :key_field and :fields are read from it
# @raise [RuntimeError] when a named key field or field is not in the header,
#   or when their format is not understood
def fix_fields(options)
  key_field = Misc.process_options options, :key_field
  fields = Misc.process_options options, :fields

  if (key_field.nil? or key_field == 0 or key_field == :key) and
     (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
    @straight = true
    return
  else
    @straight = false

    case
    when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
      @key_position = 0
    when Integer === key_field
      @key_position = key_field
    when String === key_field
      @key_position = @fields.dup.unshift(@key_field).index key_field
      raise "Key field #{ key_field } was not found" if @key_position.nil?
    else
      raise "Format of key_field not understood: #{key_field.inspect}"
    end

    if (fields.nil? or (not (Array === fields and Integer === fields.first) and @fields.nil?) or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))
      # All fields requested: every column except the key (not for :flat).
      if not @fields.nil? and type != :flat
        @field_positions = (0..@fields.length).to_a
        @field_positions.delete @key_position
      end
    else
      fields = [fields] if not Array === fields
      @field_positions = fields.collect{|field|
        case
        when Integer === field
          field
        when String === field
          pos = @fields.dup.unshift(@key_field).index field
          raise "Field not identified: #{ field }" if pos.nil?
          pos
        else
          raise "Format of fields not understood: #{fields.inspect}"
        end
      }
    end

    # Rename key and fields after the columns that were actually selected.
    new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
    @fields = @fields.dup.unshift(@key_field).values_at(*@field_positions) if not @fields.nil? and not @field_positions.nil?
    @fields ||= fields if Array === fields and String === fields.first
    @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
    @key_field = new_key_field
    @key_field ||= key_field if String === key_field
  end
end
#get_values_double(parts) ⇒ Object
100 101 102 103 104 105 106 107 108 109 110 111 112 |
# File 'lib/rbbt/tsv/parser.rb', line 100
#
# Extracts the keys and the per-field value lists from a split line, splitting
# every field on the secondary separator.
#
# @param parts [Array<String>] the fields of one line; the key column is
#   removed from it when no explicit field positions are set
# @return [Array(Array<String>, Array<Array<String>>)] keys and value lists
def get_values_double(parts)
  positions = field_positions
  key_pos = key_position
  split_field = lambda { |field| field.split(@sep2, -1) }

  # No positions at all: the first column is the key, the rest are the values.
  if positions.nil? and key_pos.nil?
    keys = split_field.call(parts.shift)
    return [keys, parts.collect(&split_field)]
  end

  keys = split_field.call(parts[key_pos])
  selected = if positions.nil?
               parts.delete_at key_pos
               parts
             elsif positions.empty?
               []
             else
               parts.values_at(*positions)
             end

  [keys, selected.collect(&split_field)]
end
#get_values_flat(parts) ⇒ Object
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 |
# File 'lib/rbbt/tsv/parser.rb', line 120
#
# Extracts keys and values from a split line for flat parsing.
#
# With no field positions set, a non-zero key position treats the first column
# as the single value and the remaining columns as keys; otherwise the first
# column holds the keys. With field positions set, either every column
# (@take_all) or only the selected columns are returned as values.
#
# @param parts [Array<String>] the fields of one line (may be modified)
# @return [Array(Array, Array)] keys and values
def get_values_flat(parts)
  positions = field_positions
  key_pos = key_position

  if positions.nil?
    if key_pos and key_pos != 0
      value = parts.shift
      return [parts, [value]]
    end

    if key_pos.nil? or key_pos == 0
      keys = parts.shift.split(@sep2, -1)
      return [keys, parts.collect { |field| field.split(@sep2, -1) }]
    end
  end

  keys = parts[key_pos].split(@sep2, -1)

  selected = if @take_all
               parts
             elsif positions.nil?
               parts.tap { |all| all.delete_at key_pos }
             else
               parts.values_at(*positions)
             end

  [keys, selected.collect { |field| field.split(@sep2, -1) }]
end
#get_values_flat_inverse(parts) ⇒ Object
114 115 116 117 118 |
# File 'lib/rbbt/tsv/parser.rb', line 114
#
# Treats the first column as the single value and the remaining columns as
# the keys.
#
# @param parts [Array] the fields of one line; its first element is removed
# @return [Array(Array, Array)] the remaining parts as keys, and the removed
#   first element wrapped in an array
def get_values_flat_inverse(parts)
  first = parts.shift
  [parts, [first]]
end
#get_values_list(parts) ⇒ Object
84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# File 'lib/rbbt/tsv/parser.rb', line 84
#
# Extracts the key and the list of values from a split line, without any
# secondary splitting.
#
# @param parts [Array<String>] the fields of one line; the key column is
#   removed from it when no explicit field positions are set
# @return [Array(String, Array<String>)] key and values
def get_values_list(parts)
  positions = field_positions
  key_pos = key_position

  # No positions at all: the first column is the key, the rest are the values.
  return [parts.shift, parts] if positions.nil? and key_pos.nil?

  key = parts[key_pos]
  values = if positions.nil?
             parts.delete_at key_pos
             parts
           elsif positions.empty?
             []
           else
             parts.values_at(*positions)
           end

  [key, values]
end
#get_values_single(parts) ⇒ Object
77 78 79 80 81 82 |
# File 'lib/rbbt/tsv/parser.rb', line 77
#
# Extracts the key and one value from a split line.
#
# @param parts [Array<String>] the fields of one line; its first element is
#   removed when neither key nor field positions are set
# @return [Array(String, String)] key and value; the value comes from the
#   first field position, or from column 0 when none is set
def get_values_single(parts)
  positions = field_positions
  key_pos = key_position

  return [parts.shift, parts.first] if positions.nil? and key_pos.nil?

  value_pos = if positions.nil? or positions.empty?
                0
              else
                positions.first
              end

  [parts[key_pos], parts[value_pos]]
end
#get_values_single_from_flat(parts) ⇒ Object
66 67 68 69 70 71 72 73 74 75 |
# File 'lib/rbbt/tsv/parser.rb', line 66
#
# Extracts a key/value pair from a split line of a flat file that is being
# read as single-valued.
#
# @param parts [Array<String>] the fields of one line; its first element is
#   always removed
# @return [Array] the first column and the next one when the key is the first
#   column (or no positions are set); otherwise the remaining columns followed
#   by the first column
def get_values_single_from_flat(parts)
  key_pos = key_position
  head = parts.shift

  if (field_positions.nil? and key_pos.nil?) or key_pos == 0
    [head, parts.first]
  else
    [parts, head]
  end
end
#parse_header(stream) ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 |
# File 'lib/rbbt/tsv/parser.rb', line 14
#
# Reads the header of a TSV stream: an optional options line
# ("<header_hash>: ...") followed by an optional fields line
# ("<header_hash>key<sep>field<sep>..."). Sets @sep (when the options line
# declares one), @fields, @key_field and @first_line.
#
# NOTE(review): the +options+ local and the trailing return value had been
# stripped from this listing and are restored here -- confirm against
# lib/rbbt/tsv/parser.rb.
#
# @param stream [#gets] source of lines
# @return [Hash] the options declared in the header ({} when there are none)
# @raise [RuntimeError] "Empty content" when the stream yields no line
def parse_header(stream)
  options = {}

  # Get line
  line = stream.gets
  raise "Empty content" if line.nil?
  line = Misc.fixutf8 line
  line.chomp!

  # Process options line
  if line and line =~ /^#{@header_hash}: (.*)/
    options = Misc.string2hash $1
    line = stream.gets
  end

  # Determine separator
  @sep = options[:sep] if options[:sep]

  # Process fields line
  if line and Misc.fixutf8(line) =~ /^#{@header_hash}/
    line.chomp!
    @fields = line.split(@sep)
    @key_field = @fields.shift
    @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
    line = stream.gets
  end

  # First line that is not part of the header (nil when the stream ended)
  @first_line = line

  options
end
#process(line) ⇒ Object
50 51 52 53 54 55 56 |
# File 'lib/rbbt/tsv/parser.rb', line 50
#
# Prepares a raw line for parsing: removes the line terminator, skips lines
# starting with "#" or rejected by the @select Proc, and runs the @fix Proc
# when one is set.
#
# @param line [String] raw line read from the stream
# @return [String] the cleaned (and possibly fixed) line
# @raise [Parser::SKIP_LINE] when the line must be ignored
# @raise [Parser::END_PARSING] when @fix returns nil or false
def process(line)
  cleaned = line.chomp

  raise Parser::SKIP_LINE if cleaned[0] == "#"[0]
  raise Parser::SKIP_LINE if Proc === @select and not @select.call(cleaned)

  cleaned = @fix.call(cleaned) if Proc === @fix
  raise Parser::END_PARSING unless cleaned

  cleaned
end
#rescue_first_line ⇒ Object
257 258 259 |
# File 'lib/rbbt/tsv/parser.rb', line 257
#
# Gives back the line stored in @first_line (the line parse_header read past
# the header).
#
# @return [String, nil] the stored first line
def rescue_first_line
  @first_line
end