Class: TSV::Parser
- Inherits:
-
Object
- Object
- TSV::Parser
- Defined in:
- lib/rbbt/tsv/parser.rb
Defined Under Namespace
Classes: END_PARSING, SKIP_LINE
Instance Attribute Summary collapse
-
#cast ⇒ Object
Returns the value of attribute cast.
-
#field_positions ⇒ Object
Returns the value of attribute field_positions.
-
#fields ⇒ Object
Returns the value of attribute fields.
-
#fix ⇒ Object
Returns the value of attribute fix.
-
#header_hash ⇒ Object
Returns the value of attribute header_hash.
-
#key_field ⇒ Object
Returns the value of attribute key_field.
-
#key_position ⇒ Object
Returns the value of attribute key_position.
-
#namespace ⇒ Object
Returns the value of attribute namespace.
-
#select ⇒ Object
Returns the value of attribute select.
-
#sep ⇒ Object
Returns the value of attribute sep.
-
#sep2 ⇒ Object
Returns the value of attribute sep2.
-
#serializer ⇒ Object
Returns the value of attribute serializer.
-
#straight ⇒ Object
Returns the value of attribute straight.
-
#take_all ⇒ Object
Returns the value of attribute take_all.
-
#type ⇒ Object
Returns the value of attribute type.
-
#zipped ⇒ Object
Returns the value of attribute zipped.
Instance Method Summary collapse
- #add_to_data_flat(data, keys, values) ⇒ Object
- #add_to_data_flat_merge(data, keys, values) ⇒ Object
- #add_to_data_merge(data, keys, values) ⇒ Object
- #add_to_data_merge_zipped(data, keys, values) ⇒ Object
- #add_to_data_no_merge_double(data, keys, values) ⇒ Object
- #add_to_data_no_merge_list(data, key, values) ⇒ Object
- #add_to_data_zipped(data, keys, values) ⇒ Object
- #all_fields ⇒ Object
- #cast? ⇒ Boolean
- #cast_values_double(values) ⇒ Object
- #cast_values_list(values) ⇒ Object
- #cast_values_single(value) ⇒ Object
- #chop_line(line) ⇒ Object
- #fix_fields(options) ⇒ Object
- #get_values_double(parts) ⇒ Object
- #get_values_flat(parts) ⇒ Object
- #get_values_flat_inverse(parts) ⇒ Object
- #get_values_list(parts) ⇒ Object
- #get_values_single(parts) ⇒ Object
- #get_values_single_from_flat(parts) ⇒ Object
-
#initialize(stream = nil, options = {}) ⇒ Parser
constructor
A new instance of Parser.
- #parse_header(stream) ⇒ Object
- #process(line) ⇒ Object
- #rescue_first_line ⇒ Object
- #setup(data) ⇒ Object
Constructor Details
#initialize(stream = nil, options = {}) ⇒ Parser
Returns a new instance of Parser.
297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 |
# File 'lib/rbbt/tsv/parser.rb', line 297
#
# Builds a parser for +stream+.
#
# The header is read through parse_header, the options it returns are merged
# with the caller's +options+ (caller values win on conflict), and then
# get_values, cast_values and add_to_data are aliased to the variants that
# match the resulting type / merge / zipped settings.
#
# NOTE(review): this listing had lost the `options` / `header_options`
# identifiers and the `Misc.process_options` helper name; they are restored
# here from the documented signature and from how the values are used below.
# Confirm the exact names against the project file.
#
# stream  - object handed to parse_header (must respond to +gets+).
# options - Hash of settings; the keys read here are :header_hash, :sep,
#           :type, :merge, :sep2, :cast, :fix, :select, :zipped, :namespace
#           and :fields, and the hash is then passed on to fix_fields.
def initialize(stream = nil, options = {})
  @header_hash = Misc.process_options(options, :header_hash) || "#"
  @sep = Misc.process_options(options, :sep) || "\t"

  header_options = parse_header(stream)

  options = header_options.merge options

  @type = Misc.process_options(options, :type) || :double
  merge = Misc.process_options(options, :merge) || false

  @sep2 = Misc.process_options(options, :sep2) || "|"
  @cast = Misc.process_options options, :cast
  @type ||= Misc.process_options options, :type
  @fix = Misc.process_options(options, :fix)
  @select = Misc.process_options options, :select
  @zipped = Misc.process_options options, :zipped
  @namespace = Misc.process_options options, :namespace

  case @type
  when :double
    self.instance_eval do alias get_values get_values_double end
    self.instance_eval do alias cast_values cast_values_double end
    case
    when (merge and not zipped)
      self.instance_eval do alias add_to_data add_to_data_merge end
    when (merge and zipped)
      self.instance_eval do alias add_to_data add_to_data_merge_zipped end
    when zipped
      self.instance_eval do alias add_to_data add_to_data_zipped end
    else
      self.instance_eval do alias add_to_data add_to_data_no_merge_double end
    end
  when :single
    # The header declared a flat file but a single-valued result was requested.
    if header_options[:type] == :flat
      self.instance_eval do alias get_values get_values_single_from_flat end
      self.instance_eval do alias cast_values cast_values_single end
      self.instance_eval do alias add_to_data add_to_data_no_merge_double end
    else
      self.instance_eval do alias get_values get_values_single end
      self.instance_eval do alias cast_values cast_values_single end
      self.instance_eval do alias add_to_data add_to_data_no_merge_list end
    end
  when :list
    self.instance_eval do alias get_values get_values_list end
    self.instance_eval do alias cast_values cast_values_list end
    self.instance_eval do alias add_to_data add_to_data_no_merge_list end
  when :flat
    @take_all = true if options[:fields].nil?
    self.instance_eval do alias get_values get_values_flat end
    self.instance_eval do alias cast_values cast_values_double end
    if merge
      self.instance_eval do alias add_to_data add_to_data_flat_merge end
    else
      self.instance_eval do alias add_to_data add_to_data_flat end
    end
  end

  # Read before fix_fields, which receives the same hash.
  fields = options[:fields]
  fix_fields(options)

  @straight = false if @sep != "\t" or not @cast.nil? or merge or (@type == :flat and fields)
end
Instance Attribute Details
#cast ⇒ Object
Returns the value of attribute cast.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the cast attribute: returns the @cast instance variable.
def cast
  @cast
end
#field_positions ⇒ Object
Returns the value of attribute field_positions.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the field_positions attribute: returns the @field_positions
# instance variable.
def field_positions
  @field_positions
end
#fields ⇒ Object
Returns the value of attribute fields.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the fields attribute: returns the @fields instance variable.
def fields
  @fields
end
#fix ⇒ Object
Returns the value of attribute fix.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the fix attribute: returns the @fix instance variable.
def fix
  @fix
end
#header_hash ⇒ Object
Returns the value of attribute header_hash.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the header_hash attribute: returns the @header_hash instance
# variable.
def header_hash
  @header_hash
end
#key_field ⇒ Object
Returns the value of attribute key_field.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the key_field attribute: returns the @key_field instance variable.
def key_field
  @key_field
end
#key_position ⇒ Object
Returns the value of attribute key_position.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the key_position attribute: returns the @key_position instance
# variable.
def key_position
  @key_position
end
#namespace ⇒ Object
Returns the value of attribute namespace.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the namespace attribute: returns the @namespace instance variable.
def namespace
  @namespace
end
#select ⇒ Object
Returns the value of attribute select.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the select attribute: returns the @select instance variable.
def select
  @select
end
#sep ⇒ Object
Returns the value of attribute sep.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the sep attribute: returns the @sep instance variable.
def sep
  @sep
end
#sep2 ⇒ Object
Returns the value of attribute sep2.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the sep2 attribute: returns the @sep2 instance variable.
def sep2
  @sep2
end
#serializer ⇒ Object
Returns the value of attribute serializer.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the serializer attribute: returns the @serializer instance
# variable.
def serializer
  @serializer
end
#straight ⇒ Object
Returns the value of attribute straight.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the straight attribute: returns the @straight instance variable.
def straight
  @straight
end
#take_all ⇒ Object
Returns the value of attribute take_all.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the take_all attribute: returns the @take_all instance variable.
def take_all
  @take_all
end
#type ⇒ Object
Returns the value of attribute type.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the type attribute: returns the @type instance variable.
def type
  @type
end
#zipped ⇒ Object
Returns the value of attribute zipped.
4 5 6 |
# File 'lib/rbbt/tsv/parser.rb', line 4
#
# Reader for the zipped attribute: returns the @zipped instance variable.
def zipped
  @zipped
end
Instance Method Details
#add_to_data_flat(data, keys, values) ⇒ Object
141 142 143 144 145 |
# File 'lib/rbbt/tsv/parser.rb', line 141
#
# Stores the flattened +values+ under every key in +keys+ that +data+ does not
# already contain; existing entries are left untouched. Each new key gets its
# own freshly flattened array.
#
# Returns +keys+ (the receiver of the iteration).
def add_to_data_flat(data, keys, values)
  keys.each do |key|
    next if data.include?(key)

    data[key] = values.flatten
  end
end
#add_to_data_flat_merge(data, keys, values) ⇒ Object
147 148 149 150 151 152 153 154 155 |
# File 'lib/rbbt/tsv/parser.rb', line 147
#
# For every key in +keys+: appends the flattened +values+ to the entry already
# held in +data+, or creates the entry from the flattened +values+ when the key
# is new.
#
# Returns +keys+ (the receiver of the iteration).
def add_to_data_flat_merge(data, keys, values)
  keys.each do |key|
    data[key] =
      if data.include?(key)
        data[key].concat(values.flatten)
      else
        values.flatten
      end
  end
end
#add_to_data_merge(data, keys, values) ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/rbbt/tsv/parser.rb', line 163
#
# Merges +values+ (one list per field) into +data+ for each distinct key.
#
# For a key already present, the i-th incoming list is appended in place to the
# i-th stored list and the entry is written back. For a new key, the entry is
# created from copies of the incoming lists.
#
# data   - Hash-like store responding to include?, [] and []=.
# keys   - Array of keys that share this row.
# values - Array of Arrays, one inner Array per field.
#
# Returns the de-duplicated key list (the receiver of the iteration).
def add_to_data_merge(data, keys, values)
  keys.uniq.each do |key|
    if data.include? key
      merged = data[key]
      merged.each_with_index do |list, i|
        addition = values[i]
        # The incoming row may have fewer fields than the stored entry;
        # there is nothing to append for those positions.
        next if addition.nil?
        list.concat addition
      end
      # Written back explicitly rather than relying on in-place mutation.
      data[key] = merged
    else
      # Each key gets its own copy of the inner lists. Sharing them would let a
      # later in-place concat for one key leak into every sibling key that was
      # created from the same row.
      data[key] = values.collect { |list| list.dup }
    end
  end
end
#add_to_data_merge_zipped(data, keys, values) ⇒ Object
181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/rbbt/tsv/parser.rb', line 181
#
# Zipped merge: pairs the n-th key with the n-th element of every value list
# and merges each resulting row into +data+.
#
# A value list whose length differs from the number of keys is replaced by its
# first element repeated once per key. The key list and the adjusted value
# lists are then passed to Misc.zip_fields, and each row it yields is taken as
# [key, value, value, ...]. For a key already in +data+ each value is pushed
# onto the matching stored list; for a new key each value is wrapped in its own
# one-element list.
#
# Returns whatever iterating the Misc.zip_fields result with #each returns.
def add_to_data_merge_zipped(data, keys, values)
  key_count = keys.length
  columns = values.collect do |list|
    list.length == key_count ? list : [list.first] * key_count
  end
  columns.unshift keys

  Misc.zip_fields(columns).each do |row|
    key = row.shift
    data[key] =
      if data.include? key
        data[key].zip(row).collect { |existing, addition| existing.push addition }
      else
        row.collect { |v| [v] }
      end
  end
end
#add_to_data_no_merge_double(data, keys, values) ⇒ Object
157 158 159 160 161 |
# File 'lib/rbbt/tsv/parser.rb', line 157
#
# Stores +values+ under every key in +keys+ that +data+ does not already
# contain. Existing entries win; nothing is merged. The same +values+ object is
# stored for every new key.
#
# Returns +keys+ (the receiver of the iteration).
def add_to_data_no_merge_double(data, keys, values)
  keys.each do |key|
    next if data.include?(key)

    data[key] = values
  end
end
#add_to_data_no_merge_list(data, key, values) ⇒ Object
137 138 139 |
# File 'lib/rbbt/tsv/parser.rb', line 137
#
# Stores +values+ under the single +key+ unless +data+ already has that key.
#
# Returns +values+ when the entry was stored, nil when the key already existed.
def add_to_data_no_merge_list(data, key, values)
  return nil if data.include?(key)

  data[key] = values
end
#add_to_data_zipped(data, keys, values) ⇒ Object
198 199 200 201 202 203 204 205 206 207 |
# File 'lib/rbbt/tsv/parser.rb', line 198
#
# Zipped insert without merging: pairs the n-th key with the n-th element of
# every value list and stores the row for keys that +data+ does not yet hold.
#
# A value list whose length differs from the number of keys is replaced by its
# first element repeated once per key. Rows come from Misc.zip_fields applied
# to the key list followed by the adjusted value lists; each stored value is
# wrapped in its own one-element list.
#
# Returns whatever iterating the Misc.zip_fields result with #each returns.
def add_to_data_zipped(data, keys, values)
  key_count = keys.length
  columns = values.collect do |list|
    list.length == key_count ? list : [list.first] * key_count
  end
  columns.unshift keys

  Misc.zip_fields(columns).each do |row|
    key = row.shift
    data[key] = row.collect { |v| [v] } unless data.include? key
  end
end
#all_fields ⇒ Object
9 10 11 12 |
# File 'lib/rbbt/tsv/parser.rb', line 9
#
# Builds the list made of the key field followed by every entry of +fields+,
# and passes that list to NamedArray.setup as both arguments.
#
# Returns the result of NamedArray.setup.
def all_fields
  names = [key_field].concat(fields)
  NamedArray.setup names, names
end
#cast? ⇒ Boolean
57 58 59 |
# File 'lib/rbbt/tsv/parser.rb', line 57
#
# Returns true when @cast is set to a truthy value, false otherwise.
def cast?
  @cast ? true : false
end
#cast_values_double(values) ⇒ Object
228 229 230 231 232 233 234 235 |
# File 'lib/rbbt/tsv/parser.rb', line 228
#
# Applies the configured cast to every element of a list of lists.
#
# When +cast+ is a Symbol it is sent to each element as a method name; when it
# is a Proc it is called with each element.
#
# Returns the converted list of lists, or nil when +cast+ is neither a Symbol
# nor a Proc.
def cast_values_double(values)
  caster = cast
  case caster
  when Symbol
    values.collect { |list| list.collect { |v| v.send(caster) } }
  when Proc
    values.collect { |list| list.collect { |v| caster.call v } }
  end
end
#cast_values_list(values) ⇒ Object
219 220 221 222 223 224 225 226 |
# File 'lib/rbbt/tsv/parser.rb', line 219
#
# Applies the configured cast to every element of a flat list.
#
# When +cast+ is a Symbol it is sent to each element as a method name; when it
# is a Proc it is called with each element.
#
# Returns the converted list, or nil when +cast+ is neither a Symbol nor a Proc.
def cast_values_list(values)
  caster = cast
  case caster
  when Symbol
    values.collect { |v| v.send(caster) }
  when Proc
    values.collect { |v| caster.call v }
  end
end
#cast_values_single(value) ⇒ Object
210 211 212 213 214 215 216 217 |
# File 'lib/rbbt/tsv/parser.rb', line 210
#
# Applies the configured cast to one value.
#
# When +cast+ is a Symbol it is sent to the value as a method name; when it is
# a Proc it is called with the value.
#
# Returns the converted value, or nil when +cast+ is neither a Symbol nor a Proc.
def cast_values_single(value)
  caster = cast
  case caster
  when Symbol
    value.send(caster)
  when Proc
    caster.call value
  end
end
#chop_line(line) ⇒ Object
61 62 63 |
# File 'lib/rbbt/tsv/parser.rb', line 61
#
# Splits +line+ on the configured separator (@sep).
#
# The negative limit makes String#split keep trailing empty fields, so a line
# ending in separators still yields one entry per column.
def chop_line(line)
  keep_trailing_empty = -1
  line.split(@sep, keep_trailing_empty)
end
#fix_fields(options) ⇒ Object
241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 |
# File 'lib/rbbt/tsv/parser.rb', line 241
#
# Resolves the requested :key_field and :fields options into column positions
# (@key_position, @field_positions) and updates @key_field / @fields to match.
#
# When no effective re-keying or field selection is requested, @straight is set
# to true and the method returns early without touching the positions.
#
# NOTE(review): this listing had lost the `options` parameter name and the
# `Misc.process_options` helper name; they are restored here from the
# documented signature `fix_fields(options)`. Confirm against the project file.
#
# options - Hash from which :key_field and :fields are taken. Each may be an
#           Integer position or a String name; :fields may also be an Array of
#           those.
#
# Raises RuntimeError when a named key or field cannot be found, or when the
# format of a key/field specifier is not understood.
def fix_fields(options)
  key_field = Misc.process_options options, :key_field
  fields = Misc.process_options options, :fields

  if (key_field.nil? or key_field == 0 or key_field == :key) and
     (fields.nil? or fields == @fields or (not @fields.nil? and fields == (1..@fields.length).to_a))

    @straight = true
    return
  else
    @straight = false

    case
    when (key_field.nil? or (not Integer === key_field and @key_field.nil?) or key_field == @key_field or key_field == 0)
      @key_position = 0
    when Integer === key_field
      @key_position = key_field
    when String === key_field
      # Positions are counted over [key_field, *fields], so index 0 is the key.
      @key_position = @fields.dup.unshift(@key_field).index key_field
      raise "Key field #{ key_field } was not found" if @key_position.nil?
    else
      raise "Format of key_field not understood: #{key_field.inspect}"
    end

    if (fields.nil? or
        (not (Array === fields and Integer === fields.first) and @fields.nil?) or
        fields == @fields or
        (not @fields.nil? and fields == (1..@fields.length).to_a))

      if not @fields.nil? and type != :flat
        # Every column except the one now acting as the key.
        @field_positions = (0..@fields.length).to_a
        @field_positions.delete @key_position
      end
    else
      fields = [fields] if not Array === fields
      @field_positions = fields.collect{|field|
        case
        when Integer === field
          field
        when String === field
          pos = @fields.dup.unshift(@key_field).index field
          raise "Field not identified: #{ field }" if pos.nil?
          pos
        else
          raise "Format of fields not understood: #{fields.inspect}"
        end
      }
    end

    new_key_field = @fields.dup.unshift(@key_field)[@key_position] if not @fields.nil?
    @fields = @fields.dup.unshift(@key_field).values_at(*@field_positions) if not @fields.nil? and not @field_positions.nil?
    @fields ||= fields if Array === fields and String === fields.first
    @fields = [@key_field] if new_key_field != @key_field and type == :flat and @field_positions.nil?
    @key_field = new_key_field
    @key_field ||= key_field if String === key_field
  end
end
#get_values_double(parts) ⇒ Object
95 96 97 98 99 100 101 102 103 104 |
# File 'lib/rbbt/tsv/parser.rb', line 95
#
# Splits a chopped line into [keys, values] for the double layout, where the
# key cell and every value cell are themselves split on @sep2 (trailing empty
# items kept).
#
# With no key or field positions configured, the first cell is the key and the
# remaining cells are the values. Otherwise the key is read from key_position
# and the values are either the cells at field_positions or, when those are
# not set, every cell except the key.
#
# Note: +parts+ is modified in place on the paths that shift or delete a cell.
def get_values_double(parts)
  if field_positions.nil? && key_position.nil?
    keys = parts.shift.split(@sep2, -1)
    return [keys, parts.collect { |cell| cell.split(@sep2, -1) }]
  end

  keys = parts[key_position].split(@sep2, -1)

  cells =
    if field_positions.nil?
      parts.delete_at key_position
      parts
    else
      parts.values_at(*field_positions)
    end

  [keys, cells.collect { |cell| cell.split(@sep2, -1) }]
end
#get_values_flat(parts) ⇒ Object
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/rbbt/tsv/parser.rb', line 112
#
# Splits a chopped line into [keys, values] for the flat layout.
#
# * Non-zero key position and no field positions: the first cell becomes the
#   single value and all remaining cells become the keys.
# * No field positions and key position unset or 0: the first cell (split on
#   @sep2) gives the keys and the remaining cells, each split on @sep2, give
#   the values.
# * Otherwise the key is read from key_position, and the values come from all
#   cells when @take_all is set, or else from the cells at field_positions.
#
# Note: +parts+ is modified in place on the paths that shift or delete a cell.
def get_values_flat(parts)
  if key_position && key_position != 0 && field_positions.nil?
    value = parts.shift
    return [parts, [value]]
  end

  if field_positions.nil? && (key_position.nil? || key_position == 0)
    keys = parts.shift.split(@sep2, -1)
    return [keys, parts.collect { |cell| cell.split(@sep2, -1) }]
  end

  keys = parts[key_position].split(@sep2, -1)

  cells =
    if @take_all
      parts
    elsif field_positions.nil?
      parts.delete_at key_position
      parts
    else
      parts.values_at(*field_positions)
    end

  [keys, cells.collect { |cell| cell.split(@sep2, -1) }]
end
#get_values_flat_inverse(parts) ⇒ Object
106 107 108 109 110 |
# File 'lib/rbbt/tsv/parser.rb', line 106
#
# Inverse flat layout: the first cell is the single value and every remaining
# cell is a key.
#
# Returns [keys, [value]]. +parts+ is shifted in place and is itself returned
# as the key list.
def get_values_flat_inverse(parts)
  first_cell = parts.shift
  [parts, [first_cell]]
end
#get_values_list(parts) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/rbbt/tsv/parser.rb', line 82
#
# Splits a chopped line into [key, values] for the list layout (cells are used
# as they are, with no secondary split).
#
# With no key or field positions configured, the first cell is the key and the
# rest are the values. Otherwise the key is read from key_position and the
# values are either the cells at field_positions or, when those are not set,
# every cell except the key.
#
# Note: +parts+ is modified in place on the paths that shift or delete a cell.
def get_values_list(parts)
  return [parts.shift, parts] if field_positions.nil? && key_position.nil?

  key = parts[key_position]

  values =
    if field_positions.nil?
      parts.delete_at key_position
      parts
    else
      parts.values_at(*field_positions)
    end

  [key, values]
end
#get_values_single(parts) ⇒ Object
75 76 77 78 79 80 |
# File 'lib/rbbt/tsv/parser.rb', line 75
#
# Splits a chopped line into [key, value] for the single layout.
#
# With no key or field positions configured, the first cell is the key and the
# second is the value. Otherwise the key is read from key_position and the
# value from the first entry of field_positions, falling back to cell 0 when no
# field positions are set.
def get_values_single(parts)
  return [parts.shift, parts.first] if field_positions.nil? && key_position.nil?

  key = parts[key_position]
  value_index = field_positions.nil? ? 0 : field_positions.first
  [key, parts[value_index]]
end
#get_values_single_from_flat(parts) ⇒ Object
65 66 67 68 69 70 71 72 73 |
# File 'lib/rbbt/tsv/parser.rb', line 65
#
# Reads a chopped line when a single-valued result is built from a flat file.
#
# The first cell is always removed from +parts+:
# * no key or field positions configured: returns [first, second cell];
# * key position 0: returns [first, remaining cells];
# * anything else: returns [remaining cells, [first]], i.e. the remaining
#   cells act as keys for the first cell.
def get_values_single_from_flat(parts)
  head = parts.shift
  return [head, parts.first] if field_positions.nil? && key_position.nil?

  key_position == 0 ? [head, parts] : [parts, [head]]
end
#parse_header(stream) ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/rbbt/tsv/parser.rb', line 14
#
# Reads the header of +stream+ and records what it finds.
#
# An optional first line of the form "<header_hash>: ..." is parsed into an
# options hash with Misc.string2hash. An optional following line starting with
# the header hash lists the column names: the first becomes @key_field (with
# the leading hash removed) and the rest become @fields. The first line after
# the header is kept in @first_line so it is not lost to the caller.
#
# NOTE(review): this listing had lost the `options` local and the trailing
# `options` return value; they are restored here because the constructor
# merges this method's result with its own options. Confirm against the
# project file.
#
# stream - object responding to +gets+.
#
# Returns the options Hash parsed from the header (empty when there is none).
# Raises RuntimeError ("Empty content") when the stream has no first line.
def parse_header(stream)
  options = {}

  # Get line
  line = stream.gets
  raise "Empty content" if line.nil?
  line.chomp!

  # Process options line
  if line and line =~ /^#{@header_hash}: (.*)/
    options = Misc.string2hash $1
    line = stream.gets
  end

  # Determine separator
  @sep = options[:sep] if options[:sep]

  # Process fields line
  if line and line =~ /^#{@header_hash}/
    line.chomp!
    @fields = line.split(@sep)
    @key_field = @fields.shift
    @key_field = @key_field[(0 + header_hash.length)..-1] # Remove initial hash character
    line = stream.gets
  end

  @first_line = line

  options
end
#process(line) ⇒ Object
49 50 51 52 53 54 55 |
# File 'lib/rbbt/tsv/parser.rb', line 49
#
# Prepares one raw line for parsing.
#
# The line is chomped. When @select is a Proc and rejects the line,
# Parser::SKIP_LINE is raised. When @fix is a Proc, the line is replaced by its
# result; a nil/false result then raises Parser::END_PARSING.
#
# Returns the chomped (and possibly fixed) line.
def process(line)
  text = line.chomp

  if Proc === @select && !@select.call(text)
    raise Parser::SKIP_LINE
  end

  text = @fix.call(text) if Proc === @fix
  raise Parser::END_PARSING unless text

  text
end
#rescue_first_line ⇒ Object
237 238 239 |
# File 'lib/rbbt/tsv/parser.rb', line 237
#
# Returns the @first_line instance variable, i.e. the line that parse_header
# read from the stream after the header.
def rescue_first_line
  @first_line
end