Module: FileDefinitions::ClassMethods

Defined in:
lib/datashift/file_definitions.rb

Instance Method Summary collapse

Instance Method Details

#add_field(field, add_accessor = true) ⇒ Object



127
128
129
130
131
# File 'lib/datashift/file_definitions.rb', line 127

# Appends a single field to this definition's field list, creating the list
# on first use.
#
# field        - field name; stored in the list as a String.
# add_accessor - when truthy (the default), an attr_accessor for the field is
#                also defined on the receiver.
def add_field(field, add_accessor = true)
  (@field_definition ||= []).push(field.to_s)
  return unless add_accessor

  attr_accessor(field)
end

#create_field_attr_accessors ⇒ Object

Create accessors for each field



173
174
175
# File 'lib/datashift/file_definitions.rb', line 173

# Defines an attr_accessor on the receiver for every entry returned by
# +field_definition+. Returns the list that +field_definition+ returned.
def create_field_attr_accessors
  field_definition.each do |field_name|
    attr_accessor(field_name)
  end
end

#create_field_definition(*fields) ⇒ Object

Helper to generate methods to store and return the complete list of fields in this File definition (also creates member @field_definition) and parse a line.

e.g create_field_definition %w{ trade_id drOrCr ccy costCentre postingDate amount }



108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
# File 'lib/datashift/file_definitions.rb', line 108

# Declares the ordered list of fields for a delimited file definition.
#
# Sets @field_definition on the receiver (unless it is already set), defines a
# +field_definition+ reader on the receiver, and defines an instance method
# +parse(line)+.
#
# The generated +parse+:
#   * stores the line in @current_line
#   * calls +before_parse+ if the instance responds to it
#   * splits the line on +field_delim+ and assigns each column to the instance
#     variable named after the field at the same position
#   * calls +after_parse+ and then +generate_key+ if the instance responds to them
#
# Columns beyond the declared fields are ignored; previously they raised a
# NameError because an instance variable named "@" was being set.
#
# fields - field names (Strings/Symbols, or a single Array of them).
def create_field_definition( *fields )
  # join + split keeps the semantics of the former %w{} literal: nested arrays
  # are flattened and names are separated on whitespace.
  @field_definition ||= fields.join(' ').split

  define_singleton_method(:field_definition) { @field_definition }

  define_method(:parse) do |line|
    @current_line = line
    before_parse if respond_to? :before_parse

    names = self.class.field_definition
    @current_line.split(field_delim()).each_with_index do |column, index|
      name = names[index]
      next if name.nil? # surplus column with no declared field

      instance_variable_set("@#{name}", column)
    end

    after_parse if respond_to? :after_parse
    generate_key if respond_to? :generate_key
  end
end

#create_fixed_definition(field_range_map) ⇒ Object

Helper to generate methods that return the complete list of fixed width fields and associated ranges in this File definition, and parse a line. Takes a Hash of field name => range within the line, e.g. create_fixed_definition trade_id: (0..7), ccy: (8..10)

Raises:

  • (ArgumentError)


137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
# File 'lib/datashift/file_definitions.rb', line 137

# Declares a fixed-width file definition from a Hash of field name => index
# (normally a Range) into the line.
#
# Defines on the receiver:
#   * +fixed_definition+ - memoised Hash of String field name => range
#   * +field_definition+ - memoised Array of String field names
# and an instance method +parse(line)+.
#
# The generated +parse+ stores the line in @current_line, calls
# +before_parse+ if available, assigns @current_line[range] to the instance
# variable named after each field, then calls +after_parse+ and
# +generate_key+ if available.
#
# field_range_map - Hash of field name (String/Symbol) => range.
#
# Raises ArgumentError when field_range_map is not a Hash.
def create_fixed_definition( field_range_map )

  raise ArgumentError, 'Please supply hash to create_fixed_definition' unless field_range_map.is_a?(Hash)

  keys = field_range_map.keys.collect(&:to_s)

  # Built pair by pair so values are stored untouched (no flatten, no eval of
  # their inspect output).
  string_map = {}
  field_range_map.each { |field, range| string_map[field.to_s] = range }

  # Each receiver memoises its own copy on first access, as before.
  define_singleton_method(:fixed_definition) { @fixed_definition ||= string_map.dup }
  define_singleton_method(:field_definition) { @field_definition ||= keys.dup }

  define_method(:parse) do |line|
    @current_line = line
    before_parse if respond_to? :before_parse

    self.class.fixed_definition.each do |key, range|
      instance_variable_set("@#{key}", @current_line[range])
    end

    after_parse if respond_to? :after_parse
    generate_key if respond_to? :generate_key
  end

end

#file_set_field_by_map(file_name, fields, value_map, regex = nil) ⇒ Object

Open and parse a file, replacing a value in the specified field. Does not update the file itself. Does not write a new output file.

Returns :

1) full collection of updated lines
2) collection of file def objects (self), with updated value.

Finds values matching old_value in given map

Replaces matches with new_value in map.

Accepts more than one field, if fields is either an array of strings or a comma-separated list of fields.



294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
# File 'lib/datashift/file_definitions.rb', line 294

# Reads file_name line by line, builds one object per line via +new+, and
# replaces the value of the given field(s) according to value_map. Nothing is
# written back to disk.
#
# file_name - path of the file to read.
# fields    - Array of field names, or a comma separated String of them.
# value_map - Hash of old field value => new field value.
# regex     - optional pattern for looser matching (see note below).
#
# Returns [lines, objects]: the +to_s+ of each updated object, and the objects
# themselves. Blank lines are passed through unchanged and paired with a bare
# +new+ object.
#
# Raises ArgumentError when a requested field is not a method on instances of
# the receiver.
#
# Fixes over the previous implementation:
#   * field names were overwritten with nil by collect!, so every call failed
#   * the regex branch referenced an undefined variable and called #keys on
#     the field value
#   * messages reported "Class" instead of the receiver's own name
def file_set_field_by_map( file_name, fields, value_map, regex = nil )

  lines = []
  objects = []

  raw_fields = fields.is_a?(Array) ? fields : fields.to_s.split(',')
  # Strip so that 'a, b' addresses fields a and b.
  attribs = raw_fields.map { |field| field.to_s.strip }

  probe = new
  attribs.each do |attrib|
    raise ArgumentError, "Field: #{attrib} is not a field on #{name}" unless probe.respond_to?(attrib)
  end

  log :info, "#{name} - updating field(s) #{fields} in #{file_name}"

  # NOTE(review): the original regex branch could never run, so its intended
  # semantics are inferred - confirm against callers. Here: when a field value
  # has no exact entry in value_map but matches regex, it is replaced with the
  # value of the first map key that also matches regex.
  regex_key = regex && value_map.keys.detect { |key| key.to_s.match(regex) }

  File.open( file_name ) do |t|
    t.each do |line|
      if line.chomp.empty?
        lines << line
        objects << new
        next
      end
      x = new(line)

      attribs.each do |a|
        old_value = x.instance_variable_get( "@#{a}" )
        new_value = value_map[old_value]
        new_value = value_map[regex_key] if !new_value && regex_key && old_value.to_s.match(regex)

        x.instance_variable_set( "@#{a}", new_value ) if new_value
      end

      objects << x
      lines << x.to_s
    end
  end

  [lines, objects]
end

#parse_file(file, options = {}) ⇒ Object

Parse a complete file and return array of self, one per line



182
183
184
185
186
187
188
189
190
191
# File 'lib/datashift/file_definitions.rb', line 182

# Parses a whole file, building one object per line via +new(line)+.
#
# file    - path of the file to read.
# options - Hash; :limit => maximum number of lines to parse (all lines when
#           absent or nil).
#
# Returns an Array of the created objects, in file order.
def parse_file( file, options = {} )
  limit = options[:limit]
  lines = []
  # File.foreach closes the file when the block finishes or breaks; the
  # previous File.new(...).each_line left the handle open.
  File.foreach(file) do |line|
    break if limit && lines.size >= limit
    lines << new( line )
  end
  lines
end

#split_on(file_name, field, options = {}) ⇒ Object

Split a file, whose field definition is represented by self, into separate streams, based on one of its fields.

Returns a map of Field value => File def object

We return the File Def object as this is now enriched, e.g with key fields, compared to the raw file.

Users can get at the raw line simply by calling the line() method on File Def object

Options:

:output_path => directory to write the individual streams files to

:filter      => Optional Regular Expression to act as a filter to be applied to the field.
               For example, to split by Ccy but only include certain ccys, pass
               filter => 'GBP|USD|EUR'


244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
# File 'lib/datashift/file_definitions.rb', line 244

# Groups the records of a file by the value of one field.
#
# Each line is chomped and passed to +new+; the record is filed under the
# value returned by calling +field+ on it. Records whose field value is nil
# are skipped.
#
# file_name - path of the file to read.
# field     - name of the field (String/Symbol) to split on.
# options   - Hash:
#             :filter => String/Regexp; only values matching it are kept.
#             :sort   => when truthy, each group is sorted in place (records
#                        must therefore be comparable).
#
# Returns a Hash of field value => Array of records. The Hash is empty when
# +field+ is not a method on instances of the receiver (a warning is logged).
#
# Fixes over the previous implementation:
#   * a final line without a trailing newline was dropped, because chomp!
#     returns nil when there is nothing to remove
#   * :sort had no effect, because the sorted arrays were discarded
#   * the warning reported "Class" instead of the receiver's own name
def split_on( file_name, field, options = {} )

  regex = options[:filter] ? Regexp.new(options[:filter]) : nil

  log :debug, "Using REGEX: #{regex.inspect}" if regex

  filtered = {}

  if new.respond_to?(field)

    log :info, "Splitting on #{field}"

    File.foreach( file_name ) do |line|
      x = new(line.chomp)

      value = x.send( field.to_sym ) # the actual field value from the specified field column
      next if value.nil?
      next unless regex.nil? || value.match(regex)

      (filtered[value] ||= []) << x
    end
  else
    log :warn, "Field [#{field}] nor defined for file definition #{name}"
  end

  filtered.each_value(&:sort!) if options[:sort]
  filtered
end

#split_on_write(file_name, field, output_path, options = {}) ⇒ Object

Split a file, whose field definition is represented by self, into separate streams, based on the values of one of its fields.

Writes the results, one file per split stream, to directory specified by output_path

Options:

:keys       => Also write split files of the key fields

:filter     => Optional Regular Expression to act as a filter to be applied to the field.
               For example, to split by Ccy but only include certain ccys, pass
               filter => 'GBP|USD'


206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
# File 'lib/datashift/file_definitions.rb', line 206

# Splits a file on one field via +split_on+ and hands each resulting stream to
# RecsBase.write, one output file per distinct field value.
#
# file_name   - path of the file to split.
# field       - field to split on; also used in the output file names.
# output_path - directory passed to RecsBase.write; '.' when nil/false.
# options     - passed through to +split_on+. When the :keys option is present,
#               a "keys_<field>_<value>.csv" entry holding each record's +key+
#               is written before the "<field>_<value>.csv" entries holding
#               each record's +current_line+.
#
# Returns nil when +split_on+ produced nothing, otherwise the Hash it returned.
def split_on_write( file_name, field, output_path, options = {} )

  target_dir = output_path || '.'

  streams = split_on( file_name, field, options )
  return if streams.empty?

  log :info, "Writing seperate streams to #{target_dir}"

  if options.key?(:keys)
    streams.each do |stream_value, records|
      key_text = records.collect(&:key).join("\n")
      RecsBase.write( { "keys_#{field}_#{stream_value}.csv" => key_text }, target_dir)
    end
  end

  streams.each do |stream_value, records|
    line_text = records.collect(&:current_line).join("\n")
    RecsBase.write( { "#{field}_#{stream_value}.csv" => line_text }, target_dir)
  end
end