Class: OMF::OML::OmlCsvTable

Inherits:
OmlTable
  • Object
show all
Defined in:
lib/omf_oml/csv_table.rb

Overview

This class represents a table whose content is initially stored in a CSV file.

Instance Attribute Summary

Attributes inherited from OmlTable

#max_size, #name, #offset, #schema

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from OmlTable

#<<, #add_row, #add_rows, #data_sources, #describe, #indexed_by, #on_before_row_added, #on_content_changed, #on_row_added, #rows, #to_a

Constructor Details

#initialize(tname, schema, opts = {}, &on_before_row_added) ⇒ OmlCsvTable

tname - Name of table.
schema - OmlSchema or Array containing [name, type*] for every column in the table.

The table adds an '__id__' column at the beginning, which keeps track of each row's unique id.


38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
# File 'lib/omf_oml/csv_table.rb', line 38

# Build the table and fill it with the content of the CSV file named in
# opts[:file_name].
#
# tname  - Name of table
# schema - OmlSchema or Array containing [name, type*] for every column in
#          table. When nil, opts[:schema] is used; when that is missing too,
#          the schema is derived from the CSV header line, where every column
#          is declared as 'name' or 'name:type' (type defaults to 'string').
# opts   - Options; the hash is also handed on unchanged to the superclass
#          constructor. Keys read here:
#          :file_name      - path of the CSV file (must be readable)
#          :has_csv_header - must be exactly true
#          :encoding       - optional external encoding used to open the file
#          :schema         - fallback for the 'schema' argument
# on_before_row_added - block forwarded to the superclass constructor
#
# Raises RuntimeError if the file can't be read, if :has_csv_header is not
# true, or if a schema has to be derived but the file has no header line.
def initialize(tname, schema, opts = {}, &on_before_row_added)
  file_name = opts[:file_name]
  # Also covers a missing :file_name - File.readable?(nil) would raise an
  # unhelpful TypeError instead of the message below.
  unless file_name && File.readable?(file_name)
    raise "Can't read CSV file '#{file_name}'"
  end
  unless opts[:has_csv_header] == true
    raise "Current implementation only works with CSV files which inlcude a schema description in the first line"
  end

  encoding = opts[:encoding]
  mode = encoding ? "rb:#{encoding}" : 'rb'

  # Convert any strange strings into a clean ruby string
  sanitize = lambda do |text|
    text ? text.encode('UTF-8', :invalid => :replace, :undef => :replace, :replace => '?') : nil
  end

  # Options are passed as keywords: handing CSV.open a positional Hash
  # raises an ArgumentError on Ruby 3.x.
  csv = CSV.open(file_name, mode, :headers => true)
  begin
    # Honour an explicitly passed schema before falling back to the options.
    schema ||= opts[:schema]
    first_row = nil
    unless schema
      # Force reading the first row to have the header parsed. 'shift'
      # returns nil when the file holds nothing but the header line.
      first = csv.shift
      first_row = first.fields if first
      headers = csv.headers
      # CSV#headers is not an Array when no header line could be read.
      unless headers.is_a?(Array)
        raise "Can't find a header line in CSV file '#{file_name}'"
      end
      schema = headers.map do |c|
        name, type = sanitize.call(c).split(':')
        [name.strip, (type || 'string').strip]
      end
    end
    super(tname, schema, opts, &on_before_row_added)

    # NOTE(review): @add_index, @row_id, @rows and @schema are presumably
    # initialised by the superclass constructor - confirm against OmlTable.
    append_row = lambda do |fields|
      row = fields.map { |e| sanitize.call(e) }
      row.insert(0, @row_id += 1) if @add_index
      @rows << @schema.cast_row(row)
    end

    # The row consumed while parsing the header goes through the same
    # clean-up as every other row.
    append_row.call(first_row) if first_row
    csv.each { |r| append_row.call(r.fields) }
  ensure
    # Release the file handle even if schema parsing or casting fails.
    csv.close
  end
end

Class Method Details

.create(tname, file_name, opts = {}, &on_before_row_added) ⇒ Object

Parameters:

  • opts (defaults to: {})


22
23
24
25
# File 'lib/omf_oml/csv_table.rb', line 22

# Convenience constructor which takes the CSV file name as an explicit
# argument instead of as the :file_name option.
#
# tname     - Name of table
# file_name - Path of the CSV file; passed on as opts[:file_name]
# opts      - Options handed on to #initialize (opts[:schema], if present,
#             is also passed as the schema argument). The caller's hash is
#             left untouched.
# on_before_row_added - block forwarded to #initialize
#
# Returns the newly created instance.
def self.create(tname, file_name, opts = {}, &on_before_row_added)
  # Work on a copy: writing :file_name into the caller's hash would leak
  # into later uses of that hash and fails outright on a frozen one.
  table_opts = opts.merge(:file_name => file_name)
  self.new(tname, table_opts[:schema], table_opts, &on_before_row_added)
end

Instance Method Details

#create_sliced_table(col_name, col_value, table_opts = {}) ⇒ Object

Return a new table which only contains the rows of this table whose value in column ‘col_name’ is equal to ‘col_value’



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# File 'lib/omf_oml/csv_table.rb', line 88

# Build a fresh OmlTable holding only those rows of this table whose value
# in column 'col_name' equals 'col_value'.
#
# col_name   - Name of the column to filter on (resolved to a position via
#              the schema's index_for_col)
# col_value  - Value a row must carry in that column to be copied
# table_opts - Options handed to OmlTable.new for the new table
#
# The new table gets a name of the form '<name>_slice_<random number>' and
# a no-op 'release' method. Returns the new table.
def create_sliced_table(col_name, col_value, table_opts = {})
  slice_name = "#{@name}_slice_#{Kernel.rand(100000)}"
  sliced = OmlTable.new(slice_name, @schema, table_opts)
  col_index = @schema.index_for_col(col_name)

  copied_any = false
  @rows.each do |row|
    next unless row[col_index] == col_value

    # rows are copied as they are, i.e. including their leading entries
    # Only the very first matching row is logged.
    debug "Add first row '#{row.inspect}'" unless copied_any
    sliced.add_row(row)
    copied_any = true
  end

  # Give the slice a 'release' which does nothing.
  def sliced.release
  end

  debug "Created sliced table '#{slice_name}' from '#{@name}' (rows: #{sliced.rows.length} from #{@rows.length})"
  sliced
end