Module: DaruLite::IO

Defined in:
lib/daru_lite/io/io.rb,
lib/daru_lite/io/csv/converters.rb,
lib/daru_lite/io/sql_data_source.rb

Defined Under Namespace

Modules: CSV Classes: SqlDataSource

Class Method Summary collapse

Class Method Details

.dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object



106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/daru_lite/io/io.rb', line 106

def dataframe_write_csv(dataframe, path, opts = {})
  options = {
    converters: :numeric
  }.merge(opts)

  writer = ::CSV.open(path, 'w', **options)
  writer << dataframe.vectors.to_a unless options[:headers] == false

  dataframe.each_row do |row|
    writer << if options[:convert_comma]
                row.map { |v| v.to_s.tr('.', ',') }
              else
                row.to_a
              end
  end

  writer.close
end

.dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object



75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/daru_lite/io/io.rb', line 75

def dataframe_write_excel(dataframe, path, _opts = {})
  book   = Spreadsheet::Workbook.new
  sheet  = book.create_worksheet
  format = Spreadsheet::Format.new color: :blue, weight: :bold

  sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
  sheet.row(0).default_format = format
  i = 1
  dataframe.each_row do |row|
    sheet.row(i).concat(row.to_a)
    i += 1
  end

  book.write(path)
end

.dataframe_write_sql(ds, dbh, table) ⇒ Object



136
137
138
139
140
141
142
# File 'lib/daru_lite/io/io.rb', line 136

def dataframe_write_sql(ds, dbh, table)
  require 'dbi'
  query = "INSERT INTO #{table} (#{ds.vectors.to_a.join(',')}) VALUES (#{(['?'] * ds.vectors.size).join(',')})"
  sth   = dbh.prepare(query)
  ds.each_row { |c| sth.execute(*c.to_a) }
  true
end

.from_activerecord(relation, *fields) ⇒ Object

Load dataframe from AR::Relation

Parameters:

  • relation (ActiveRecord::Relation)

    A relation to be used to load the contents of dataframe

Returns:

  • A dataframe containing the data in the given relation



149
150
151
152
153
154
155
# File 'lib/daru_lite/io/io.rb', line 149

def from_activerecord(relation, *fields)
  fields = relation.klass.column_names if fields.empty?
  fields = fields.map(&:to_sym)

  result = relation.pluck(*fields).transpose
  DaruLite::DataFrame.new(result, order: fields).tap(&:update)
end

.from_csv(path, opts = {}) ⇒ Object

Functions for loading/writing CSV files



92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/daru_lite/io/io.rb', line 92

def from_csv(path, opts = {})
  daru_options, opts = from_csv_prepare_opts opts
  # Preprocess headers for detecting and correcting repetition in
  # case the :headers option is not specified.
  hsh =
    if opts[:headers]
      from_csv_hash_with_headers(path, opts)
    else
      from_csv_hash(path, opts)
        .tap { |hash| daru_options[:order] = hash.keys }
    end
  DaruLite::DataFrame.new(hsh, daru_options)
end

.from_excel(path, opts = {}) ⇒ Object

Functions for loading/writing Excel files.



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/daru_lite/io/io.rb', line 46

def from_excel(path, opts = {})
  opts = {
    worksheet_id: 0,
    row_id: 0
  }.merge opts

  worksheet, headers = read_from_excel(path, opts)
  df = DaruLite::DataFrame.new({})
  headers.each_with_index do |h, i|
    col = worksheet.column(i).to_a
    col.delete_at 0
    df[h] = col
  end

  df
end

.from_plaintext(filename, fields) ⇒ Object

Loading data from plain text files



159
160
161
162
163
164
165
166
167
168
169
170
171
# File 'lib/daru_lite/io/io.rb', line 159

def from_plaintext(filename, fields)
  ds = DaruLite::DataFrame.new({}, order: fields)
  fp = File.open(filename, 'r')
  fp.each_line do |line|
    row = DaruLite::IOHelpers.process_row(line.strip.split(/\s+/), [''])
    next if row == ["\x1A"]

    ds.add_row(row)
  end
  ds.update
  fields.each { |f| ds[f].rename f }
  ds
end

.from_sql(db, query) ⇒ Object

Execute a query and create a data frame from the result

Parameters:

  • db (DBI::DatabaseHandle, String)

    A DBI connection OR Path to a SQlite3 database.

  • query (String)

    The query to be executed

Returns:

  • A dataframe containing the data resulting from the query



131
132
133
134
# File 'lib/daru_lite/io/io.rb', line 131

def from_sql(db, query)
  require 'daru_lite/io/sql_data_source'
  SqlDataSource.make_dataframe(db, query)
end

.load(filename) ⇒ Object



180
181
182
183
184
185
186
187
188
# File 'lib/daru_lite/io/io.rb', line 180

def load(filename)
  if File.exist? filename
    o = false
    File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
    o
  else
    false
  end
end

.read_from_excel(path, opts) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/daru_lite/io/io.rb', line 63

def read_from_excel(path, opts)
  optional_gem 'spreadsheet', '~>1.3.0'

  worksheet_id = opts[:worksheet_id]
  row_id       = opts[:row_id]
  book         = Spreadsheet.open path
  worksheet    = book.worksheet worksheet_id
  headers      = ArrayHelper.recode_repeated(worksheet.row(row_id)).map(&:to_sym)

  [worksheet, headers]
end

.save(klass, filename) ⇒ Object

Loading and writing Marshalled DataFrame/Vector



174
175
176
177
178
# File 'lib/daru_lite/io/io.rb', line 174

def save(klass, filename)
  fp = File.open(filename, 'w')
  Marshal.dump(klass, fp)
  fp.close
end