Module: Daru::IO

Defined in:
lib/daru/io/io.rb,
lib/daru/io/csv/converters.rb,
lib/daru/io/sql_data_source.rb

Overview

Convenience methods for reading and writing Daru::DataFrame objects in several formats: CSV, Excel (xls), SQL databases, ActiveRecord relations, HTML tables, plain text files, and Marshal dumps.

Defined Under Namespace

Modules: CSV

Classes: SqlDataSource

Class Method Summary

Class Method Details

.dataframe_write_csv(dataframe, path, opts = {}) ⇒ Object

Write a DataFrame to a CSV file at the given path.



# File 'lib/daru/io/io.rb', line 93

def dataframe_write_csv dataframe, path, opts={}
  options = {
    converters: :numeric
  }.merge(opts)

  writer = ::CSV.open(path, 'w', options)
  writer << dataframe.vectors.to_a unless options[:headers] == false

  dataframe.each_row do |row|
    writer << if options[:convert_comma]
                row.map { |v| v.to_s.tr('.', ',') }
              else
                row.to_a
              end
  end

  writer.close
end
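
A minimal usage sketch for writing a DataFrame to CSV; the data and file name below are hypothetical:

require 'daru'

df = Daru::DataFrame.new(a: [1, 2, 3], b: [4.5, 5.5, 6.5])
Daru::IO.dataframe_write_csv(df, 'out.csv')                  # header row from the vector names
Daru::IO.dataframe_write_csv(df, 'out.csv', headers: false)  # data rows only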

.dataframe_write_excel(dataframe, path, _opts = {}) ⇒ Object

Write a DataFrame to an Excel (xls) spreadsheet at the given path, with a bold header row.



# File 'lib/daru/io/io.rb', line 62

def dataframe_write_excel dataframe, path, _opts={}
  book   = Spreadsheet::Workbook.new
  sheet  = book.create_worksheet
  format = Spreadsheet::Format.new color: :blue, weight: :bold

  sheet.row(0).concat(dataframe.vectors.to_a.map(&:to_s)) # Unfreeze strings
  sheet.row(0).default_format = format
  i = 1
  dataframe.each_row do |row|
    sheet.row(i).concat(row.to_a)
    i += 1
  end

  book.write(path)
end
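
A minimal usage sketch, assuming the spreadsheet gem is available; the data and file name are hypothetical:

require 'daru'
require 'spreadsheet'

df = Daru::DataFrame.new(name: ['a', 'b'], value: [1, 2])
Daru::IO.dataframe_write_excel(df, 'out.xls')  # header row is written in bold blue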

.dataframe_write_sql(ds, dbh, table) ⇒ Object

Insert every row of the DataFrame into the given database table through a prepared statement.



# File 'lib/daru/io/io.rb', line 123

def dataframe_write_sql ds, dbh, table
  require 'dbi'
  query = "INSERT INTO #{table} ("+ds.vectors.to_a.join(',')+') VALUES ('+(['?']*ds.vectors.size).join(',')+')'
  sth   = dbh.prepare(query)
  ds.each_row { |c| sth.execute(*c.to_a) }
  true
end
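
A minimal usage sketch, assuming the dbi gem and an existing table whose columns match the DataFrame's vector names; the connection string and table name are hypothetical:

require 'daru'
require 'dbi'

df  = Daru::DataFrame.new(name: ['Ann', 'Ben'], age: [30, 25])
dbh = DBI.connect('DBI:SQLite3:people.db')
Daru::IO.dataframe_write_sql(df, dbh, 'people')  # one INSERT per row via the prepared statement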

.from_activerecord(relation, *fields) ⇒ Object

Load a DataFrame from an ActiveRecord::Relation

Parameters:

  • relation (ActiveRecord::Relation)

    The relation whose records will populate the DataFrame

  • fields (Symbol, String)

    Optional attribute names to load; when none are given, all attributes of each record are used

Returns:

  • A dataframe containing the data in the given relation



# File 'lib/daru/io/io.rb', line 136

def from_activerecord(relation, *fields)
  if fields.empty?
    records = relation.map do |record|
      record.attributes.symbolize_keys
    end
    return Daru::DataFrame.new(records)
  else
    fields = fields.map(&:to_sym)
  end

  vectors = fields.map { |name| [name, Daru::Vector.new([], name: name)] }.to_h

  Daru::DataFrame.new(vectors, order: fields).tap do |df|
    relation.pluck(*fields).each do |record|
      df.add_row(Array(record))
    end
    df.update
  end
end
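
A minimal usage sketch, assuming a hypothetical ActiveRecord model User with name and age columns:

require 'daru'

df_all  = Daru::IO.from_activerecord(User.all)               # every attribute becomes a vector
df_some = Daru::IO.from_activerecord(User.all, :name, :age)  # only the listed fields, fetched via pluck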

.from_csv(path, opts = {}) ⇒ Object

Load a DataFrame from a CSV file



# File 'lib/daru/io/io.rb', line 79

def from_csv path, opts={}
  daru_options, opts = from_csv_prepare_opts opts
  # Preprocess headers for detecting and correcting repetition in
  # case the :headers option is not specified.
  hsh =
    if opts[:headers]
      from_csv_hash_with_headers(path, opts)
    else
      from_csv_hash(path, opts)
        .tap { |hash| daru_options[:order] = hash.keys }
    end
  Daru::DataFrame.new(hsh,daru_options)
end
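
A minimal usage sketch; the file name is hypothetical, and standard ::CSV parsing options such as col_sep and headers are passed through to the CSV library:

require 'daru'

df = Daru::IO.from_csv('data.csv')
df = Daru::IO.from_csv('data.csv', col_sep: ';', headers: true)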

.from_excel(path, opts = {}) ⇒ Object

Load a DataFrame from an Excel (xls) spreadsheet.



# File 'lib/daru/io/io.rb', line 41

def from_excel path, opts={}
  optional_gem 'spreadsheet', '~>1.1.1'
  opts = {
    worksheet_id: 0
  }.merge opts

  worksheet_id = opts[:worksheet_id]
  book         = Spreadsheet.open path
  worksheet    = book.worksheet worksheet_id
  headers      = ArrayHelper.recode_repeated(worksheet.row(0)).map(&:to_sym)

  df = Daru::DataFrame.new({})
  headers.each_with_index do |h,i|
    col = worksheet.column(i).to_a
    col.delete_at 0
    df[h] = col
  end

  df
end
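
A minimal usage sketch, assuming the spreadsheet gem and a hypothetical data.xls file:

require 'daru'

df = Daru::IO.from_excel('data.xls')                   # first worksheet by default
df = Daru::IO.from_excel('data.xls', worksheet_id: 1)  # second worksheet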

.from_html(path, opts) ⇒ Object

Load DataFrames from the HTML tables found at the given page, one DataFrame per table matching opts[:match].



# File 'lib/daru/io/io.rb', line 188

def from_html path, opts
  optional_gem 'mechanize', '~>2.7.5'
  page = Mechanize.new.get(path)
  page.search('table').map { |table| html_parse_table table }
      .keep_if { |table| html_search table, opts[:match] }
      .compact
      .map { |table| html_decide_values table, opts }
      .map { |table| html_table_to_dataframe table }
end
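
A minimal usage sketch, assuming the mechanize gem; the URL and match string are hypothetical. One DataFrame is returned per matching table:

require 'daru'

tables = Daru::IO.from_html('http://example.com/stats.html', match: 'Year')
df     = tables.first  # DataFrame built from the first table whose content matches 'Year'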

.from_plaintext(filename, fields) ⇒ Object

Load a DataFrame from a whitespace-delimited plain text file



# File 'lib/daru/io/io.rb', line 158

def from_plaintext filename, fields
  ds = Daru::DataFrame.new({}, order: fields)
  fp = File.open(filename,'r')
  fp.each_line do |line|
    row = Daru::IOHelpers.process_row(line.strip.split(/\s+/),[''])
    next if row == ["\x1A"]
    ds.add_row(row)
  end
  ds.update
  fields.each { |f| ds[f].rename f }
  ds
end
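
A minimal usage sketch for a whitespace-delimited file; the file name and field names are hypothetical:

require 'daru'

df = Daru::IO.from_plaintext('people.txt', [:name, :age, :city])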

.from_sql(db, query) ⇒ Object

Execute a query and create a data frame from the result

Parameters:

  • db (DBI::DatabaseHandle, String)

    A DBI connection handle, or the path to an SQLite3 database.

  • query (String)

    The query to be executed

Returns:

  • A dataframe containing the data resulting from the query



# File 'lib/daru/io/io.rb', line 118

def from_sql(db, query)
  require 'daru/io/sql_data_source'
  SqlDataSource.make_dataframe(db, query)
end
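
A minimal usage sketch; the database path, table, and query are hypothetical. Either a path to an SQLite3 file or an existing DBI handle can be passed:

require 'daru'

df = Daru::IO.from_sql('people.db', 'SELECT * FROM people')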

.load(filename) ⇒ Object

Load a Marshalled DataFrame/Vector from a file; returns false if the file does not exist.



# File 'lib/daru/io/io.rb', line 178

def load filename
  if File.exist? filename
    o = false
    File.open(filename, 'r') { |fp| o = Marshal.load(fp) }
    o
  else
    false
  end
end
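
A minimal usage sketch; the file name is hypothetical. The method returns the unmarshalled object, or false when the file does not exist:

require 'daru'

df = Daru::IO.load('frame.dat')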

.save(klass, filename) ⇒ Object

Write a Marshal dump of a DataFrame/Vector to a file



# File 'lib/daru/io/io.rb', line 172

def save klass, filename
  fp = File.open(filename, 'w')
  Marshal.dump(klass, fp)
  fp.close
end
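
A minimal usage sketch pairing save with load; the file name is hypothetical:

require 'daru'

df = Daru::DataFrame.new(a: [1, 2, 3])
Daru::IO.save(df, 'frame.dat')    # write a Marshal dump of the DataFrame
df2 = Daru::IO.load('frame.dat')  # read it back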