Class: Mikon::DataFrame
- Inherits:
-
Object
- Object
- Mikon::DataFrame
- Defined in:
- lib/mikon/core/dataframe.rb,
lib/mikon/plot.rb,
lib/mikon/pivot.rb
Overview
The main data structure in the Mikon gem. A DataFrame consists of labels (column names), an index (row names), and the column data.
Instance Attribute Summary collapse
-
#index ⇒ Object
readonly
Returns the value of attribute index.
-
#labels ⇒ Object
readonly
Returns the value of attribute labels.
-
#name ⇒ Object
readonly
Returns the value of attribute name.
Class Method Summary collapse
-
.from_csv(path, options = {}) {|csv| ... } ⇒ Object
Create Mikon::DataFrame from a csv/tsv file.
Instance Method Summary collapse
-
#[](arg) ⇒ Object
Accessor for column and rows.
- #_check_if_valid ⇒ Object
-
#all?(&block) ⇒ Boolean
Mikon::Row DSL.
-
#any?(&block) ⇒ Boolean
Mikon::Row DSL.
-
#column(label) ⇒ Object
Access column with its name.
-
#delete(label) ⇒ Object
Delete column.
- #dup ⇒ Object
- #dup_only_valid ⇒ Object
-
#each(&block) ⇒ Object
Iterate rows using Mikon::Row DSL.
-
#each_row(&block) ⇒ Object
Iterate row.
-
#fillna(value = 0) ⇒ Object
Replace NaN with specified value (destructive).
-
#head(num) ⇒ Object
same as head of Linux.
-
#initialize(source, options = {}) ⇒ DataFrame
constructor
A new instance of DataFrame.
-
#insert_column(*args, &block) ⇒ Object
Insert column using Mikon::Row DSL or raw Array.
-
#length ⇒ Object
return the length of columns.
-
#map(&block) ⇒ Object
(also: #collect)
Iterate rows using Mikon::Row DSL and return new Mikon::Series.
-
#pivot(args = {}) ⇒ Object
Experimental Implementation.
- #plot(args = {}) ⇒ Object
-
#row(index) ⇒ Object
Access row using index.
-
#select(&block) ⇒ Object
(also: #filter)
Select rows using Mikon::Row DSL and create new DataFrame.
-
#sort(label, ascending = true) ⇒ Object
Sort by label.
-
#sort_by(ascending = true, &block) ⇒ Object
Sort using Mikon::Row DSL.
-
#tail(num) ⇒ Object
same as tail of Linux.
-
#to_html(threshold = 50) ⇒ Object
IRuby notebook automatically calls this method.
-
#to_json(*args) ⇒ Object
Compatible with Nyaplot::DataFrame.to_json.
- #to_s(threshold = 50) ⇒ Object
Constructor Details
#initialize(source, options = {}) ⇒ DataFrame
Returns a new instance of DataFrame.
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 |
# File 'lib/mikon/core/dataframe.rb', line 12

# Builds a DataFrame from one of the supported source shapes.
#
# Accepted sources, as dispatched below:
# * an empty Array (yields a single empty column),
# * an Array of Mikon::DArray (used as the columns as-is),
# * an Array of Mikon::Row (labels and index are taken from the rows),
# * an Array of Hash (keys of the first hash become the labels),
# * an Array of Array (each inner array is one column),
# * a Hash of label => Array, or label => Series.
#
# @param source [Array, Hash] the raw data
# @param options [Hash] :name (defaults to a random UUID), :index (an Array
#   of row names, or a Symbol naming the column to use as index) and :labels
# @raise [RuntimeError] when the source or the index is not acceptable
def initialize(source, options = {})
  options = {
    name: SecureRandom.uuid,
    index: nil,
    labels: nil
  }.merge(options)

  case source
  when Array
    if source.empty?
      @data = [DArray.new([])]
    elsif source.all? { |el| el.is_a?(Mikon::Series) }
      raise "NotImplementedError" + source.to_s
    elsif source.all? { |el| el.is_a?(Mikon::DArray) }
      @data = source
    elsif source.all? { |el| el.is_a?(Mikon::Row) }
      @labels = source.first.labels
      @index = source.map { |row| row.index }
      @data = source.map { |row| row.to_hash.values }.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end
    elsif source.all? { |el| el.is_a?(Hash) }
      @labels = source.first.keys
      @data = source.map { |hash| hash.values }.transpose.map do |arr|
        Mikon::DArray.new(arr)
      end
    elsif source.all? { |el| el.is_a?(Array) }
      @data = source.map { |arr| Mikon::DArray.new(arr) }
    else
      raise "Non-acceptable Arguments Error"
    end
  when Hash
    if source.values.all? { |val| val.is_a?(Array) }
      @labels = source.keys
      @data = source.values.map { |arr| Mikon::DArray.new(arr) }
    elsif source.values.all? { |val| val.is_a?(Series) }
      # This branch used to be empty, which left @data nil and crashed in
      # _check_if_valid. Series#to_darr is the same conversion that
      # insert_column applies to a Series.
      @labels = source.keys
      @data = source.values.map { |series| series.to_darr }
    else
      raise "Non-acceptable Arguments Error"
    end
  else
    raise "Non-acceptable Arguments Error"
  end

  # Copy the caller's labels: the array is mutated below and by #delete.
  @labels = options[:labels].dup unless options[:labels].nil?
  @name = options[:name]

  index = options[:index]
  unless index.nil?
    case index
    when Symbol
      raise "labels should be set" if @labels.nil?
      pos = @labels.index(index)
      raise "There is no column named " + index.to_s if pos.nil?
      # Remove by position so that exactly one label and one column go away,
      # even when another column holds equal values.
      @labels.delete_at(pos)
      @index = @data.delete_at(pos)
    when Array
      @index = index
    else
      raise "Invalid index type"
    end
  end

  _check_if_valid
end
Instance Attribute Details
#index ⇒ Object (readonly)
Returns the value of attribute index.
366 367 368 |
# File 'lib/mikon/core/dataframe.rb', line 366

# Read-only accessor for the row index.
#
# @return [Object] the value of @index
def index
  @index
end
#labels ⇒ Object (readonly)
Returns the value of attribute labels.
366 367 368 |
# File 'lib/mikon/core/dataframe.rb', line 366

# Read-only accessor for the column labels.
#
# @return [Object] the value of @labels
def labels
  @labels
end
#name ⇒ Object (readonly)
Returns the value of attribute name.
366 367 368 |
# File 'lib/mikon/core/dataframe.rb', line 366

# Read-only accessor for the name of this frame.
#
# @return [Object] the value of @name
def name
  @name
end
Class Method Details
.from_csv(path, options = {}) {|csv| ... } ⇒ Object
Create Mikon::DataFrame from a csv/tsv file
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 |
# File 'lib/mikon/core/dataframe.rb', line 113

# Creates a Mikon::DataFrame from a csv/tsv file.
#
# Options named :name, :index and :labels are forwarded to the constructor;
# every other option is merged over the CSV defaults below and handed to the
# CSV parser (e.g. col_sep: "\t" for tsv).
#
# @param path [String] path of the file to read
# @param options [Hash] CSV parser options and/or constructor options
# @yield [csv] the parsed CSV table, before it is converted into columns
# @raise [ArgumentError] when headers are disabled with headers: false
def self.from_csv(path, options = {})
  csv_options = {
    :col_sep => ',',
    :headers => true,
    :converters => :numeric,
    :header_converters => :symbol,
  }

  # The CSV parser rejects options it does not know, so the constructor's
  # own options must be split off before parsing.
  frame_keys = [:name, :index, :labels]
  frame_options = options.select { |key, _| frame_keys.include?(key) }
  csv_options = csv_options.merge(options.reject { |key, _| frame_keys.include?(key) })

  raise ArgumentError, "options[:headers] should be set" if csv_options[:headers] == false
  # Explicit header names are used verbatim, without symbol conversion.
  csv_options.delete(:header_converters) if csv_options[:headers].is_a?(Array)

  # Parser options go in as keywords; current csv no longer accepts a
  # positional file mode here.
  csv = CSV.read(path, **csv_options)
  yield csv if block_given?

  hash = {}
  csv.by_col.each { |label, arr| hash[label] = arr }

  self.new(hash, frame_options)
end
Instance Method Details
#[](arg) ⇒ Object
Accessor for column and rows
140 141 142 143 144 145 146 147 148 149 |
# File 'lib/mikon/core/dataframe.rb', line 140

# Accessor for columns and rows.
#
# A Range selects the rows whose index value lies inside the range and
# returns them as a new DataFrame; a Symbol returns the column with that
# label. Any other argument yields nil.
#
# @param arg [Range, Symbol]
def [](arg)
  case arg
  when Range
    picked = @index.select { |i| arg.include?(i) }
    picked_rows = picked.map { |i| row(i) }
    Mikon::DataFrame.new(picked_rows, {index: picked})
  when Symbol
    column(arg)
  end
end
#_check_if_valid ⇒ Object
85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 |
# File 'lib/mikon/core/dataframe.rb', line 85

# Normalizes the internal state after construction or mutation:
# pads short columns, creates a default index and symbolizes the labels.
#
# @raise [RuntimeError] when the index length differs from the column length
def _check_if_valid
  # All arrays should have the same length.
  # `max` is nil when there are no columns at all; treat that as length 0.
  length = @data.map { |darr| darr.length }.max || 0
  # NOTE(review): the method name was lost in the extracted listing;
  # DArray#expand is presumed here — confirm against lib/mikon/core/array.rb.
  @data.each { |darr| darr.expand(length) if darr.length < length }

  # DataFrame should have an index object
  @index = (0..(length - 1)).to_a if @index.nil?
  raise "index should have the same length as arrays" if @index.length != length

  # Labels should be instances of Symbol
  if @labels.nil?
    @labels = @data.map.with_index { |_darr, i| i.to_s.to_sym }
  elsif @labels.any? { |label| !label.is_a?(Symbol) }
    @labels = @labels.map { |label| label.to_sym }
  end
end
#all?(&block) ⇒ Boolean
Mikon::Row DSL
246 247 248 249 |
# File 'lib/mikon/core/dataframe.rb', line 246

# Mikon::Row DSL: evaluates the block in the context of every row and
# stops at the first row for which it is falsy.
#
# @return [Boolean] true when the block holds for every row
def all?(&block)
  each_row do |row|
    holds = row.instance_eval(&block)
    return false unless holds
  end
  true
end
#any?(&block) ⇒ Boolean
Mikon::Row DSL
252 253 254 255 |
# File 'lib/mikon/core/dataframe.rb', line 252

# Mikon::Row DSL: evaluates the block in the context of every row and
# stops at the first row for which it is truthy.
#
# @return [Boolean] true when the block holds for at least one row
def any?(&block)
  each_row do |row|
    holds = row.instance_eval(&block)
    return true if holds
  end
  false
end
#column(label) ⇒ Object
Access column with its name
152 153 154 155 156 |
# File 'lib/mikon/core/dataframe.rb', line 152

# Accesses a column by its name.
#
# @param label [Symbol] the column label
# @return [Mikon::Series] the column, sharing this frame's index
# @raise [RuntimeError] when no column carries that label
def column(label)
  pos = @labels.index(label)
  # Labels are Symbols, so the label must be converted before it is appended
  # to the message; String + Symbol would raise a TypeError instead.
  raise "There is no column named " + label.to_s if pos.nil?
  Mikon::Series.new(label, @data[pos], index: @index)
end
#delete(label) ⇒ Object
Delete column
351 352 353 354 355 356 |
# File 'lib/mikon/core/dataframe.rb', line 351

# Deletes a column (destructive).
#
# @param label [Symbol] label of the column to remove
# @return [Object] the removed column data
# @raise [RuntimeError] when no column carries that label
def delete(label)
  position = @labels.index(label)
  if position.nil?
    raise "there is no column named " + label.to_s
  end
  @labels.delete_at(position)
  @data.delete_at(position)
end
#dup ⇒ Object
358 359 360 |
# File 'lib/mikon/core/dataframe.rb', line 358

# Returns a copy of this frame whose columns, index and labels can be
# changed without affecting the receiver.
#
# @return [Mikon::DataFrame]
def dup
  # The constructor reads options[:labels]; the former `label:` key was
  # ignored, so copies lost their column names. Index and labels are copied
  # so that destructive calls on the copy do not leak back.
  Mikon::DataFrame.new(
    @data.map { |darr| darr.dup },
    { index: @index.dup, labels: @labels.dup }
  )
end
#dup_only_valid ⇒ Object
362 363 364 |
# File 'lib/mikon/core/dataframe.rb', line 362

# Currently a plain alias for #dup.
#
# @return [Object] whatever #dup returns
def dup_only_valid
  dup
end
#each(&block) ⇒ Object
Iterate rows using Mikon::Row DSL
225 226 227 228 229 230 231 |
# File 'lib/mikon/core/dataframe.rb', line 225

# Iterates over the rows using the Mikon::Row DSL: the block is evaluated
# in the context of each row.
#
# @return [Enumerator] when called without a block
# @return [self] otherwise
def each(&block)
  return to_enum(:each) unless block_given?

  each_row { |row| row.instance_eval(&block) }
  self
end
#each_row(&block) ⇒ Object
Iterate row
334 335 336 337 338 339 340 341 |
# File 'lib/mikon/core/dataframe.rb', line 334

# Iterates over the rows, yielding a Mikon::Row built from the labels,
# the values found at that position in every column, and the index entry.
#
# @yield [row] each row in index order
# @return [Enumerator] when called without a block
def each_row(&block)
  return to_enum(:each_row) unless block_given?

  @index.each.with_index do |_entry, pos|
    values = @data.map { |darr| darr[pos] }
    block.call(Mikon::Row.new(@labels, values, @index[pos]))
  end
end
#fillna(value = 0) ⇒ Object
Replace NaN with specified value (destructive)
345 346 347 348 |
# File 'lib/mikon/core/dataframe.rb', line 345

# Replaces NaN with the given value in every column (destructive).
#
# @param value [Object] replacement passed to each column's #fillna
# @return [self]
def fillna(value=0)
  @data.each do |column|
    column.fillna(value)
  end
  self
end
#head(num) ⇒ Object
same as head of Linux
159 160 161 |
# File 'lib/mikon/core/dataframe.rb', line 159

# Same idea as `head` on Linux: selects the rows whose index value lies
# in 0..(num-1) by delegating to #[].
#
# @param num [Integer] upper bound (exclusive) of the index range
def head(num)
  upper = num - 1
  self[0..upper]
end
#insert_column(*args, &block) ⇒ Object
Insert column using Mikon::Row DSL or raw Array
290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 |
# File 'lib/mikon/core/dataframe.rb', line 290

# Inserts a column (destructive).
#
# Supported call shapes:
# * insert_column(:name) { ... }     - the block is evaluated in the context
#                                      of each row (Mikon::Row DSL) and its
#                                      result becomes the new value
# * insert_column(:name, values)     - values is a DArray, Series or Array
# * insert_column(series)            - the Series' own name is the label
#
# @return [self]
# @raise [ArgumentError] when the arguments match none of the shapes above
def insert_column(*args, &block)
  if block_given?
    name = args[0]
    rows = []
    each_row do |row|
      row[name] = row.instance_eval(&block)
      rows.push(row)
    end
    @data = rows.map { |row| row.to_hash.values }.transpose.map do |arr|
      Mikon::DArray.new(arr)
    end
    @labels = rows.first.labels
  else
    # Resolve both the label and the data before touching @data / @labels,
    # so that a bad call cannot leave the frame half-updated.
    case args[0]
    when Symbol
      name = args[0]
      column =
        case args[1]
        when Mikon::DArray then args[1]
        when Mikon::Series then args[1].to_darr
        when Array then Mikon::DArray.new(args[1])
        else raise ArgumentError
        end
    when Mikon::Series
      name = args[0].name
      column = args[0].to_darr
    else
      # This case used to push a nil label and fail later with NoMethodError.
      raise ArgumentError
    end
    @data.push(column)
    @labels.push(name)
  end
  _check_if_valid
  self
end
#length ⇒ Object
return the length of columns
103 104 105 |
# File 'lib/mikon/core/dataframe.rb', line 103

# Returns the length of the columns, measured on the first column.
#
# @return [Integer]
def length
  first_column = @data.first
  first_column.length
end
#map(&block) ⇒ Object Also known as: collect
Iterate rows using Mikon::Row DSL and return new Mikon::Series
234 235 236 237 238 239 240 241 |
# File 'lib/mikon/core/dataframe.rb', line 234

# Evaluates the block in the context of each row (Mikon::Row DSL) and
# collects the results into a new Mikon::Series named :new_series that
# carries a copy of this frame's index.
#
# @return [Enumerator] when called without a block
# @return [Mikon::Series] otherwise
def map(&block)
  return to_enum(:map) unless block_given?

  results = []
  each_row { |row| results << row.instance_eval(&block) }
  Mikon::Series.new(:new_series, results, index: @index.clone)
end
#pivot(args = {}) ⇒ Object
Experimental Implementation. DO NOT USE THIS METHOD
5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/mikon/pivot.rb', line 5

# Experimental implementation. DO NOT USE THIS METHOD.
#
# Builds a new DataFrame with one column per distinct value ("factor") of
# args[:column] and one row per factor of args[:row]. Each cell holds the
# first args[:value] found for that combination, or args[:fill_value] when
# no row matches.
#
# @param args [Hash] :column, :row, :value (Symbols, all required) and
#   :fill_value (defaults to Float::NAN)
# @raise [ArgumentError] unless :column, :row and :value are all Symbols
def pivot(args={})
  args = {
    column: nil,
    row: nil,
    value: nil,
    fill_value: Float::NAN
  }.merge(args)

  required = [:column, :row, :value]
  raise ArgumentError unless required.all? { |sym| args[sym].is_a?(Symbol) }

  column_factors = self[args[:column]].factors
  index = self[args[:row]].factors

  source = column_factors.each_with_object({}) do |label, memo|
    cells = []
    # Rows belonging to the column currently being built.
    subset = self.select { |row| row[args[:column]] == label }
    index.each do |i|
      if subset.any? { |row| row[args[:row]] == i }
        matched = subset.select { |row| row[args[:row]] == i }[args[:value]]
        cells.push(matched.to_a[0])
      else
        cells.push(args[:fill_value])
      end
    end
    memo[label] = cells
  end

  Mikon::DataFrame.new(source, index: index)
end
#plot(args = {}) ⇒ Object
24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/mikon/plot.rb', line 24

# Builds a Nyaplot::Plot from this frame.
#
# * :line    - one line per column, plotted against the index, with a legend
# * :box     - a box plot over all columns
# * :scatter - args[:x] against args[:y], optionally grouped by :fill_by
#
# @param args [Hash] :type (defaults to :line), :x, :y, :fill_by and
#   :color (name of a Nyaplot::Colors palette; defaults to `qual`)
# @return [Nyaplot::Plot]
def plot(args={})
  defaults = {
    :type => :line,
    :x => nil,
    :y => nil,
    :fill_by => nil,
    :color => nil
  }
  args = defaults.merge(args)

  plot = Nyaplot::Plot.new
  plot.x_label("")
  plot.y_label("")

  colors =
    if args[:color].nil?
      Nyaplot::Colors.qual.to_a
    else
      Nyaplot::Colors.send(args[:color]).to_a
    end

  case args[:type]
  when :line
    @data.each.with_index do |darr, pos|
      line = plot.add(:line, @index, darr.to_a)
      line.color(colors.pop)
      line.title(@labels[pos])
    end
    plot.legend(true)

  when :box
    plot.add_with_df(self, :box, *@labels)

  when :scatter
    scatter = plot.add_with_df(self, :scatter, args[:x], args[:y])
    scatter.color(colors)
    scatter.fill_by(args[:fill_by]) unless args[:fill_by].nil?
    plot.x_label(args[:x])
    plot.y_label(args[:y])
  end

  plot
end
#row(index) ⇒ Object
Access row using index
327 328 329 330 331 |
# File 'lib/mikon/core/dataframe.rb', line 327

# Accesses a row by its index value.
#
# @param index [Object] an entry of this frame's index
# @return [Mikon::Row] the labels, the values at that position and the index
# @raise [RuntimeError] when the index value is not present
def row(index)
  pos = @index.index(index)
  # Without this guard a nil position was used to subscript every column,
  # which failed with an error unrelated to the actual problem.
  raise "There is no row with index " + index.to_s if pos.nil?
  values = @data.map { |column| column[pos] }
  Mikon::Row.new(@labels, values, index)
end
#select(&block) ⇒ Object Also known as: filter
Select rows using Mikon::Row DSL and create new DataFrame
210 211 212 213 214 215 216 217 218 219 220 |
# File 'lib/mikon/core/dataframe.rb', line 210

# Selects rows using the Mikon::Row DSL and builds a new DataFrame from
# the rows for which the block is truthy.
#
# @return [Enumerator] when called without a block
# @return [Mikon::DataFrame] otherwise
def select(&block)
  return to_enum(:select) unless block_given?

  matched = []
  each_row do |row|
    matched << row if row.instance_eval(&block)
  end
  Mikon::DataFrame.new(matched)
end
#sort(label, ascending = true) ⇒ Object
Sort by label
273 274 275 276 277 278 279 |
# File 'lib/mikon/core/dataframe.rb', line 273

# Sorts the rows by the values of one column.
#
# @param label [Symbol] label of the column to sort by
# @param ascending [Boolean] pass false to reverse the order
# @return [Object] the result of #sort_by (a new DataFrame)
# @raise [RuntimeError] when no column carries that label
def sort(label, ascending=true)
  pos = @labels.index(label)
  raise "No column named " + label.to_s if pos.nil?
  order = @data[pos].sorted_indices
  order.reverse! unless ascending
  # Drive #sort_by through its enumerator form: the n-th row is ranked by
  # where n appears in `order`.
  self.sort_by.with_index { |_row, n| order.index(n) }
end
#sort_by(ascending = true, &block) ⇒ Object
Sort using Mikon::Row DSL
260 261 262 263 264 265 266 267 |
# File 'lib/mikon/core/dataframe.rb', line 260

# Sorts the rows by a key computed with the Mikon::Row DSL.
#
# The block is evaluated per row through #map; the sorted positions of
# those keys determine the new order of every column and of the index.
#
# @param ascending [Boolean] pass false to reverse the order
# @return [Enumerator] when called without a block
# @return [Mikon::DataFrame] a new frame with the same labels
def sort_by(ascending=true, &block)
  return to_enum(:sort_by) unless block_given?

  keys = map(&block)
  order = keys.to_darr.sorted_indices
  order.reverse! unless ascending

  sorted_columns = @data.map do |darr|
    darr.sort_by.with_index { |_val, pos| order.index(pos) }
  end
  sorted_index = @index.sort_by.with_index { |_val, pos| order.index(pos) }

  Mikon::DataFrame.new(sorted_columns, {index: sorted_index, labels: @labels})
end
#tail(num) ⇒ Object
same as tail of Linux
164 165 166 167 |
# File 'lib/mikon/core/dataframe.rb', line 164

# Same idea as `tail` on Linux: selects the rows whose index value lies
# in the last `num` positions by delegating to #[].
#
# @param num [Integer] number of trailing positions
def tail(num)
  final = length - 1
  start = final - num + 1
  self[start..final]
end
#to_html(threshold = 50) ⇒ Object
IRuby notebook automatically call this method
179 180 181 182 183 184 185 186 187 188 189 190 191 |
# File 'lib/mikon/core/dataframe.rb', line 179

# Renders the frame as an HTML table.
#
# Rows after position `threshold` are skipped, except for the last row;
# a row of "..." is emitted once the threshold is reached.
#
# NOTE(review): labels, index entries and cell values are inserted without
# HTML escaping — consider CGI.escapeHTML if the data can be untrusted.
#
# @param threshold [Integer] position after which rows are elided
# @return [String] the HTML markup
def to_html(threshold=50)
  parts = ["<html><table><tr><td></td>"]
  parts << @labels.map { |label| "<th>" + label.to_s + "</th>" }.join
  parts << "</tr>"
  each_row.with_index do |row, pos|
    next if pos > threshold && pos != length - 1
    parts << "<tr><th>" + @index[pos].to_s + "</th>"
    parts << @labels.map { |label| "<td>" + row[label].to_s + "</td>" }.join
    parts << "</tr>"
    parts << "<tr><th>...</th>" + "<td>...</td>" * @labels.length + "</tr>" if pos == threshold
  end
  # The opening <html> tag used to be left unclosed.
  parts << "</table></html>"
  parts.join
end
#to_json(*args) ⇒ Object
Compartible with Nyaplot::DataFrame.to_json
170 171 172 173 174 175 176 |
# File 'lib/mikon/core/dataframe.rb', line 170

# Serializes the frame as a JSON array with one object per row.
# Compatible with Nyaplot::DataFrame.to_json.
#
# @param args generator state/options; forwarded to Array#to_json so that
#   the caller's settings are honoured when the frame is nested inside a
#   larger JSON document
# @return [String]
def to_json(*args)
  rows = []
  each_row { |row| rows.push(row.to_hash) }
  # The arguments used to be dropped, so caller-supplied generator options
  # had no effect.
  rows.to_json(*args)
end
#to_s(threshold = 50) ⇒ Object
193 194 195 196 197 198 199 200 201 202 203 |
# File 'lib/mikon/core/dataframe.rb', line 193 def to_s(threshold=50) arr = [] self.each_row.with_index do |row, pos| next nil if pos > threshold && pos != self.length-1 arr.push({"" => @index[pos]}.merge(row.to_hash)) if pos == threshold arr.push(@labels.reduce({"" => "..."}){|memo, label| memo[label] = "..."; memo}) end end Formatador.display_table(arr.select{|el| !(el.nil?)}) end |