Class: DataFrame

Inherits:
Object
  • Object
show all
Extended by:
DF::Import::ClassMethods
Includes:
DF::ARFF, DF::ColumnManagement, DF::Filter, DF::ID3, DF::Import::InstanceMethods, DF::KMeans, DF::MLP, DF::PreProcess, DF::SBN, DF::Saving, Training
Defined in:
lib/data_frame/model.rb,
lib/data_frame/id3.rb,
lib/data_frame/mlp.rb,
lib/data_frame/sbn.rb,
lib/data_frame/arff.rb,
lib/data_frame/kmeans.rb,
lib/data_frame/data_frame.rb,
lib/data_frame/core/filter.rb,
lib/data_frame/core/import.rb,
lib/data_frame/core/saving.rb,
lib/data_frame/core/training.rb,
lib/data_frame/core/pre_process.rb,
lib/data_frame/core/column_management.rb

Overview

This allows me to have named columns and, optionally, named rows in a data frame; to perform calculations (usually on the columns); and to transpose the matrix, keeping the transposed matrix cached until the object is tainted.

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from DF::Import::ClassMethods

from_csv

Methods included from DF::ColumnManagement

#append!, #drop!, #drop_one!, #duplicate!, #move_to_last!, #rename!, #replace!, #replace_column!, #subset_from_columns

Methods included from DF::PreProcess

#categorize!, #j_binary_ize!, #numericize!

Methods included from Training

#test_set, #training_set

Methods included from DF::Saving

#save

Methods included from DF::Import::InstanceMethods

#add_item, #import

Methods included from DF::Filter

#filter, #filter!, #filter_by_category, #filter_by_category!

Methods included from DF::ARFF

#to_arff, #to_csv

Methods included from DF::ID3

#create_id3, #id3

Constructor Details

#initialize(*labels) ⇒ DataFrame

Returns a new instance of DataFrame.



21
22
23
24
25
# File 'lib/data_frame/data_frame.rb', line 21

# Builds an empty frame with the given column labels.
#
# Labels may be passed as separate arguments or as one Array. Each label is
# converted with #to_underscore_sym before being stored in @labels, and
# @items starts out as a fresh TransposableArray.
def initialize(*labels)
  given = labels
  # A lone Array argument is treated as the label list itself.
  given = given.first if given.size == 1 && given.first.is_a?(Array)
  @labels = given.map { |label| label.to_underscore_sym }
  @items = TransposableArray.new
end

Dynamic Method Handling

This class handles dynamic methods through the method_missing method

#method_missing(sym, *args, &block) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
# File 'lib/data_frame/data_frame.rb', line 70

# Resolves calls to otherwise undefined methods, trying in order:
#
# 1. a column label -> the column, via render_column
# 2. a row label    -> the row, via render_row
# 3. any method @items responds to -> forwarded to @items together with
#    the original arguments and block
#
# Anything else is passed to super (which normally raises NoMethodError).
def method_missing(sym, *args, &block)
  if self.labels.include?(sym)
    render_column(sym)
  elsif self.row_labels.include?(sym)
    render_row(sym)
  elsif @items.respond_to?(sym)
    @items.send(sym, *args, &block)
  else
    super
  end
end

# Keeps respond_to? (and Object#method) in step with method_missing: answers
# true for the same names method_missing handles itself, and defers to super
# for everything else.
def respond_to_missing?(sym, include_private = false)
  self.labels.include?(sym) ||
    self.row_labels.include?(sym) ||
    @items.respond_to?(sym) ||
    super
end

Instance Attribute Details

#items ⇒ Object Also known as: rows

The items stored in the frame



16
17
18
# File 'lib/data_frame/data_frame.rb', line 16

# Reader for @items, the items stored in the frame.
# Returns whatever @items currently holds (nil if it was never assigned).
def items
  @items
end

#labels ⇒ Object (readonly) Also known as: variables

The labels of the data items



12
13
14
# File 'lib/data_frame/data_frame.rb', line 12

# Reader for @labels, the labels of the data items.
# Returns whatever @labels currently holds (nil if it was never assigned).
def labels
  @labels
end

#name ⇒ Object

An optional name, useful for arff files



19
20
21
# File 'lib/data_frame/data_frame.rb', line 19

# Reader for @name, the frame's optional name.
# Returns whatever @name currently holds (nil if it was never assigned).
def name
  @name
end

Instance Method Details

#columns(reset = false) ⇒ Object Also known as: to_hash, to_dictionary

Returns the columns as a Dictionary (when that class is defined) or a Hash. The result is cached; call columns(true) to reset the cache.



54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/data_frame/data_frame.rb', line 54

# Returns the frame's columns keyed by label.
#
# The container is a Dictionary when that constant is defined, otherwise a
# Hash. Entry n maps @labels[n] to the n-th element of @items.transpose.
# The result is memoized in @columns; pass a truthy +reset+ to rebuild it.
def columns(reset=false)
  @columns = nil if reset
  @columns ||= begin
    by_label = defined?(Dictionary) ? Dictionary.new : Hash.new
    @items.transpose.each_with_index do |column, position|
      by_label[@labels[position]] = column
    end
    by_label
  end
end

#inspect ⇒ Object



7
8
9
# File 'lib/data_frame/data_frame.rb', line 7

# One-line summary of the frame: the number of rows (rows.size) followed by
# the inspected label list.
def inspect
  row_count = self.rows.size
  label_list = self.labels.inspect
  format("DataFrame rows: %s labels: %s", row_count, label_list)
end

#model(name = nil, &block) ⇒ Object

Returns the model if one is already defined under the given name. Otherwise, if a block is given, defines a model from the block and stores it in the models container, which gives us access like: df.models.new_model_name…



8
9
10
11
12
13
14
15
16
# File 'lib/data_frame/model.rb', line 8

# Fetches or defines a named model.
#
# * If the models table already has +name+ as a key, returns models[name].
# * Otherwise, without a block, returns false.
# * Otherwise wraps the block in a ParameterCapture (kept in @pc), runs
#   filter(Hash) keeping the rows for which @pc.filter(row) is truthy
#   (presumably -- filter is defined elsewhere), stores the result in the
#   models table under +name+ and returns it.
def model(name=nil, &block)
  if self.models.table.keys.include?(name)
    self.models[name]
  elsif block
    @pc = ParameterCapture.new(&block)
    built = self.filter(Hash) { |row| @pc.filter(row) }
    self.models.table[name] = built
  else
    false
  end
end

#models ⇒ Object



18
19
20
# File 'lib/data_frame/model.rb', line 18

# Container for this frame's models: an OpenStruct created on first access
# and reused on every later call.
def models
  return @models if @models
  @models = OpenStruct.new
end

#render_column(sym) ⇒ Object

Return the column, given its name



46
47
48
49
50
# File 'lib/data_frame/data_frame.rb', line 46

# Looks up a column by name.
#
# The name is converted with #to_underscore_sym and searched for in
# @labels. Returns the matching element of @items.transpose, or nil when no
# label matches.
def render_column(sym)
  position = @labels.index(sym.to_underscore_sym)
  position ? @items.transpose[position] : nil
end

#render_row(sym) ⇒ Object



39
40
41
42
43
# File 'lib/data_frame/data_frame.rb', line 39

# Looks up a row by its label.
#
# Finds +sym+ in row_labels and returns the element of @items at the same
# position, or nil when the label is not present.
def render_row(sym)
  position = self.row_labels.index(sym)
  @items[position] if position
end

#row_labels ⇒ Object



27
28
29
# File 'lib/data_frame/data_frame.rb', line 27

# The row labels, lazily initialised to an empty Array on first access.
# The same Array is returned on later calls.
def row_labels
  @row_labels = [] unless @row_labels
  @row_labels
end

#row_labels=(ary) ⇒ Object

Raises:

  • (ArgumentError)


31
32
33
34
# File 'lib/data_frame/data_frame.rb', line 31

# Replaces the row labels.
#
# Stores +ary+ in @row_labels when it is an Array; raises ArgumentError for
# anything else and leaves the existing labels untouched.
def row_labels=(ary)
  if ary.is_a?(Array)
    @row_labels = ary
  else
    raise ArgumentError, "Row labels must be an array"
  end
end