Module: DF::ColumnManagement

Included in:
DataFrame
Defined in:
lib/data_frame/core/column_management.rb

Overview

:nodoc:

Instance Method Summary collapse

Instance Method Details

#append!(column_name, value = nil) ⇒ Object

Adds a unique column to the table

Raises:

  • (ArgumentError)


23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
# File 'lib/data_frame/core/column_management.rb', line 23

# Appends a new, uniquely named column to every row of the data frame.
#
# The name is normalized with +to_underscore_sym+ *before* the duplicate
# check, because the normalized form is what gets stored in +labels+.
# Checking only the raw name would let e.g. "Foo" slip past an existing
# :foo label and produce two identical labels.
#
# @param column_name [#to_underscore_sym] name of the new column
# @param value [Array, Object, nil] when an Array, element +i+ is appended
#   to row +i+ (rows past the end of the array receive nil); any other
#   object is appended as-is to every row (the same object, not a copy)
# @return [Object] the result of +items.taint+
# @raise [ArgumentError] if the raw or normalized name is already a label
def append!(column_name, value=nil)
  label = column_name.to_underscore_sym
  if self.labels.include?(column_name) || self.labels.include?(label)
    raise ArgumentError, "Can't have duplicate column names"
  end
  self.labels << label

  if value.is_a?(Array)
    self.items.each_with_index { |row, row_number| row << value[row_number] }
  else
    self.items.each { |row| row << value }
  end

  self.columns(true)
  # The rows (sub arrays) were mutated directly, so the TaintableArray
  # holding them doesn't know it has changed; flag it explicitly.
  # NOTE(review): presumably TaintableArray defines its own #taint, since
  # Object#taint no longer exists in current Rubies - confirm.
  self.items.taint
end

#drop!(*labels) ⇒ Object

Drop one or more columns



72
73
74
75
76
77
# File 'lib/data_frame/core/column_management.rb', line 72

# Removes each of the named columns in turn by delegating to +drop_one!+.
#
# @param labels [Array] the labels of the columns to remove
# @return [self] the receiver, so calls can be chained
def drop!(*labels)
  labels.each { |label| drop_one!(label) }
  self
end

#drop_one!(label) ⇒ Object

Drop a single column



80
81
82
83
84
85
86
87
88
# File 'lib/data_frame/core/column_management.rb', line 80

# Removes one column: its value is deleted from every row and its label
# is deleted from +labels+.
#
# NOTE(review): unlike +append!+, this mutates the rows without tainting
# +items+ afterwards; confirm whether cached column data can go stale.
#
# @param label [Object] the label of the column to remove
# @return [self, nil] the receiver, or nil when no such label exists
def drop_one!(label)
  position = labels.index(label)
  return nil if position.nil?

  items.each { |row| row.delete_at(position) }
  labels.delete_at(position)
  self
end

#duplicate!(column_name) ⇒ Object

Duplicates the values of a column under a new, automatically generated name. This is useful when creating a related column, such as values by category.



105
106
107
108
109
110
111
# File 'lib/data_frame/core/column_management.rb', line 105

# Copies the values of an existing column into a new column.
#
# The new column's name comes from +new_column_name(column_name, n)+,
# using the smallest n >= 1 whose result is not already a label.
#
# @param column_name [Object] label of the column to copy
# @return [Boolean] true when the copy was appended, false when
#   +column_name+ is not an existing label
def duplicate!(column_name)
  if labels.include?(column_name)
    suffix = 1
    loop do
      break unless labels.include?(new_column_name(column_name, suffix))
      suffix += 1
    end

    fresh_name = new_column_name(column_name, suffix)
    # dup so the new column does not share its array with the source column
    copied_values = render_column(column_name).dup
    self.append!(fresh_name, copied_values)
    true
  else
    false
  end
end

#move_to_last!(orig_name) ⇒ Object

Raises:

  • (ArgumentError)


4
5
6
7
8
9
10
# File 'lib/data_frame/core/column_management.rb', line 4

# Moves a column to the last position.
#
# The column's values are appended under a temporary name, the original
# column is dropped, and the temporary column is renamed back.
#
# @param orig_name [Symbol, String] label of the column to move
# @return [Object] whatever +rename!+ returns
# @raise [ArgumentError] if +orig_name+ is not an existing label
def move_to_last!(orig_name)
  unless labels.include?(orig_name)
    raise ArgumentError, "Column not found"
  end

  placeholder = "#{orig_name}_a_unique_name".to_sym
  column_values = render_column(orig_name)
  self.append!(placeholder, column_values)
  self.drop!(orig_name)
  # rename! takes (new_name, orig_name): the placeholder becomes orig_name.
  self.rename!(orig_name, placeholder)
end

#rename!(new_name, orig_name) ⇒ Object

In the order of alias: new_name, orig_name

Raises:

  • (ArgumentError)


13
14
15
16
17
18
19
20
# File 'lib/data_frame/core/column_management.rb', line 13

# Renames a column. Argument order follows +alias+: new name first.
# Both names are normalized with +to_underscore_sym+ before use.
#
# @param new_name [#to_underscore_sym] the name the column should get
# @param orig_name [#to_underscore_sym] the column's current name
# @return [Symbol] the normalized new name
# @raise [ArgumentError] if the original column does not exist, or if a
#   column with the new name already exists
def rename!(new_name, orig_name)
  target = new_name.to_underscore_sym
  source = orig_name.to_underscore_sym

  position = labels.index(source)
  raise ArgumentError, "Column not found" if position.nil?

  if labels.include?(target)
    raise ArgumentError, "Cannot name #{source} to #{target}, that column already exists."
  end

  labels[position] = target
end

#replace!(column, values = nil, &block) ⇒ Object



40
41
42
43
44
45
46
47
48
# File 'lib/data_frame/core/column_management.rb', line 40

# Replaces the contents of one column, either with the supplied values or
# by transforming the existing values with a block.
#
# When +values+ is not given, the current column is read by calling the
# method named after the validated column, and each element is mapped
# through the block in place.
#
# @param column [Object] the column to replace; passed through
#   +validate_column+ first
# @param values [Object, nil] replacement values; when nil/false the block
#   is used instead
# @yield [element] called with each existing value when +values+ is omitted
# @yieldreturn [Object] the replacement for that value
# @return [self]
# @raise [ArgumentError] if neither +values+ nor a block is supplied
def replace!(column, values=nil, &block)
  column = validate_column(column)
  unless values
    # Fail clearly instead of hitting NoMethodError on a nil block (or
    # silently doing nothing when the column happens to be empty).
    raise ArgumentError, "Either values or a block is required" unless block
    values = self.send(column).map!(&block)
  end
  replace_column!(column, values)
  self
end

#replace_column!(column, values) ⇒ Object

Replace a single column with an array of values. The values should contain one entry per row of the data frame; rows without a corresponding entry are set to nil.



53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/data_frame/core/column_management.rb', line 53

# Overwrites one column with new values, row by row.
#
# Row +n+ receives +values[n]+. The work is bracketed by
# +store_range_hashes+ / +restore_range_hashes+, and the cached columns are
# discarded and rebuilt once the rows have been rewritten.
#
# @param column [Object] the column to overwrite; passed through
#   +validate_column+
# @param values [#[]] replacement values, indexed by row number
# @return [Object] the data frame's items
def replace_column!(column, values)
  store_range_hashes
  column = validate_column(column)
  position = labels.index(column)

  @items.each.with_index do |row, row_number|
    row[position] = values[row_number]
  end

  # A column changed underneath the caches: flag the items as modified,
  # throw away the memoized columns, and compute them again.
  items.taint
  @columns = nil
  columns
  restore_range_hashes

  @items
end

#subset_from_columns(*cols) ⇒ Object

Creates a new data frame, only with the specified columns.



91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/data_frame/core/column_management.rb', line 91

# Builds a new DataFrame containing only the requested columns.
#
# The new frame is created with all of this frame's labels and items, and
# every column that was not requested is then dropped from it. Columns
# keep the order they have in this frame; requested names that are not
# labels of this frame are ignored.
#
# @param cols [Array] labels of the columns to keep
# @return [DataFrame] the new data frame
def subset_from_columns(*cols)
  kept_labels = labels.select { |label| cols.include?(label) }

  subset = DataFrame.new(*labels)
  subset.import(items)

  labels.each do |label|
    next if kept_labels.include?(label)
    subset.drop!(label)
  end

  subset
end