Module: DF::PreProcess

Included in:
DataFrame
Defined in:
lib/data_frame/core/pre_process.rb

Overview

:nodoc:

Instance Method Summary collapse

Instance Method Details

#j_binary_ize!(*columns) ⇒ Object

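Despite the odd name, this creates one new column for every category found in each given column; each row of a new column holds true or false depending on whether that row's value belongs to the category.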



5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
# File 'lib/data_frame/core/pre_process.rb', line 5

# Expands each given column into one boolean column per category.
#
# For every requested column, the column is rendered and each entry of its
# +categories+ list yields a new column named "<column>_<category>", which
# is appended to the receiver. By default a row is +true+ when the
# corresponding entry of the rendered column's +category_map+ equals the
# category. With the +:allow_overlap+ option set, a row is +true+ when the
# category is included in +all_categories+ for that row's value.
#
# @param columns [Array] column identifiers; any Hash arguments found among
#   them are merged (later hashes win) and treated as options rather than
#   columns. The only option read here is +:allow_overlap+.
# @return [Array] the column identifiers that were processed, i.e. the
#   arguments with the option hashes removed.
def j_binary_ize!(*columns)
  # Options may be mixed in anywhere among the column identifiers.
  option_hashes, names = columns.partition { |arg| arg.is_a?(Hash) }
  options = option_hashes.inject({}) { |merged, opts| merged.merge!(opts) }

  names.each do |col|
    values = render_column(col.to_underscore_sym)

    # Per-row category lists do not depend on the category being emitted, so
    # they are built lazily and at most once per column instead of once per
    # category.
    memberships = nil

    values.categories.each do |category|
      full_name = "#{col}_#{category}".to_sym
      flags =
        if options[:allow_overlap]
          memberships ||= values.map { |e| values.all_categories(e) }
          memberships.map { |row_categories| row_categories.include?(category) }
        else
          values.category_map.map { |assigned| assigned == category }
        end
      append!(full_name, flags)
    end
  end
end

#numericize!(*columns) ⇒ Object

Adds a column, numerical_<column_name>, that represents each row's nominal category numerically: every row becomes an array holding 1 at the position of its category and 0 everywhere else.



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'lib/data_frame/core/pre_process.rb', line 29

# Appends a numeric indicator encoding for each given column.
#
# Each column is rendered and every value is mapped to its category via the
# rendered column's +category+ method. Distinct categories are numbered in
# order of first appearance, and each row is turned into an array of zeros
# (one slot per distinct category) with a 1 at its own category's slot. The
# resulting rows are appended under the name produced by
# <tt>"numerical <column>".to_underscore_sym</tt>.
#
# @param columns [Array] identifiers of the columns to encode.
# @return [Array] the +columns+ that were passed in.
def numericize!(*columns)
  columns.each do |col|
    values = render_column(col.to_underscore_sym)

    # Resolve each row's category once; both the numbering and the encoding
    # below are derived from this single pass.
    row_categories = values.map { |v| values.category(v) }

    # Slot number per distinct category, in order of first appearance.
    slot_for = {}
    row_categories.uniq.each_with_index { |category, slot| slot_for[category] = slot }

    width = slot_for.size
    new_values = row_categories.map do |category|
      row = Array.new(width, 0)
      row[slot_for[category]] = 1
      row
    end

    new_name = "numerical #{col.to_s}".to_underscore_sym
    append!(new_name, new_values)
  end
end