Class: IsoTree::Dataset

Inherits:
Object
  • Object
show all
Defined in:
lib/isotree/dataset.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ Dataset

Returns a new instance of Dataset.

Raises:

  • (ArgumentError)


5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/isotree/dataset.rb', line 5

# Wraps +data+ in a column-oriented view and classifies each column as
# numeric or categorical.
#
# Three input shapes are handled:
# * a Rover::DataFrame (only when Rover is loaded) - columns whose type is
#   :object or :bool are categorical, all others numeric;
# * a two-dimensional Numo::NArray (only when Numo is loaded) - cast to
#   Numo::DFloat, every column numeric;
# * anything else responding to +to_a+ whose elements are all hashes or all
#   arrays - a column is numeric when every value is nil or a Numeric.
#
# @param data [Rover::DataFrame, Numo::NArray, Array<Hash>, Array<Array>]
# @raise [ArgumentError] if the input cannot be converted to rows, a Numo
#   array is not 2-dimensional, rows are not all hashes or all arrays, rows
#   differ in size, or there is no data
def initialize(data)
  @data = data

  if defined?(Rover::DataFrame) && data.is_a?(Rover::DataFrame)
    @vectors = data.vectors
    @numeric_columns, @categorical_columns =
      data.keys.partition { |k| ![:object, :bool].include?(data[k].type) }
    @array_type = false
  elsif defined?(Numo::NArray) && data.is_a?(Numo::NArray)
    raise ArgumentError, "Input must have 2 dimensions" if data.ndim != 2

    data = data.cast_to(Numo::DFloat)

    @numeric_columns = data.shape[1].times.to_a
    @categorical_columns = []
    # One vector per column index: data[true, k] selects all rows of column k.
    @vectors = @numeric_columns.each_with_object({}) { |k, vectors| vectors[k] = data[true, k] }
    @array_type = true
  else
    # Fail with the documented ArgumentError instead of a NoMethodError when
    # the input cannot be turned into a list of rows.
    unless data.respond_to?(:to_a)
      raise ArgumentError, "Input must be a data frame, Numo array, or array of hashes or arrays"
    end

    rows = data.to_a

    hashes = rows.all? { |row| row.is_a?(Hash) }
    arrays = !hashes && rows.all? { |row| row.is_a?(Array) }
    unless hashes || arrays
      raise ArgumentError, "Array elements must be all hashes or arrays"
    end

    ncols = rows.first ? rows.first.size : 0
    if rows.any? { |row| row.size != ncols }
      raise ArgumentError, "All rows must have the same number of columns"
    end

    # Hash rows are keyed by the union of their keys; array rows by position.
    keys = hashes ? rows.flat_map(&:keys).uniq : ncols.times.to_a

    # Transpose the rows into one array of values per key.
    @vectors = keys.each_with_object({}) do |k, vectors|
      vectors[k] = rows.map { |row| row[k] }
    end

    @numeric_columns, @categorical_columns =
      keys.partition { |k| @vectors[k].all? { |v| v.nil? || v.is_a?(Numeric) } }
    @array_type = arrays
  end

  raise ArgumentError, "No data" if size == 0
end

Instance Attribute Details

#array_type ⇒ Object (readonly)

Returns the value of attribute array_type.



3
4
5
# File 'lib/isotree/dataset.rb', line 3

# Reader for @array_type. The constructor sets it to true for Numo::NArray
# input and for rows given as arrays, and to false for Rover::DataFrame input
# and for rows given as hashes.
def array_type
  @array_type
end

#categorical_columns ⇒ Object (readonly)

Returns the value of attribute categorical_columns.



3
4
5
# File 'lib/isotree/dataset.rb', line 3

# Reader for @categorical_columns: the column keys the constructor classified
# as non-numeric (always empty for Numo::NArray input).
def categorical_columns
  @categorical_columns
end

#numeric_columns ⇒ Object (readonly)

Returns the value of attribute numeric_columns.



3
4
5
# File 'lib/isotree/dataset.rb', line 3

# Reader for @numeric_columns: the column keys the constructor classified as
# numeric (column indexes for array-style input).
def numeric_columns
  @numeric_columns
end

Instance Method Details

#[](k) ⇒ Object



64
65
66
# File 'lib/isotree/dataset.rb', line 64

# Looks up +k+ in the per-column vectors built by the constructor and returns
# whatever is stored there.
def [](k)
  @vectors[k]
end

#size ⇒ Object



68
69
70
# File 'lib/isotree/dataset.rb', line 68

# Length of the first stored column vector, or 0 when no vectors are held.
def size
  return 0 unless @vectors.any?

  @vectors.values.first.size
end