Class: LightGBM::Dataset

Inherits:
Object
  • Object
show all
Includes:
Utils
Defined in:
lib/lightgbm/dataset.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil) ⇒ Dataset

Returns a new instance of Dataset.



5
6
7
8
9
10
11
12
13
14
15
16
17
# File 'lib/lightgbm/dataset.rb', line 5

# Records every constructor argument on the instance and then calls
# +construct+ (defined outside this excerpt) to build the dataset.
#
# All keyword arguments are optional; +categorical_feature+ defaults to
# "auto", the others to nil.
def initialize(data, label: nil, weight: nil, group: nil, params: nil, reference: nil, used_indices: nil, categorical_feature: "auto", feature_names: nil)
  # options that control how the dataset is built
  @params = params
  @reference = reference
  @used_indices = used_indices
  @categorical_feature = categorical_feature
  @feature_names = feature_names

  # the data itself and the fields supplied with it
  @data = data
  @label = label
  @weight = weight
  @group = group

  construct
end

Instance Attribute Details

#data ⇒ Object (readonly)

Returns the value of attribute data.



3
4
5
# File 'lib/lightgbm/dataset.rb', line 3

# Reader for @data, which #initialize assigns from its +data+ argument.
# The page lists this attribute as read-only.
def data
  @data
end

#params ⇒ Object (readonly)

Returns the value of attribute params.



3
4
5
# File 'lib/lightgbm/dataset.rb', line 3

# Reader for @params, which #initialize assigns from its +params:+ keyword
# (nil when none was given). #subset falls back to this value when it is
# called without explicit params.
def params
  @params
end

Class Method Details

.finalize(addr) ⇒ Object



112
113
114
115
# File 'lib/lightgbm/dataset.rb', line 112

# Builds the callback that releases the native dataset located at +addr+.
#
# Nothing is freed when this method is called; the returned proc wraps
# +addr+ in an FFI pointer and passes it to LGBM_DatasetFree when invoked.
#
# @param addr [Integer] raw address of the native dataset handle
# @return [Proc] the deferred free call
def self.finalize(addr)
  # must be a plain proc, not a stabby lambda
  # (presumably because the caller invokes it with an argument, which a
  # zero-arity lambda would reject — confirm against the finalizer setup)
  proc do
    dataset_ptr = ::FFI::Pointer.new(:pointer, addr)
    FFI.LGBM_DatasetFree(dataset_ptr)
  end
end

Instance Method Details

#feature_names ⇒ Object



27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/lightgbm/dataset.rb', line 27

# Reads the feature names back from the native dataset.
#
# LGBM_DatasetGetFeatureNames writes into caller-provided buffers, so the
# method first calls it with a guessed capacity (1000 names, 255 chars each).
# If the counts reported by that call exceed the guess, it allocates buffers
# of the reported size and calls the function a second time.
#
# Returns an array of strings read from the string buffers, one per name
# reported by the most recent call.
def feature_names
  # must preallocate space
  # out-parameters filled in by the native call
  num_feature_names = ::FFI::MemoryPointer.new(:int)
  out_buffer_len = ::FFI::MemoryPointer.new(:size_t)
  # initial guess: room for 1000 names
  len = 1000
  out_strs = ::FFI::MemoryPointer.new(:pointer, len)
  # initial guess: 255 chars per name
  buffer_len = 255
  # one string buffer per slot; out_strs holds their addresses while
  # str_ptrs keeps the Ruby-side pointer objects for reading afterwards
  str_ptrs = len.times.map { ::FFI::MemoryPointer.new(:char, buffer_len) }
  out_strs.write_array_of_pointer(str_ptrs)
  check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, len, num_feature_names, buffer_len, out_buffer_len, out_strs)

  # sizes reported by the first call
  # (presumably the real name count and the buffer length a name needs —
  # confirm against the LightGBM C API docs)
  num_features = num_feature_names.read_int
  actual_len = out_buffer_len.read(:size_t)
  if num_features > len || actual_len > buffer_len
    # the guess was too small in at least one dimension: retry with buffers
    # sized from the reported values. The pointer array is only reallocated
    # when the name count was the problem; otherwise the existing 1000-slot
    # array is reused and its first num_features entries are overwritten.
    out_strs = ::FFI::MemoryPointer.new(:pointer, num_features) if num_features > len
    str_ptrs = num_features.times.map { ::FFI::MemoryPointer.new(:char, actual_len) }
    out_strs.write_array_of_pointer(str_ptrs)
    check_result FFI.LGBM_DatasetGetFeatureNames(handle_pointer, num_features, num_feature_names, actual_len, out_buffer_len, out_strs)
  end

  # should be the same, but get number of features
  # from most recent call (instead of num_features)
  str_ptrs[0, num_feature_names.read_int].map(&:read_string)
end

#feature_names=(feature_names) ⇒ Object



67
68
69
70
71
72
# File 'lib/lightgbm/dataset.rb', line 67

# Sets the feature names on the native dataset.
#
# Stores +feature_names+ in @feature_names, copies each name into native
# memory and passes the resulting pointer array, together with the number of
# names, to LGBM_DatasetSetFeatureNames.
#
# @param feature_names [Array<String>] names to assign
# @return the value check_result returns for the native call (only visible
#   when the setter is invoked via +send+)
def feature_names=(feature_names)
  @feature_names = feature_names
  # Keep the per-string pointers in a local until the native call returns.
  # The pointer array only stores raw addresses, so without this reference
  # nothing would stop the GC from releasing the string buffers before the
  # native library reads them.
  str_ptrs = feature_names.map { |name| ::FFI::MemoryPointer.from_string(name) }
  c_feature_names = ::FFI::MemoryPointer.new(:pointer, str_ptrs.size)
  c_feature_names.write_array_of_pointer(str_ptrs)
  check_result FFI.LGBM_DatasetSetFeatureNames(handle_pointer, c_feature_names, feature_names.size)
end

#group=(group) ⇒ Object



62
63
64
65
# File 'lib/lightgbm/dataset.rb', line 62

# Replaces the group data: records it in @group, then forwards it to
# set_field (defined outside this excerpt) under the "group" field name
# with an explicit :int32 type.
def group=(group)
  @group = group
  set_field("group", group, type: :int32)
end

#handle_pointer ⇒ Object



108
109
110
# File 'lib/lightgbm/dataset.rb', line 108

# Dereferences @handle and returns the pointer stored in it; this is the
# value the other methods here pass to the LGBM_Dataset* functions.
# NOTE(review): @handle is assigned outside this excerpt — presumably by
# construct; confirm.
def handle_pointer
  @handle.read_pointer
end

#label ⇒ Object



19
20
21
# File 'lib/lightgbm/dataset.rb', line 19

# Looks up the "label" field through the field helper (defined outside this
# excerpt) instead of returning the cached @label.
def label
  field("label")
end

#label=(label) ⇒ Object



52
53
54
55
# File 'lib/lightgbm/dataset.rb', line 52

# Replaces the labels: records them in @label, then forwards them to
# set_field (defined outside this excerpt) under the "label" field name.
def label=(label)
  @label = label
  set_field("label", label)
end

#num_data ⇒ Object



82
83
84
85
86
# File 'lib/lightgbm/dataset.rb', line 82

# Asks the native library for the dataset's data count.
#
# Calls LGBM_DatasetGetNumData with an int out-parameter, passes the
# returned status through check_result, and returns the integer written.
def num_data
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_DatasetGetNumData(handle_pointer, count_ptr)
  check_result(status)
  count_ptr.read_int
end

#num_feature ⇒ Object



88
89
90
91
92
# File 'lib/lightgbm/dataset.rb', line 88

# Asks the native library for the dataset's feature count.
#
# Calls LGBM_DatasetGetNumFeature with an int out-parameter, passes the
# returned status through check_result, and returns the integer written.
def num_feature
  count_ptr = ::FFI::MemoryPointer.new(:int)
  status = FFI.LGBM_DatasetGetNumFeature(handle_pointer, count_ptr)
  check_result(status)
  count_ptr.read_int
end

#reference=(reference) ⇒ Object

TODO only update reference if not in chain



75
76
77
78
79
80
# File 'lib/lightgbm/dataset.rb', line 75

# Points this dataset at a different reference dataset.
#
# Does nothing when +reference+ equals the current @reference; otherwise
# stores it and calls +construct+ (defined outside this excerpt) again.
def reference=(reference)
  return if reference == @reference

  @reference = reference
  construct
end

#save_binary(filename) ⇒ Object



94
95
96
# File 'lib/lightgbm/dataset.rb', line 94

# Saves the dataset through LGBM_DatasetSaveBinary.
#
# @param filename passed unchanged to the native call as the destination
# @return whatever check_result returns for the native status
def save_binary(filename)
  status = FFI.LGBM_DatasetSaveBinary(handle_pointer, filename)
  check_result(status)
end

#subset(used_indices, params: nil) ⇒ Object



98
99
100
101
102
103
104
105
106
# File 'lib/lightgbm/dataset.rb', line 98

# Creates a new Dataset restricted to +used_indices+, with this dataset as
# its reference.
#
# The new dataset is given no data of its own (nil). When +params+ is
# omitted, this dataset's own params are reused; per the original note,
# categorical_feature is expected to travel inside params.
#
# @param used_indices indices handed to the new Dataset as +used_indices:+
# @param params [Hash, nil] overrides for the new dataset's params
# @return [Dataset]
def subset(used_indices, params: nil)
  Dataset.new(
    nil,
    params: params || self.params,
    reference: self,
    used_indices: used_indices
  )
end

#weight ⇒ Object



23
24
25
# File 'lib/lightgbm/dataset.rb', line 23

# Looks up the "weight" field through the field helper (defined outside this
# excerpt) instead of returning the cached @weight.
def weight
  field("weight")
end

#weight=(weight) ⇒ Object



57
58
59
60
# File 'lib/lightgbm/dataset.rb', line 57

# Replaces the weights: records them in @weight, then forwards them to
# set_field (defined outside this excerpt) under the "weight" field name.
def weight=(weight)
  @weight = weight
  set_field("weight", weight)
end