Module: Rumale::Dataset
- Defined in:
- lib/rumale/dataset.rb
Overview
Module for loading and saving a dataset file.
Class Method Summary
-
.dump_libsvm_file(data, labels, filename, zero_based: false) ⇒ Object
Dump the dataset with the libsvm file format.
-
.load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat) ⇒ Array<Numo::NArray>
Load a dataset with the libsvm file format into Numo::NArray.
-
.make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil) ⇒ Object
Generate a two-dimensional data set consisting of an inner circle and an outer circle.
-
.make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil) ⇒ Object
Generate a two-dimensional data set consisting of two shifted half circles.
Class Method Details
.dump_libsvm_file(data, labels, filename, zero_based: false) ⇒ Object
Dump the dataset with the libsvm file format.
38 39 40 41 42 43 44 45 46 47 48 49 50 |
# File 'lib/rumale/dataset.rb', line 38
# Dump the dataset with the libsvm file format.
#
# One line is written per sample; the number of lines is the smaller of the
# first dimensions of data and labels.
#
# @param data [Numo::NArray] Feature values; row n is written as sample n.
# @param labels [Numo::NArray] Labels. When it has no second dimension, one
#   label per sample is written; otherwise row n is converted to an Array.
# @param filename [String] Path of the file to write (opened in 'w' mode).
# @param zero_based [Boolean] Forwarded to dump_libsvm_line
#   (presumably whether feature indices start at 0 -- confirm against the helper).
def dump_libsvm_file(data, labels, filename, zero_based: false)
  row_count = [data.shape[0], labels.shape[0]].min
  multi_label = !labels.shape[1].nil?
  label_type = detect_dtype(labels)
  value_type = detect_dtype(data)
  File.open(filename, 'w') do |io|
    row_count.times do |row|
      target = if multi_label
                 labels[row, true].to_a
               else
                 labels[row]
               end
      line = dump_libsvm_line(target, data[row, true], label_type, value_type, zero_based)
      io.puts(line)
    end
  end
end
.load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat) ⇒ Array<Numo::NArray>
Load a dataset with the libsvm file format into Numo::NArray.
19 20 21 22 23 24 25 26 27 28 29 30 |
# File 'lib/rumale/dataset.rb', line 19
# Load a dataset with the libsvm file format into Numo::NArray.
#
# @param filename [String] Path of the file to read.
# @param zero_based [Boolean] Forwarded to parse_libsvm_line
#   (presumably whether feature indices start at 0 -- confirm against the helper).
# @param dtype [Class] Forwarded to convert_to_matrix; defaults to Numo::DFloat.
# @return [Array<Numo::NArray>] A two-element array: the result of
#   convert_to_matrix followed by the labels wrapped with Numo::NArray.asarray.
def load_libsvm_file(filename, zero_based: false, dtype: Numo::DFloat)
  labels = []
  feature_rows = []
  widest_index = 0
  # The CSV reader is used purely as a tokenizer: each row is split on the separator below.
  CSV.foreach(filename, col_sep: "\s", headers: false) do |tokens|
    label, features, max_idx = parse_libsvm_line(tokens, zero_based)
    labels << label
    feature_rows << features
    # Track the largest index reported by any row; it is handed to convert_to_matrix.
    widest_index = max_idx if widest_index < max_idx
  end
  samples = convert_to_matrix(feature_rows, widest_index, dtype)
  [samples, Numo::NArray.asarray(labels)]
end
.make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil) ⇒ Object
Generate a two-dimensional data set consisting of an inner circle and an outer circle.
60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 |
# File 'lib/rumale/dataset.rb', line 60
# Generate a two-dimensional data set consisting of an inner circle and an outer circle.
#
# The first half of the samples (rounded down) lies on the unit circle and is
# labeled 0; the remaining samples lie on a circle scaled by factor and are labeled 1.
#
# @param n_samples [Integer] The number of samples to generate; must be 2 or more.
# @param shuffle [Boolean] Whether to shuffle the generated samples.
# @param noise [Float, nil] When not nil, random values from
#   Rumale::Utils.rand_normal(shape, rng, 0.0, noise) are added to the samples
#   (presumably noise is the standard deviation -- confirm against Rumale::Utils).
# @param factor [Float] The scale applied to the inner circle; must lie in the open interval (0, 1).
# @param random_seed [Integer, nil] The seed for the random number generator.
#   When nil, Kernel#srand is called to obtain one; note that srand also
#   reseeds Ruby's global generator.
# @return [Array] [x, y] where x is a Numo::DFloat with n_samples rows and 2 columns
#   and y is a Numo::Int32 vector of the 0/1 labels.
# @raise [ArgumentError] If n_samples is 1 or less.
# @raise [RangeError] If factor is not inside (0, 1).
def make_circles(n_samples, shuffle: true, noise: nil, factor: 0.8, random_seed: nil)
  Rumale::Validation.check_params_integer(n_samples: n_samples)
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
  Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
  Rumale::Validation.check_params_float(factor: factor)
  Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
  # The guard accepts n_samples == 2 (one sample per circle), so the message states that bound.
  raise ArgumentError, 'The number of samples must be more than 1.' if n_samples <= 1
  raise RangeError, 'The interval of factor is (0, 1).' if factor <= 0 || factor >= 1

  # initialize some variables.
  rng = Random.new(random_seed || srand)
  n_samples_out = n_samples.fdiv(2).to_i
  n_samples_in = n_samples - n_samples_out

  # make two circles.
  linsp_out = Numo::DFloat.linspace(0, 2 * Math::PI, n_samples_out)
  linsp_in = Numo::DFloat.linspace(0, 2 * Math::PI, n_samples_in)
  circle_out = Numo::DFloat[Numo::NMath.cos(linsp_out), Numo::NMath.sin(linsp_out)].transpose
  circle_in = Numo::DFloat[Numo::NMath.cos(linsp_in), Numo::NMath.sin(linsp_in)].transpose
  x = Numo::DFloat.vstack([circle_out, factor * circle_in])
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])

  # shuffle data indices.
  # rng.dup is used so the shuffle does not advance rng itself.
  if shuffle
    rand_ids = [*0...n_samples].shuffle(random: rng.dup)
    x = x[rand_ids, true].dup
    y = y[rand_ids].dup
  end

  # add gaussian noise.
  # The noise generator is another copy of rng in its initial state.
  x += Rumale::Utils.rand_normal(x.shape, rng.dup, 0.0, noise) unless noise.nil?
  [x, y]
end
.make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil) ⇒ Object
Generate a two-dimensional data set consisting of two shifted half circles.
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 |
# File 'lib/rumale/dataset.rb', line 99
# Generate a two-dimensional data set consisting of two shifted half circles.
#
# The first half of the samples (rounded down) lies on the upper half of the
# unit circle and is labeled 0; the remaining samples lie on the curve
# (1 - cos(t), 1 - sin(t) - 0.5) for t in [0, pi] and are labeled 1.
#
# @param n_samples [Integer] The number of samples to generate; must be 2 or more.
# @param shuffle [Boolean] Whether to shuffle the generated samples.
# @param noise [Float, nil] When not nil, random values from
#   Rumale::Utils.rand_normal(shape, rng, 0.0, noise) are added to the samples
#   (presumably noise is the standard deviation -- confirm against Rumale::Utils).
# @param random_seed [Integer, nil] The seed for the random number generator.
#   When nil, Kernel#srand is called to obtain one; note that srand also
#   reseeds Ruby's global generator.
# @return [Array] [x, y] where x is a Numo::DFloat with n_samples rows and 2 columns
#   and y is a Numo::Int32 vector of the 0/1 labels.
# @raise [ArgumentError] If n_samples is 1 or less.
def make_moons(n_samples, shuffle: true, noise: nil, random_seed: nil)
  Rumale::Validation.check_params_integer(n_samples: n_samples)
  Rumale::Validation.check_params_boolean(shuffle: shuffle)
  Rumale::Validation.check_params_type_or_nil(Float, noise: noise)
  Rumale::Validation.check_params_type_or_nil(Integer, random_seed: random_seed)
  # The guard accepts n_samples == 2 (one sample per half circle), so the message states that bound.
  raise ArgumentError, 'The number of samples must be more than 1.' if n_samples <= 1

  # initialize some variables.
  rng = Random.new(random_seed || srand)
  n_samples_out = n_samples.fdiv(2).to_i
  n_samples_in = n_samples - n_samples_out

  # make two half circles.
  linsp_out = Numo::DFloat.linspace(0, Math::PI, n_samples_out)
  linsp_in = Numo::DFloat.linspace(0, Math::PI, n_samples_in)
  circle_out = Numo::DFloat[Numo::NMath.cos(linsp_out), Numo::NMath.sin(linsp_out)].transpose
  circle_in = Numo::DFloat[1 - Numo::NMath.cos(linsp_in), 1 - Numo::NMath.sin(linsp_in) - 0.5].transpose
  x = Numo::DFloat.vstack([circle_out, circle_in])
  y = Numo::Int32.hstack([Numo::Int32.zeros(n_samples_out), Numo::Int32.ones(n_samples_in)])

  # shuffle data indices.
  # rng.dup is used so the shuffle does not advance rng itself.
  if shuffle
    rand_ids = [*0...n_samples].shuffle(random: rng.dup)
    x = x[rand_ids, true].dup
    y = y[rand_ids].dup
  end

  # add gaussian noise.
  # The noise generator is another copy of rng in its initial state.
  x += Rumale::Utils.rand_normal(x.shape, rng.dup, 0.0, noise) unless noise.nil?
  [x, y]
end