Module: LightGBM

Defined in: lib/lightgbm.rb,
lib/lightgbm/ffi.rb,
lib/lightgbm/model.rb,
lib/lightgbm/utils.rb,
lib/lightgbm/ranker.rb,
lib/lightgbm/booster.rb,
lib/lightgbm/dataset.rb,
lib/lightgbm/version.rb,
lib/lightgbm/regressor.rb,
lib/lightgbm/classifier.rb

Defined Under Namespace

Modules: FFI, Utils

Classes: Booster, Classifier, Dataset, Error, Model, Ranker, Regressor

Constant Summary collapse

VERSION = "0.3.4"

Class Attribute Summary collapse

.ffi_lib ⇒ Object

Returns the value of attribute ffi_lib.

Class Method Summary collapse

Class Attribute Details

.ffi_lib ⇒ `Object`

Returns the value of attribute ffi_lib.



20
21
22

# File 'lib/lightgbm.rb', line 20

# Reader for the ffi_lib attribute.
#
# @return [Object] whatever is currently stored in @ffi_lib (nil when unset)
def ffi_lib
  instance_variable_get(:@ffi_lib)
end

Class Method Details

.cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true) ⇒ `Object`

# File 'lib/lightgbm.rb', line 113

# Runs k-fold cross-validation: one Booster is built per fold and all of them
# are advanced in lock-step, aggregating the validation metrics of every fold
# after each boosting round.
#
# @param params [Hash] booster parameters, passed unchanged to Booster.new
# @param train_set [Object] data to split; must respond to num_data and subset
# @param num_boost_round [Integer] maximum number of boosting rounds
# @param nfold [Integer] number of folds
# @param seed [Integer] seed for the Random instance used when shuffling
# @param shuffle [Boolean] shuffle row indices before splitting into folds
# @param early_stopping_rounds [Integer, nil] stop once an aggregated metric
#   has not improved for this many rounds; nil disables early stopping
# @param verbose_eval [Boolean, nil] print one summary line per round
# @param show_stdv [Boolean] include the standard deviation in printed lines
# @return [Hash] "<metric>-mean" and "<metric>-stdv" keys, each mapping to an
#   array with one value per retained round
# @raise [ArgumentError] when the rows cannot be split into nfold non-empty folds
def cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true)
  rand_idx = (0...train_set.num_data).to_a
  rand_idx.shuffle!(random: Random.new(seed)) if shuffle

  # Integer division: rows beyond kstep * nfold end up in no fold at all.
  kstep = rand_idx.size / nfold
  # each_slice would otherwise fail with an opaque "invalid slice size" error.
  raise ArgumentError, "Cannot split #{rand_idx.size} rows into #{nfold} folds" if kstep < 1

  test_id = rand_idx.each_slice(kstep).to_a[0...nfold]
  # The training indices of fold i are every test slice except slice i.
  train_id = Array.new(nfold) do |i|
    test_id.reject.with_index { |_, j| j == i }.flatten
  end

  boosters = train_id.zip(test_id).map do |train_idx, test_idx|
    fold_train_set = train_set.subset(train_idx)
    fold_valid_set = train_set.subset(test_idx)
    booster = Booster.new(params: params, train_set: fold_train_set)
    booster.add_valid(fold_valid_set, "valid")
    booster
  end

  eval_hist = {}
  best_score = {}
  best_iter = {}
  best_iteration = nil

  num_boost_round.times do |iteration|
    boosters.each(&:update)

    # Each evaluation entry is read as (label, metric label, score, higher-is-better flag).
    scores = {}
    higher_better = {}
    boosters.map(&:eval_valid).flat_map(&:reverse).each do |r|
      (scores[r[1]] ||= []) << r[2]
      higher_better[r[1]] = r[3]
    end

    message_parts = ["[#{iteration + 1}]"]

    means = {}
    scores.each do |eval_name, vals|
      avg = mean(vals)
      spread = stdev(vals)

      (eval_hist["#{eval_name}-mean"] ||= []) << avg
      (eval_hist["#{eval_name}-stdv"] ||= []) << spread

      means[eval_name] = avg

      message_parts <<
        if show_stdv
          "cv_agg's %s: %g + %g" % [eval_name, avg, spread]
        else
          "cv_agg's %s: %g" % [eval_name, avg]
        end
    end

    puts message_parts.join("\t") if verbose_eval

    next unless early_stopping_rounds

    stop_early = false
    means.each do |k, score|
      # Compare in the direction the metric itself reports as "better".
      improved =
        if best_score[k].nil?
          true
        elsif higher_better[k]
          score > best_score[k]
        else
          score < best_score[k]
        end

      if improved
        best_score[k] = score
        best_iter[k] = iteration
      elsif iteration - best_iter[k] >= early_stopping_rounds
        best_iteration = best_iter[k]
        stop_early = true
        break
      end
    end
    break if stop_early
  end

  if early_stopping_rounds
    # use best iteration from first metric if not stopped early
    best_iteration ||= best_iter.values.first
    eval_hist.each_key do |k|
      eval_hist[k] = eval_hist[k].first(best_iteration + 1)
    end
  end

  eval_hist
end

.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true) ⇒ `Object`

Raises:

(ArgumentError)

# File 'lib/lightgbm.rb', line 35

# Trains a Booster for up to num_boost_round rounds, optionally reporting
# metrics each round and stopping early when validation metrics stall.
#
# @param params [Hash] booster parameters, passed unchanged to Booster.new
# @param train_set [Object] training data handed to Booster.new
# @param num_boost_round [Integer] maximum number of boosting rounds
# @param valid_sets [Array] datasets to evaluate each round; an entry equal to
#   train_set enables reporting of training metrics instead of being added as
#   a validation set
# @param valid_names [Array<String>] display names matched to valid_sets by
#   position; missing names default to "training" / "valid_<index>"
# @param early_stopping_rounds [Integer, nil] stop once a validation metric has
#   not improved for this many rounds; nil disables early stopping
# @param verbose_eval [Boolean] print progress messages
# @return [Booster] the trained booster, with best_iteration set (0 unless
#   early stopping was requested)
# @raise [ArgumentError] if early stopping is requested without any
#   validation set other than train_set
def train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true)
  booster = Booster.new(params: params, train_set: train_set)

  valid_contain_train = false
  valid_sets.zip(valid_names).each_with_index do |(data, name), i|
    if data == train_set
      booster.train_data_name = name || "training"
      valid_contain_train = true
    else
      # ensure the validation set references the training set
      data.reference = train_set
      booster.add_valid(data, name || "valid_#{i}")
    end
  end

  raise ArgumentError, "For early stopping, at least one validation set is required" if early_stopping_rounds && valid_sets.none? { |v| v != train_set }

  booster.best_iteration = 0

  if early_stopping_rounds
    best_score = []
    best_iter = []
    best_message = []

    puts "Training until validation scores don't improve for #{early_stopping_rounds.to_i} rounds." if verbose_eval
  end

  num_boost_round.times do |iteration|
    booster.update

    next unless valid_sets.any?

    valid_results = booster.eval_valid
    train_results = valid_contain_train ? booster.eval_train : []

    # Printed order is the reverse of validation-then-training results.
    reported = (valid_results + train_results).reverse
    messages = reported.map { |res| "%s's %s: %g" % [res[0], res[1], res[2]] }
    message = "[#{iteration + 1}]\t#{messages.join("\t")}"

    puts message if verbose_eval

    next unless early_stopping_rounds

    # Only validation results drive early stopping; training metrics are
    # reported above but must not decide when to stop or which round is best.
    stop_early = false
    valid_results.reverse.each_with_index do |(_, _, score, higher_better), i|
      op = higher_better ? :> : :<
      if best_score[i].nil? || score.public_send(op, best_score[i])
        best_score[i] = score
        best_iter[i] = iteration
        best_message[i] = message
      elsif iteration - best_iter[i] >= early_stopping_rounds
        booster.best_iteration = best_iter[i] + 1
        puts "Early stopping, best iteration is:\n#{best_message[i]}" if verbose_eval
        stop_early = true
        break
      end
    end

    break if stop_early

    # best_iter[0] is nil when no validation metric was recorded at all.
    if iteration == num_boost_round - 1 && best_iter[0]
      booster.best_iteration = best_iter[0] + 1
      puts "Did not meet early stopping. Best iteration is: #{best_message[0]}" if verbose_eval
    end
  end

  booster
end

Module: LightGBM

Defined Under Namespace

Constant Summary collapse

Class Attribute Summary collapse

Class Method Summary collapse

Class Attribute Details

.ffi_lib ⇒ Object

Class Method Details

.cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true) ⇒ Object

.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true) ⇒ Object

.ffi_lib ⇒ `Object`

.cv(params, train_set, num_boost_round: 100, nfold: 5, seed: 0, shuffle: true, early_stopping_rounds: nil, verbose_eval: nil, show_stdv: true) ⇒ `Object`

.train(params, train_set, num_boost_round: 100, valid_sets: [], valid_names: [], early_stopping_rounds: nil, verbose_eval: true) ⇒ `Object`