Class: IsoTree::IsolationForest

Inherits:
Object
  • Object
show all
Defined in:
lib/isotree/isolation_forest.rb

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(sample_size: "auto", ntrees: 500, ndim: 3, ntry: 1, max_depth: "auto", ncols_per_tree: nil, prob_pick_pooled_gain: 0.0, prob_pick_avg_gain: 0.0, prob_pick_full_gain: 0.0, prob_pick_dens: 0.0, prob_pick_col_by_range: 0.0, prob_pick_col_by_var: 0.0, prob_pick_col_by_kurt: 0.0, min_gain: 0.0, missing_action: "auto", new_categ_action: "auto", categ_split_type: "auto", all_perm: false, coef_by_prop: false, weights_as_sample_prob: true, sample_with_replacement: false, penalize_range: false, standardize_data: true, scoring_metric: "depth", fast_bratio: true, weigh_by_kurtosis: false, coefs: "uniform", assume_full_distr: true, min_imp_obs: 3, depth_imp: "higher", weigh_imp_rows: "inverse", random_seed: 1, use_long_double: false, nthreads: -1) ⇒ IsolationForest

Returns a new instance of IsolationForest.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/isotree/isolation_forest.rb', line 3

# Builds a new, unfitted forest configuration.
#
# Each keyword option is stored unchanged in an instance variable of the
# same name. The single exception is +nthreads+: a negative value is
# replaced by the processor count reported by Etc.
#
# The commented-out entries (categ_cols, recode_categ, build_imputer) are
# kept as placeholders; they are not accepted as options here.
def initialize(
  sample_size: "auto", ntrees: 500, ndim: 3, ntry: 1,
  # categ_cols: nil,
  max_depth: "auto", ncols_per_tree: nil,
  prob_pick_pooled_gain: 0.0, prob_pick_avg_gain: 0.0,
  prob_pick_full_gain: 0.0, prob_pick_dens: 0.0,
  prob_pick_col_by_range: 0.0, prob_pick_col_by_var: 0.0, prob_pick_col_by_kurt: 0.0,
  min_gain: 0.0, missing_action: "auto", new_categ_action: "auto",
  categ_split_type: "auto", all_perm: false, coef_by_prop: false,
  # recode_categ: false,
  weights_as_sample_prob: true,
  sample_with_replacement: false, penalize_range: false, standardize_data: true,
  scoring_metric: "depth", fast_bratio: true,
  weigh_by_kurtosis: false, coefs: "uniform", assume_full_distr: true,
  # build_imputer: false,
  min_imp_obs: 3, depth_imp: "higher",
  weigh_imp_rows: "inverse", random_seed: 1, use_long_double: false, nthreads: -1
)
  # -- sampling and tree shape
  @sample_size = sample_size
  @ntrees = ntrees
  @ndim = ndim
  @ntry = ntry
  # @categ_cols = categ_cols
  @max_depth = max_depth
  @ncols_per_tree = ncols_per_tree

  # -- split / column selection probabilities
  @prob_pick_pooled_gain = prob_pick_pooled_gain
  @prob_pick_avg_gain = prob_pick_avg_gain
  @prob_pick_full_gain = prob_pick_full_gain
  @prob_pick_dens = prob_pick_dens
  @prob_pick_col_by_range = prob_pick_col_by_range
  @prob_pick_col_by_var = prob_pick_col_by_var
  @prob_pick_col_by_kurt = prob_pick_col_by_kurt
  @min_gain = min_gain

  # -- missing / categorical handling
  @missing_action = missing_action
  @new_categ_action = new_categ_action
  @categ_split_type = categ_split_type
  @all_perm = all_perm
  @coef_by_prop = coef_by_prop
  # @recode_categ = recode_categ

  # -- weighting, scoring and coefficients
  @weights_as_sample_prob = weights_as_sample_prob
  @sample_with_replacement = sample_with_replacement
  @penalize_range = penalize_range
  @standardize_data = standardize_data
  @scoring_metric = scoring_metric
  @fast_bratio = fast_bratio
  @weigh_by_kurtosis = weigh_by_kurtosis
  @coefs = coefs
  @assume_full_distr = assume_full_distr

  # -- imputation-related options
  @min_imp_obs = min_imp_obs
  @depth_imp = depth_imp
  @weigh_imp_rows = weigh_imp_rows

  # -- runtime
  @random_seed = random_seed
  @use_long_double = use_long_double

  # A negative thread count falls back to Etc.nprocessors
  # (the Etc module reports virtual cores).
  @nthreads = nthreads < 0 ? Etc.nprocessors : nthreads
end

Class Method Details

.import_model(path) ⇒ Object



112
113
114
115
116
117
118
# File 'lib/isotree/isolation_forest.rb', line 112

# Loads a model previously written by #export_model.
#
# Ext.deserialize_combined(path) is destructured into the native forest
# handle and a JSON metadata string. The handle is attached to a freshly
# constructed instance, and the parsed metadata is passed to the private
# import_metadata helper.
#
# path - location of the combined model file.
#
# Returns the populated model instance.
def self.import_model(path)
  model = new
  ext_iso_forest, metadata = Ext.deserialize_combined(path)
  model.instance_variable_set(:@ext_iso_forest, ext_iso_forest)
  # import_metadata is invoked through send, so it may be a private method
  model.send(:import_metadata, JSON.parse(metadata))
  model
end

Instance Method Details

#export_model(path, add_metada_file: false) ⇒ Object

Exports the fitted model in the same format as the Python library, so models are compatible between the two.



101
102
103
104
105
106
107
108
109
110
# File 'lib/isotree/isolation_forest.rb', line 101

# Writes the fitted model to +path+ in the combined format that
# .import_model reads back (same format as Python so models are compatible).
#
# path            - destination of the combined model file.
# add_metada_file - when true, also writes a pretty-printed JSON copy of the
#                   metadata to "#{path}.metadata". The keyword is spelled
#                   "metada" in the public interface and is kept as-is.
#
# check_fit runs first; it presumably raises when the model has not been
# fitted (defined elsewhere; confirm).
def export_model(path, add_metada_file: false)
  check_fit

  # NOTE(review): `export_metadata` is assumed to be the counterpart of the
  # `import_metadata` helper used by .import_model; confirm against the class.
  metadata = export_metadata
  if add_metada_file
    # indent 4 spaces like Python
    File.write("#{path}.metadata", JSON.pretty_generate(metadata, indent: "    "))
  end
  Ext.serialize_combined(@ext_iso_forest, path, JSON.generate(metadata))
end

#fit(x) ⇒ Object



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
# File 'lib/isotree/isolation_forest.rb', line 63

# Fits the forest on +x+ and stores the resulting native handle in
# @ext_iso_forest (which is also the return value).
#
# x - input data; it is wrapped in Dataset.new before use.
def fit(x)
  # keeps exported parameters consistent with the Python library
  update_params

  dataset = Dataset.new(x)
  prep_fit(dataset)

  options = data_options(dataset).merge(fit_options)
  nrows = options[:nrows]

  # "auto" means: use every row, capped at 10000
  options[:sample_size] = [nrows, 10000].min if options[:sample_size] == "auto"

  # a sample larger than the data would segfault in the extension, so clamp it
  options[:sample_size] = nrows if options[:sample_size] > nrows

  @ext_iso_forest = Ext.fit_iforest(options)
end

#predict(x, output: "score") ⇒ Object



81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/isotree/isolation_forest.rb', line 81

# Runs the fitted forest over +x+ and returns whatever
# Ext.predict_iforest returns.
#
# x      - input data; it is wrapped in Dataset.new before use.
# output - "score" requests standardized output, "avg_depth" requests
#          non-standardized output.
#
# Raises ArgumentError ("Unknown output") for any other +output+ value.
def predict(x, output: "score")
  check_fit

  dataset = Dataset.new(x)
  prep_predict(dataset)

  options = data_options(dataset).merge(nthreads: @nthreads)

  # the requested output kind is expressed as the :standardize flag
  options[:standardize] =
    case output
    when "score" then true
    when "avg_depth" then false
    else raise ArgumentError, "Unknown output"
    end

  Ext.predict_iforest(@ext_iso_forest, options)
end