Class: IsoTree::IsolationForest
- Inherits:
-
Object
- Object
- IsoTree::IsolationForest
- Defined in:
- lib/isotree/isolation_forest.rb
Class Method Summary collapse
Instance Method Summary collapse
-
#export_model(path, add_metada_file: false) ⇒ Object
Exports the model in the same format as the Python library, so models are compatible.
- #fit(x) ⇒ Object
-
#initialize(sample_size: "auto", ntrees: 500, ndim: 3, ntry: 1, max_depth: "auto", ncols_per_tree: nil, prob_pick_pooled_gain: 0.0, prob_pick_avg_gain: 0.0, prob_pick_full_gain: 0.0, prob_pick_dens: 0.0, prob_pick_col_by_range: 0.0, prob_pick_col_by_var: 0.0, prob_pick_col_by_kurt: 0.0, min_gain: 0.0, missing_action: "auto", new_categ_action: "auto", categ_split_type: "auto", all_perm: false, coef_by_prop: false, weights_as_sample_prob: true, sample_with_replacement: false, penalize_range: false, standardize_data: true, scoring_metric: "depth", fast_bratio: true, weigh_by_kurtosis: false, coefs: "uniform", assume_full_distr: true, min_imp_obs: 3, depth_imp: "higher", weigh_imp_rows: "inverse", random_seed: 1, use_long_double: false, nthreads: -1) ⇒ IsolationForest
constructor
A new instance of IsolationForest.
- #predict(x, output: "score") ⇒ Object
Constructor Details
#initialize(sample_size: "auto", ntrees: 500, ndim: 3, ntry: 1, max_depth: "auto", ncols_per_tree: nil, prob_pick_pooled_gain: 0.0, prob_pick_avg_gain: 0.0, prob_pick_full_gain: 0.0, prob_pick_dens: 0.0, prob_pick_col_by_range: 0.0, prob_pick_col_by_var: 0.0, prob_pick_col_by_kurt: 0.0, min_gain: 0.0, missing_action: "auto", new_categ_action: "auto", categ_split_type: "auto", all_perm: false, coef_by_prop: false, weights_as_sample_prob: true, sample_with_replacement: false, penalize_range: false, standardize_data: true, scoring_metric: "depth", fast_bratio: true, weigh_by_kurtosis: false, coefs: "uniform", assume_full_distr: true, min_imp_obs: 3, depth_imp: "higher", weigh_imp_rows: "inverse", random_seed: 1, use_long_double: false, nthreads: -1) ⇒ IsolationForest
Returns a new instance of IsolationForest.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/isotree/isolation_forest.rb', line 3

# Records the configuration keywords on the new instance. Nothing is fitted
# here: the body only assigns instance variables.
#
# Each keyword is copied unchanged into the instance variable of the same
# name. The single exception is +nthreads+, which is resolved to a concrete
# count before being stored (see below).
#
# @param nthreads [Integer, nil] thread count; a negative value (the default
#   is -1) or nil is replaced by Etc.nprocessors
# @return [IsolationForest] a new instance of IsolationForest
def initialize(
  sample_size: "auto",
  ntrees: 500,
  ndim: 3,
  ntry: 1,
  # categ_cols: nil,
  max_depth: "auto",
  ncols_per_tree: nil,
  prob_pick_pooled_gain: 0.0,
  prob_pick_avg_gain: 0.0,
  prob_pick_full_gain: 0.0,
  prob_pick_dens: 0.0,
  prob_pick_col_by_range: 0.0,
  prob_pick_col_by_var: 0.0,
  prob_pick_col_by_kurt: 0.0,
  min_gain: 0.0,
  missing_action: "auto",
  new_categ_action: "auto",
  categ_split_type: "auto",
  all_perm: false,
  coef_by_prop: false,
  # recode_categ: false,
  weights_as_sample_prob: true,
  sample_with_replacement: false,
  penalize_range: false,
  standardize_data: true,
  scoring_metric: "depth",
  fast_bratio: true,
  weigh_by_kurtosis: false,
  coefs: "uniform",
  assume_full_distr: true,
  # build_imputer: false,
  min_imp_obs: 3,
  depth_imp: "higher",
  weigh_imp_rows: "inverse",
  random_seed: 1,
  use_long_double: false,
  nthreads: -1
)
  @sample_size = sample_size
  @ntrees = ntrees
  @ndim = ndim
  @ntry = ntry
  # @categ_cols = categ_cols
  @max_depth = max_depth
  @ncols_per_tree = ncols_per_tree
  @prob_pick_pooled_gain = prob_pick_pooled_gain
  @prob_pick_avg_gain = prob_pick_avg_gain
  @prob_pick_full_gain = prob_pick_full_gain
  @prob_pick_dens = prob_pick_dens
  @prob_pick_col_by_range = prob_pick_col_by_range
  @prob_pick_col_by_var = prob_pick_col_by_var
  @prob_pick_col_by_kurt = prob_pick_col_by_kurt
  @min_gain = min_gain
  @missing_action = missing_action
  @new_categ_action = new_categ_action
  @categ_split_type = categ_split_type
  @all_perm = all_perm
  @coef_by_prop = coef_by_prop
  # @recode_categ = recode_categ
  @weights_as_sample_prob = weights_as_sample_prob
  @sample_with_replacement = sample_with_replacement
  @penalize_range = penalize_range
  @standardize_data = standardize_data
  @scoring_metric = scoring_metric
  @fast_bratio = fast_bratio
  @weigh_by_kurtosis = weigh_by_kurtosis
  @coefs = coefs
  @assume_full_distr = assume_full_distr
  @min_imp_obs = min_imp_obs
  @depth_imp = depth_imp
  @weigh_imp_rows = weigh_imp_rows
  @random_seed = random_seed
  @use_long_double = use_long_double

  # etc module returns virtual cores
  # nil is treated like a negative count so it cannot reach the `<` comparison
  # and raise NoMethodError.
  nthreads = Etc.nprocessors if nthreads.nil? || nthreads < 0
  @nthreads = nthreads
end
Class Method Details
.import_model(path) ⇒ Object
112 113 114 115 116 117 118 |
# File 'lib/isotree/isolation_forest.rb', line 112

# Builds a model from data read by Ext.deserialize_combined.
#
# The first value returned by Ext.deserialize_combined is stored as
# @ext_iso_forest on a freshly constructed instance; the second value is
# parsed as JSON and passed to the instance's import_metadata method.
#
# @param path [Object] forwarded unchanged to Ext.deserialize_combined
#   (presumably a file path String - confirm against the extension)
# @return [Object] the newly constructed, populated instance
def self.import_model(path)
  model = new
  # The second destructured value must be kept: it is the JSON text that
  # JSON.parse needs. (The listing showed `ext_iso_forest, =` and an
  # argument-less `JSON.parse()`, which raises ArgumentError.)
  ext_iso_forest, metadata = Ext.deserialize_combined(path)
  model.instance_variable_set(:@ext_iso_forest, ext_iso_forest)
  # Invoked through send - presumably because import_metadata is private.
  model.send(:import_metadata, JSON.parse(metadata))
  model
end
Instance Method Details
#export_model(path, add_metada_file: false) ⇒ Object
Exports the model in the same format as the Python library, so models are compatible.
101 102 103 104 105 106 107 108 109 110 |
# Serializes the fitted model: calls check_fit first, then hands
# @ext_iso_forest, path and a JSON string to Ext.serialize_combined. The
# listing also contains a File.write to "#{path}.metadata" with
# pretty-printed JSON.
#
# NOTE(review): this listing appears to have lost tokens during extraction:
# `check_fit = if` has no right-hand side and no condition, and both
# `JSON.pretty_generate(` and `JSON.generate()` are missing their data
# argument. Presumably a metadata value is built after check_fit and the
# File.write is guarded by add_metada_file - confirm against
# lib/isotree/isolation_forest.rb before relying on this text.
# NOTE(review): the inline comment says "indent 4 spaces" but the literal
# shown is a single space; whitespace was probably collapsed - confirm.
# File 'lib/isotree/isolation_forest.rb', line 101 def export_model(path, add_metada_file: false) check_fit = if # indent 4 spaces like Python File.write("#{path}.metadata", JSON.pretty_generate(, indent: " ")) end Ext.serialize_combined(@ext_iso_forest, path, JSON.generate()) end |
#fit(x) ⇒ Object
63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 |
# Fits the model on x and stores the result of Ext.fit_iforest in
# @ext_iso_forest.
#
# Visible steps: update_params is called, x is wrapped in Dataset.new,
# prep_fit(x) runs, a hash is built with merge, a :sample_size of "auto" is
# replaced by the smaller of :nrows and 10000, and :sample_size is then
# capped at :nrows (the inline comment gives "prevent segfault" as the reason).
#
# NOTE(review): this listing appears to have lost identifiers during
# extraction: the receiver of every `[:sample_size]` / `[:nrows]` lookup, the
# left-hand side and callee in `= (x).merge()`, the merge argument and the
# argument of Ext.fit_iforest are all missing. Presumably they are one
# options hash and two helper calls - confirm against
# lib/isotree/isolation_forest.rb before relying on this text.
# File 'lib/isotree/isolation_forest.rb', line 63 def fit(x) # make export consistent with Python library update_params x = Dataset.new(x) prep_fit(x) = (x).merge() if [:sample_size] == "auto" [:sample_size] = [[:nrows], 10000].min end # prevent segfault [:sample_size] = [:nrows] if [:sample_size] > [:nrows] @ext_iso_forest = Ext.fit_iforest() end |
#predict(x, output: "score") ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 |
# Runs prediction for x and returns whatever Ext.predict_iforest returns.
#
# Visible steps: check_fit is called, x is wrapped in Dataset.new,
# prep_predict(x) runs, and a hash is built with merge(nthreads: @nthreads).
# output selects the :standardize entry: "score" sets it to true, "avg_depth"
# sets it to false, and any other value raises ArgumentError ("Unknown output").
#
# NOTE(review): this listing appears to have lost identifiers during
# extraction: the left-hand side and callee in `= (x).merge(...)`, the
# receiver of `[:standardize]` and the second argument of
# Ext.predict_iforest are missing. Presumably they are a single options
# hash - confirm against lib/isotree/isolation_forest.rb.
# File 'lib/isotree/isolation_forest.rb', line 81 def predict(x, output: "score") check_fit x = Dataset.new(x) prep_predict(x) = (x).merge(nthreads: @nthreads) case output when "score" [:standardize] = true when "avg_depth" [:standardize] = false else raise ArgumentError, "Unknown output" end Ext.predict_iforest(@ext_iso_forest, ) end |