Class: OutlierTree::Model

Inherits: Object
Defined in:
lib/outliertree/model.rb

Instance Method Summary

Constructor Details

#initialize(max_depth: 4, min_gain: 0.01, z_norm: 2.67, z_outlier: 8.0, pct_outliers: 0.01, min_size_numeric: 25, min_size_categ: 50, categ_split: "binarize", categ_outliers: "tail", numeric_split: "raw", follow_all: false, gain_as_pct: true, nthreads: -1) ⇒ Model

Returns a new instance of Model.



3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
# File 'lib/outliertree/model.rb', line 3

# Records every keyword option in an instance variable of the same name.
# The values are stored as given; nothing is validated here.
#
# @param nthreads [Integer] thread count; any negative value is replaced
#   with Etc.nprocessors
def initialize(
  max_depth: 4,
  min_gain: 0.01,
  z_norm: 2.67,
  z_outlier: 8.0,
  pct_outliers: 0.01,
  min_size_numeric: 25,
  min_size_categ: 50,
  categ_split: "binarize",
  categ_outliers: "tail",
  numeric_split: "raw",
  follow_all: false,
  gain_as_pct: true,
  nthreads: -1
)
  # TODO: validate values

  # Tree growth limits
  @max_depth = max_depth
  @min_gain = min_gain
  @gain_as_pct = gain_as_pct
  @follow_all = follow_all

  # Thresholds
  @z_norm = z_norm
  @z_outlier = z_outlier
  @pct_outliers = pct_outliers
  @min_size_numeric = min_size_numeric
  @min_size_categ = min_size_categ

  # Split / outlier strategies
  @categ_split = categ_split
  @categ_outliers = categ_outliers
  @numeric_split = numeric_split

  # A negative count falls back to Etc.nprocessors (virtual cores).
  @nthreads = nthreads < 0 ? Etc.nprocessors : nthreads
end

Instance Method Details

#fit(df) ⇒ Object



28
29
30
31
32
33
# File 'lib/outliertree/model.rb', line 28

# Fits the model on +df+.
#
# The input is wrapped in a Dataset, handed to prep_fit, and the merged
# data/fit options are passed to the native extension. The extension's
# result is kept in @model_outputs and is also this method's return value.
def fit(df)
  dataset = Dataset.new(df)
  prep_fit(dataset)

  @model_outputs = Ext.fit_outliers_models(
    data_options(dataset).merge(fit_options)
  )
end

#outliers(df) ⇒ Object



35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/outliertree/model.rb', line 35

# Looks for outliers in +df+ using the previously fitted model.
#
# Raises a RuntimeError ("Not fit") when no fitted model outputs are present.
# Otherwise the input is wrapped in a Dataset, handed to prep_predict, scored
# by the native extension, and the outcome of Result#process is returned.
def outliers(df)
  unless @model_outputs
    raise "Not fit"
  end

  dataset = Dataset.new(df)
  prep_predict(dataset)

  predict_options = data_options(dataset).merge(nthreads: @nthreads)
  raw_outliers = Ext.find_new_outliers(@model_outputs, predict_options)

  result = Result.new(
    model_outputs: raw_outliers,
    df: dataset,
    numeric_columns: @numeric_columns,
    categorical_columns: @categorical_columns,
    categories: @categories
  )
  result.process
end