Class: FSelector::BaseEnsemble

Inherits:
Base
  • Object
show all
Defined in:
lib/fselector/ensemble.rb

Overview

Note:

ensemble feature selectors share the same feature selection interface as a single feature selector

feature selection by an ensemble of feature selectors

for feature weighting algorithms, call one of the following two functions before calling select_feature_by_score! or
select_feature_by_rank! for feature selection:

  • ensemble_by_score() # ensemble scores are based on those of the individual selectors
  • ensemble_by_rank() # ensemble ranks are based on those of the individual selectors

for the type of feature subset selection algorithms, use select_feature! for feature selection (based on feature frequency count)

Direct Known Subclasses

EnsembleMultiple, EnsembleSingle

Instance Method Summary collapse

Methods inherited from Base

#each_class, #each_feature, #each_sample, #get_class_labels, #get_classes, #get_data, #get_data_copy, #get_feature_type, #get_feature_values, #get_features, #get_opt, #get_sample_size, #select_feature!, #select_feature_by_rank!, #select_feature_by_score!, #set_classes, #set_data, #set_feature_type, #set_features, #set_opt

Methods included from ReplaceMissingValues

#replace_by_fixed_value!, #replace_by_knn_value!, #replace_by_mean_value!, #replace_by_median_value!, #replace_by_most_seen_value!

Methods included from FileIO

#data_from_csv, #data_from_libsvm, #data_from_random, #data_from_url, #data_from_weka, #data_to_csv, #data_to_libsvm, #data_to_weka

Constructor Details

#initialize(data = nil) ⇒ BaseEnsemble

initialize from an existing data structure



24
25
26
# File 'lib/fselector/ensemble.rb', line 24

# Build an ensemble selector from an optional existing data structure.
#
# All set-up is delegated to the parent constructor; nothing
# ensemble-specific is initialized here.
#
# @param data existing data structure handed unchanged to the parent
#   constructor (defaults to nil)
def initialize(data = nil)
  # bare super forwards the current value of +data+ to the parent
  super
end

Instance Method Details

#algo_type

override algo_type for BaseEnsemble

get the type of ensemble feature selectors at instance-level



32
33
34
# File 'lib/fselector/ensemble.rb', line 32

# Type of this ensemble feature selector.
#
# Overrides the inherited algo_type: for an ensemble the type is read
# from the @algo_type instance variable of this object.
#
# @return the value currently held in @algo_type (nil when unset)
def algo_type
  @algo_type # per-instance value
end

#ensemble_by_rank(ensem_method = :by_sum)

ensemble ranks are derived from those of the individual feature selectors

Parameters:

  • ensem_method (Symbol) (defaults to: :by_sum)

    how the ensemble rank should be derived from those of individual feature selector
    allowed values are:

    • :by_min # use min rank
    • :by_max # use max rank
    • :by_ave # use ave rank
    • :by_sum # use sum rank


125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# File 'lib/fselector/ensemble.rb', line 125

# Derive ensemble ranks from the ranks reported by each individual
# feature selector and store them in @ranks.
#
# For every feature the per-selector ranks are combined with
# +ensem_method+; features are then ordered by that combined value
# (ascending) and re-ranked 1, 2, 3, ...
#
# @param ensem_method [Symbol] how to combine the individual ranks,
#   one of :by_min, :by_max, :by_ave or :by_sum (default :by_sum)
# @return [Hash] feature => ensemble rank (also assigned to @ranks)
# @note aborts the program when +ensem_method+ is not one of the
#   allowed values
def ensemble_by_rank(ensem_method=:by_sum)
  unless [:by_min, :by_max, :by_ave, :by_sum].include?(ensem_method)
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :by_min, :by_max, :by_ave and :by_sum are supported ensemble methods!"
  end

  # ranks from each individual feature selector in the ensemble
  ensem_ranks = get_ensemble_ranks

  # combined (not yet consecutive) rank value per feature
  ranks = {}
  each_feature do |f|
    rank_arr = ensem_ranks.collect { |er| er[f] }
    # dispatch to the combiner named by ensem_method
    ranks[f] = send(ensem_method, rank_arr)
  end

  # order features by combined value, then hand out ranks 1..n
  sorted_features = ranks.keys.sort { |x, y| ranks[x] <=> ranks[y] }

  new_ranks = {}
  sorted_features.each_with_index do |sf, si|
    new_ranks[sf] = si + 1
  end

  @ranks = new_ranks
end

#ensemble_by_score(ensem_method = :by_max, norm_method = :by_zscore)

Note:

scores from different feature selectors are often incompatible with each other, so we need to normalize them first

ensemble scores are derived from those of the individual feature selectors

Parameters:

  • ensem_method (Symbol) (defaults to: :by_max)

    how the ensemble score should be derived from those of individual feature selector
    allowed values are:

    • :by_min # use min score
    • :by_max # use max score
    • :by_ave # use ave score
    • :by_sum # use sum score
  • norm_method (Symbol) (defaults to: :by_zscore)

    score normalization method

    • :none # use score as is
    • :by_min_max # score scaled to [0, 1]
    • :by_zscore # score converted to zscore


82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
# File 'lib/fselector/ensemble.rb', line 82

# Derive ensemble scores from the scores reported by each individual
# feature selector and store them in @scores.
#
# Scores from different selectors are often not directly comparable,
# so they can be normalized first via +norm_method+.
#
# @param ensem_method [Symbol] how to combine the individual scores,
#   one of :by_min, :by_max, :by_ave or :by_sum (default :by_max)
# @param norm_method [Symbol] score normalization, one of :none,
#   :by_min_max or :by_zscore (default :by_zscore)
# @return [Hash] feature => { :BEST => ensemble score } (also assigned
#   to @scores)
# @note aborts the program when either argument is not one of the
#   allowed values
def ensemble_by_score(ensem_method=:by_max, norm_method=:by_zscore)
  unless [:by_min, :by_max, :by_ave, :by_sum].include?(ensem_method)
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :by_min, :by_max, :by_ave and :by_sum are supported ensemble methods!"
  end

  unless [:none, :by_min_max, :by_zscore].include?(norm_method)
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :none, :by_min_max and :by_zscore are supported normalization methods!"
  end

  # scores from each feature selector in the ensemble
  ensem_scores = get_ensemble_scores

  # normalization (if needed); the normalizer's return value is ignored,
  # so it is presumably expected to update ensem_scores in place
  send(norm_method, ensem_scores) unless norm_method == :none

  scores = {}
  each_feature do |f|
    # this feature's :BEST score from every individual selector
    score_arr = ensem_scores.collect { |es| es[f][:BEST] }
    # dispatch to the combiner named by ensem_method
    scores[f] = { :BEST => send(ensem_method, score_arr) }
  end

  @scores = scores
end

#get_feature_ranks

override get_feature_ranks() for BaseEnsemble



51
52
53
54
55
56
57
58
59
60
61
# File 'lib/fselector/ensemble.rb', line 51

# Feature ranks of the ensemble (overrides the inherited version).
#
# Returns @ranks when already available. Otherwise, when @scores is
# available, set_ranks_from_scores is called and @ranks is returned
# afterwards. With neither present the program aborts, asking for an
# ensemble method to be called first.
#
# @return the value of @ranks
def get_feature_ranks
  return @ranks if @ranks

  # neither ranks nor scores: no ensemble method has been run yet
  unless @scores
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  please call one ensemble method first!"
  end

  # ranks are computed from the ensemble scores
  set_ranks_from_scores
  @ranks
end

#get_feature_scores

override get_feature_scores() for BaseEnsemble



40
41
42
43
44
45
# File 'lib/fselector/ensemble.rb', line 40

# Feature scores of the ensemble (overrides the inherited version).
#
# Returns @scores when it is set; otherwise the program aborts, asking
# for an ensemble method to be called first.
#
# @return the value of @scores
def get_feature_scores
  unless @scores
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  please call one ensemble method first!"
  end

  @scores
end