Class: FSelector::BaseEnsemble

Inherits:

Base

Object
Base
FSelector::BaseEnsemble

show all

Defined in:: lib/fselector/ensemble.rb

Overview

Note:

ensemble feature selectors share the same feature selection interface as single feature selectors

feature selection by an ensemble of feature selectors

for feature weighting algorithms, call one of the following two functions before calling select_feature_by_score! or
select_feature_by_rank! for feature selection:

ensemble_by_score() # ensemble scores are derived from those of the individual selectors
ensemble_by_rank() # ensemble ranks are derived from those of the individual selectors

for feature subset selection algorithms, use select_feature! for feature selection (based on feature frequency count)

Direct Known Subclasses

EnsembleMultiple, EnsembleSingle

Instance Method Summary collapse

#algo_type
override algo_type for BaseEnsemble.
#ensemble_by_rank(ensem_method = :by_sum)
ensemble ranks are derived from those of the individual feature selectors.
#ensemble_by_score(ensem_method = :by_max, norm_method = :by_zscore)
ensemble scores are derived from those of the individual feature selectors.
#get_feature_ranks
override get_feature_ranks() for BaseEnsemble.
#get_feature_scores
override get_feature_scores() for BaseEnsemble.
#initialize(data = nil) ⇒ BaseEnsemble constructor
initialize from an existing data structure.

Constructor Details

#initialize(data = nil) ⇒ `BaseEnsemble`

initialize from an existing data structure



24
25
26

# File 'lib/fselector/ensemble.rb', line 24

# Build an ensemble selector, optionally from an existing data structure.
# All construction work is delegated to the parent class initializer.
#
# @param data [Object, nil] existing data structure, forwarded unchanged
#   to the parent initializer (its expected shape is defined there)
def initialize(data = nil)
  # bare super forwards this method's own argument (data) to the parent
  super
end

Instance Method Details

#algo_type

override algo_type for BaseEnsemble

get the type of ensemble feature selectors at instance-level



32
33
34

# File 'lib/fselector/ensemble.rb', line 32

# Type of this ensemble feature selector.
#
# The value is read from the instance variable @algo_type, so it is
# tracked per object; nil is returned when it has not been set.
#
# @return [Object, nil] whatever was stored in @algo_type
def algo_type
  @algo_type
end

#ensemble_by_rank(ensem_method = :by_sum)

ensemble ranks are derived from those of the individual feature selectors

Parameters:

ensem_method (Symbol) (defaults to: :by_sum) —
how the ensemble rank should be derived from those of individual feature selector
allowed values are:
- :by_min # use min rank
- :by_max # use max rank
- :by_ave # use ave rank
- :by_sum # use sum rank

# File 'lib/fselector/ensemble.rb', line 125

# Derive ensemble ranks from the ranks of the individual feature
# selectors and store them in @ranks.
#
# For every feature yielded by each_feature, the ranks obtained from
# get_ensemble_ranks are combined by calling the method named by
# +ensem_method+. Features are then sorted by that combined value in
# ascending order and numbered consecutively starting at 1.
#
# @param ensem_method [Symbol] how the ensemble rank is derived from the
#   individual ranks; one of :by_min, :by_max, :by_ave or :by_sum
# @return [Hash] feature => ensemble rank (the value assigned to @ranks)
# @note terminates via abort (SystemExit) if ensem_method is unsupported
def ensemble_by_rank(ensem_method=:by_sum)
  unless [:by_min, :by_max, :by_ave, :by_sum].include? ensem_method
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :by_min, :by_max, :by_ave and :by_sum are supported ensemble methods!"
  end

  # ranks from each individual feature selector in the ensemble
  ensem_ranks = get_ensemble_ranks

  # combined (not yet consecutive) rank value for each feature
  combined = {}
  each_feature do |f|
    # this feature's rank in every individual feature selector
    rank_arr = ensem_ranks.collect { |er| er[f] }
    combined[f] = send(ensem_method, rank_arr)
  end

  # ascending combined value => final rank 1, 2, 3, ...
  sorted_features = combined.keys.sort { |x, y| combined[x] <=> combined[y] }

  new_ranks = {}
  sorted_features.each_with_index { |sf, si| new_ranks[sf] = si + 1 }

  @ranks = new_ranks
end

#ensemble_by_score(ensem_method = :by_max, norm_method = :by_zscore)

Note:

scores from different feature selectors are often incompatible with each other, so we need to normalize them first

ensemble scores are derived from those of the individual feature selectors

Parameters:

ensem_method (Symbol) (defaults to: :by_max) —
how the ensemble score should be derived from those of individual feature selector
allowed values are:
- :by_min # use min score
- :by_max # use max score
- :by_ave # use ave score
- :by_sum # use sum score
norm_method (Symbol) (defaults to: :by_zscore) —
score normalization method
- :none # use score as is
- :by_min_max # score scaled to [0, 1]
- :by_zscore # score converted to zscore

# File 'lib/fselector/ensemble.rb', line 82

# Derive ensemble scores from the scores of the individual feature
# selectors and store them in @scores.
#
# Scores are fetched with get_ensemble_scores. Unless +norm_method+ is
# :none, the method named by +norm_method+ is called on them first; its
# return value is discarded, so it presumably normalizes the scores in
# place (verify against its definition). For every feature yielded by
# each_feature, the :BEST score of each selector is then combined by
# calling the method named by +ensem_method+.
#
# @param ensem_method [Symbol] how the ensemble score is derived from the
#   individual scores; one of :by_min, :by_max, :by_ave or :by_sum
# @param norm_method [Symbol] score normalization method; one of
#   :none, :by_min_max or :by_zscore
# @return [Hash] feature => { :BEST => ensemble score } (the value
#   assigned to @scores)
# @note terminates via abort (SystemExit) if either argument is unsupported
def ensemble_by_score(ensem_method=:by_max, norm_method=:by_zscore)
  unless [:by_min, :by_max, :by_ave, :by_sum].include? ensem_method
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :by_min, :by_max, :by_ave and :by_sum are supported ensemble methods!"
  end

  unless [:none, :by_min_max, :by_zscore].include? norm_method
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  only :none, :by_min_max and :by_zscore are supported normalization methods!"
  end

  # scores from each individual feature selector in the ensemble
  ensem_scores = get_ensemble_scores

  # normalization (if requested); the result of the call is not used
  send(norm_method, ensem_scores) unless norm_method == :none

  scores = {}
  each_feature do |f|
    # this feature's :BEST score in every individual feature selector
    score_arr = ensem_scores.collect { |es| es[f][:BEST] }
    scores[f] = { :BEST => send(ensem_method, score_arr) }
  end

  @scores = scores
end

#get_feature_ranks

override get_feature_ranks() for BaseEnsemble

# File 'lib/fselector/ensemble.rb', line 51

# Feature ranks of the ensemble.
#
# Returns @ranks when it is already set. Otherwise, when @scores is
# set, set_ranks_from_scores is called and @ranks is returned
# afterwards. When neither is available the process is aborted.
#
# @return [Object] the current value of @ranks
# @note terminates via abort (SystemExit) if neither ranks nor scores exist
def get_feature_ranks
  # ranks already available, nothing to compute
  return @ranks if @ranks

  unless @scores
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  please call one ensemble method first!"
  end

  # calc ranks based on scores
  set_ranks_from_scores
  @ranks
end

#get_feature_scores

override get_feature_scores() for BaseEnsemble

# File 'lib/fselector/ensemble.rb', line 40

# Feature scores of the ensemble.
#
# Returns @scores when it is set; otherwise the process is aborted
# with a hint to call an ensemble method first.
#
# @return [Object] the current value of @scores
# @note terminates via abort (SystemExit) if @scores is not set
def get_feature_scores
  unless @scores
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  please call one ensemble method first!"
  end

  @scores
end