Class: FSelector::Base
- Inherits:
-
Object
- Object
- FSelector::Base
- Includes:
- FileIO, ReplaceMissingValues
- Defined in:
- lib/fselector/algo_base/base.rb
Overview
base class for a single feature selection algorithm
Direct Known Subclasses
BaseCFS, BaseContinuous, BaseDiscrete, BaseEnsemble, BaseRelief, BaseReliefF, LasVegasFilter, LasVegasIncremental, Random, RandomSubset
Class Attribute Summary collapse
-
.algo_type
class-level instance variable, type of feature selection algorithm.
Instance Method Summary collapse
-
#algo_type
get the type of feature selection algorithm at class-level.
-
#each_class
iterator for each class, a block must be given.
-
#each_feature
iterator for each feature, a block must be given.
-
#each_sample
iterator for each sample with class label, a block must be given.
-
#get_class_labels ⇒ Array<Symbol>
get class labels for all samples.
-
#get_classes ⇒ Array<Symbol>
get (unique) class labels.
-
#get_data ⇒ Hash
get internal data.
-
#get_data_copy ⇒ Hash
get a copy of internal data, by means of the standard Marshal library.
-
#get_feature_ranks ⇒ Hash
get the ranked features based on their best scores.
-
#get_feature_scores ⇒ Hash
get scores of all features for all classes.
-
#get_feature_type(feature = nil)
get the feature type stored in @types.
-
#get_feature_values(f, mv = nil, ck = nil) ⇒ Array
get feature values.
-
#get_features ⇒ Array<Symbol>
get (unique) features.
-
#get_opt(key = nil) ⇒ Any
get non-data information for a given key.
-
#get_sample_size ⇒ Integer
number of samples.
-
#initialize(data = nil) ⇒ Base
constructor
initialize from an existing data structure.
-
#select_feature!
reconstruct data with selected features.
-
#select_feature_by_rank!(criterion, my_ranks = nil)
reconstruct data by feature rank satisfying criterion.
-
#select_feature_by_score!(criterion, my_scores = nil)
reconstruct data by feature score satisfying criterion.
-
#set_classes(classes)
set classes.
-
#set_data(data) ⇒ nil
set data and clean relevant variables in case of data change.
-
#set_feature_type(feature, type)
set feature name-type pair.
-
#set_features(features)
set features.
-
#set_opt(key, value)
set non-data information as a key-value pair.
Methods included from ReplaceMissingValues
#replace_by_fixed_value!, #replace_by_knn_value!, #replace_by_mean_value!, #replace_by_median_value!, #replace_by_most_seen_value!
Methods included from FileIO
#data_from_csv, #data_from_libsvm, #data_from_random, #data_from_url, #data_from_weka, #data_to_csv, #data_to_libsvm, #data_to_weka
Constructor Details
#initialize(data = nil) ⇒ Base
initialize from an existing data structure
30 31 32 |
# File 'lib/fselector/algo_base/base.rb', line 30
#
# Build a selector, optionally wrapping an existing data structure.
#
# @param data [Object, nil] data to hold; it is stored as given, without
#   copying and without the Hash check that set_data performs
def initialize(data = nil)
  # keep a reference to the caller's data
  @data = data
end
Class Attribute Details
.algo_type
derived class (except for Base*** class) must set its own type with one of the following two:
- :filter_by_feature_weighting # when algo outputs weight for each feature
- :filter_by_feature_searching # when algo outputs a subset of features
class-level instance variable, type of feature selection algorithm.
20 21 22 |
# File 'lib/fselector/algo_base/base.rb', line 20
#
# Class-level reader for the type of feature selection algorithm.
#
# @return [Symbol, nil] value of the class-level instance variable
#   @algo_type; nil when it has never been assigned
def algo_type
  @algo_type
end
Instance Method Details
#algo_type
get the type of feature selection algorithm at class-level
24 25 26 |
# File 'lib/fselector/algo_base/base.rb', line 24
#
# Instance-level shortcut to the algorithm type declared on the class.
#
# @return [Object] whatever the receiver's class returns from algo_type
def algo_type
  klass = self.class
  klass.algo_type
end
#each_class
iterator for each class, a block must be given. e.g.
each_class do |k|
puts k
end
42 43 44 45 46 47 48 49 |
# File 'lib/fselector/algo_base/base.rb', line 42
#
# Iterate over every class label returned by get_classes.
# Terminates the process via abort when called without a block.
#
# @yield [k] one class label at a time
# @return [Object] the collection returned by get_classes
def each_class
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " block must be given!"
  end

  get_classes.each { |klass| yield klass }
end
#each_feature
iterator for each feature, a block must be given. e.g.
each_feature do |f|
puts f
end
59 60 61 62 63 64 65 66 |
# File 'lib/fselector/algo_base/base.rb', line 59
#
# Iterate over every feature returned by get_features.
# Terminates the process via abort when called without a block.
#
# @yield [f] one feature at a time
# @return [Object] the collection returned by get_features
def each_feature
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " block must be given!"
  end

  get_features.each { |feature| yield feature }
end
#each_sample
iterator for each sample with class label, a block must be given. e.g.
each_sample do |k, s|
print k
s.each { |f, v| print " #{v}" }
puts
end
79 80 81 82 83 84 85 86 87 88 |
# File 'lib/fselector/algo_base/base.rb', line 79
#
# Iterate over every sample together with the class label it is stored under.
# Walks the structure returned by get_data (label => list of samples).
# Terminates the process via abort when called without a block.
#
# @yield [k, s] class label and one sample stored under it
# @return [Object] the structure returned by get_data
def each_sample
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " block must be given!"
  end

  get_data.each do |label, samples|
    samples.each do |sample|
      yield label, sample
    end
  end
end
#get_class_labels ⇒ Array<Symbol>
get class labels for all samples
107 108 109 110 111 112 113 114 115 116 117 |
# File 'lib/fselector/algo_base/base.rb', line 107
#
# Class label of every sample, in each_sample order (one entry per sample).
# The list is built once and cached in @cv.
#
# @return [Array] cached list of class labels
def get_class_labels
  return @cv if @cv

  @cv = []
  each_sample { |label, _sample| @cv << label }
  @cv
end
#get_classes ⇒ Array<Symbol>
get (unique) class labels
96 97 98 |
# File 'lib/fselector/algo_base/base.rb', line 96
#
# Unique class labels. Unless already set, they are taken from the keys of
# @data and cached in @classes.
#
# @return [Array] class labels
def get_classes
  @classes = @data.keys unless @classes
  @classes
end
#get_data ⇒ Hash
get internal data
228 229 230 |
# File 'lib/fselector/algo_base/base.rb', line 228
#
# Accessor for the internal data structure.
#
# @return [Object, nil] the very object held in @data (not a copy)
def get_data
  @data
end
#get_data_copy ⇒ Hash
get a copy of internal data, by means of the standard Marshal library
238 239 240 |
# File 'lib/fselector/algo_base/base.rb', line 238
#
# Deep copy of the internal data, produced by a Marshal dump/load round trip.
#
# @return [Object, nil] independent copy of @data, or nil when no data is set
def get_data_copy
  return nil unless @data

  serialized = Marshal.dump(@data)
  Marshal.load(serialized)
end
#get_feature_ranks ⇒ Hash
get the ranked features based on their best scores
328 329 330 331 332 333 334 335 |
# File 'lib/fselector/algo_base/base.rb', line 328
#
# Feature ranks. When @ranks is not yet available, set_ranks_from_scores is
# invoked first; that helper is defined elsewhere and presumably fills
# @ranks from the feature scores.
#
# @return [Object] contents of @ranks
def get_feature_ranks
  # only derive the ranks when they are not cached yet
  set_ranks_from_scores unless @ranks
  @ranks
end
#get_feature_scores ⇒ Hash
get scores of all features for all classes
305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 |
# File 'lib/fselector/algo_base/base.rb', line 305
#
# Scores of all features for all classes, plus a :BEST entry per feature
# holding the maximum of that feature's per-class scores (the larger, the
# better). The result is cached in @scores.
#
# calc_contribution is defined elsewhere; presumably it records the
# per-class scores of the given feature in @scores.
#
# @return [Hash] feature => { class => score, :BEST => best score };
#   an empty Hash when nothing was scored
def get_feature_scores
  return @scores if @scores # already done

  each_feature { |f| calc_contribution(f) }

  # Nothing scored (e.g. no features at all): fall back to an empty Hash
  # instead of calling #each on nil.
  @scores ||= {}

  # best score for each feature
  @scores.each_value do |ks|
    ks[:BEST] = ks.values.max
  end

  @scores
end
#get_feature_type(feature = nil)
get the feature type stored in @types
202 203 204 205 206 207 208 |
# File 'lib/fselector/algo_base/base.rb', line 202
#
# Look up feature type information stored in @types.
#
# @param feature [Object, nil] feature name; when omitted, the whole
#   name => type table is returned
# @return [Object, nil] the type of the given feature, the whole table,
#   or nil when no type has been stored yet
def get_feature_type(feature=nil)
  return nil unless @types
  return @types unless feature

  @types[feature]
end
#get_feature_values(f, mv = nil, ck = nil) ⇒ Array
get feature values
156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 |
# File 'lib/fselector/algo_base/base.rb', line 156
#
# Values of feature +f+, collected over all samples and cached per feature
# in @fvs (feature => { class => [values] }). A sample lacking the feature
# contributes nil.
#
# @param f [Object] feature of interest
# @param mv [Object, nil] truthy to include the nil placeholders of samples
#   that lack the feature; by default they are removed
# @param ck [Object, nil] class label to restrict the values to; all
#   classes when omitted
# @return [Array, nil] feature values; nil when +ck+ is given but no sample
#   of that class was seen
def get_feature_values(f, mv=nil, ck=nil)
  @fvs ||= {}

  unless @fvs.has_key?(f)
    by_class = {}
    each_sample do |k, s|
      by_class[k] ||= []
      # nil stands for a missing feature value
      by_class[k] << (s.has_key?(f) ? s[f] : nil)
    end
    # cache only once fully built, so a failed iteration leaves no partial entry
    @fvs[f] = by_class
  end

  values = ck ? @fvs[f][ck] : @fvs[f].values.flatten

  # An unknown class yields nil in both modes rather than raising on nil.compact.
  return values if mv || values.nil?

  values.compact
end
#get_features ⇒ Array<Symbol>
get (unique) features
139 140 141 |
# File 'lib/fselector/algo_base/base.rb', line 139
#
# Unique features. Unless already set, they are gathered from the keys of
# every sample in @data and cached in @features.
#
# @return [Array] unique features
def get_features
  unless @features
    keys_per_class = @data.map do |_label, samples|
      samples.map { |sample| sample.keys }
    end
    @features = keys_per_class.flatten.uniq
  end

  @features
end
#get_opt(key = nil) ⇒ Any
return all non-data as a Hash if key == nil
get non-data information for a given key
271 272 273 274 275 276 277 |
# File 'lib/fselector/algo_base/base.rb', line 271
#
# Read non-data information stored in @opts.
#
# @param key [Object, nil] key of interest; when omitted, all stored
#   non-data information is returned
# @return [Object, nil] the value for +key+, the whole table, or nil when
#   nothing has been stored yet
def get_opt(key=nil)
  return nil unless @opts
  return @opts unless key

  @opts[key]
end
#get_sample_size ⇒ Integer
number of samples
294 295 296 |
# File 'lib/fselector/algo_base/base.rb', line 294
#
# Total number of samples, summed over the classes reported by get_classes.
# The count is cached in @sz.
#
# @return [Integer] number of samples
def get_sample_size
  unless @sz
    per_class = get_classes.map { |k| get_data[k].size }
    @sz = per_class.inject(0) { |total, count| total + count }
  end

  @sz
end
#select_feature!
data structure will be altered. Derived class must implement its own get_feature_subset(). This is only available for the subset selection type of algorithms, see README
reconstruct data with selected features
345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 |
# File 'lib/fselector/algo_base/base.rb', line 345
#
# Rebuild the data so that only the selected feature subset remains.
# The data structure is altered. Only valid when algo_type is
# :filter_by_feature_searching; otherwise the process is aborted.
# Samples left without any feature are dropped.
#
# The subset comes from get_feature_subset, which a derived class must
# implement.
#
# @return [Object, nil] nil when the subset is empty, otherwise whatever
#   set_data returns
def select_feature!
  unless algo_type == :filter_by_feature_searching
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " select_feature! is the interface for the type of feature subset selection algorithms only. \n" +
          " please consider select_feature_by_score! or select_feature_by_rank!, \n" +
          " which is the interface for the type of feature weighting algorithms"
  end

  # derived class must implement its own one
  subset = get_feature_subset
  return if subset.empty?

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    my_s = s.select { |f, _v| subset.include?(f) }
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end
#select_feature_by_rank!(criterion, my_ranks = nil)
data structure will be altered. This is only available for the weighting type of algorithms, see README
reconstruct data by feature rank satisfying criterion
413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 |
# File 'lib/fselector/algo_base/base.rb', line 413
#
# Rebuild the data, keeping only the features whose rank satisfies
# +criterion+. The data structure is altered. Only valid when algo_type is
# :filter_by_feature_weighting; otherwise the process is aborted.
# Features without a rank are dropped, and so are samples left without any
# feature.
#
# @param criterion [String] Ruby comparison applied to each rank,
#   e.g. "<= 3"
# @param my_ranks [Hash, nil] feature => rank supplied by the caller;
#   get_feature_ranks is used when omitted
# @return [Object, nil] nil when the ranks are empty, otherwise whatever
#   set_data returns
def select_feature_by_rank!(criterion, my_ranks=nil)
  unless algo_type == :filter_by_feature_weighting
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " select_feature_by_rank! is the interface for the type of feature weighting algorithms only. \n" +
          " please consider select_feature!, \n" +
          " which is the interface for the type of feature subset selection algorithms"
  end

  # user ranks or internal ranks
  ranks = my_ranks || get_feature_ranks
  return if ranks.empty?

  # SECURITY: criterion is evaluated as Ruby code; never pass untrusted input.
  # The rank is bound to a local instead of being pasted into the code text,
  # and the test runs once per feature rather than once per sample value.
  keep = {}
  ranks.each do |f, rank|
    next if rank.nil?
    keep[f] = true if eval("rank #{criterion}")
  end

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    my_s = s.select { |f, _v| keep.has_key?(f) }
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end
#select_feature_by_score!(criterion, my_scores = nil)
data structure will be altered. This is only available for the weighting type of algorithms, see README
reconstruct data by feature score satisfying criterion
379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 |
# File 'lib/fselector/algo_base/base.rb', line 379
#
# Rebuild the data, keeping only the features whose best score satisfies
# +criterion+. The data structure is altered. Only valid when algo_type is
# :filter_by_feature_weighting; otherwise the process is aborted.
# Features without a :BEST score are dropped, and so are samples left
# without any feature.
#
# @param criterion [String] Ruby comparison applied to each best score,
#   e.g. ">= 0.5"
# @param my_scores [Hash, nil] feature => { :BEST => score } supplied by
#   the caller; get_feature_scores is used when omitted
# @return [Object, nil] nil when the scores are empty, otherwise whatever
#   set_data returns
def select_feature_by_score!(criterion, my_scores=nil)
  unless algo_type == :filter_by_feature_weighting
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " select_feature_by_score! is the interface for the type of feature weighting algorithms only. \n" +
          " please consider select_feature!, \n" +
          " which is the interface for the type of feature subset selection algorithms"
  end

  # user scores or internal scores
  scores = my_scores || get_feature_scores
  return if scores.empty?

  # SECURITY: criterion is evaluated as Ruby code; never pass untrusted input.
  # The score is bound to a local instead of being pasted into the code text,
  # so values such as Infinity or NaN compare normally, and the test runs
  # once per feature rather than once per sample value.
  keep = {}
  scores.each do |f, ks|
    best = ks && ks[:BEST]
    next if best.nil?
    keep[f] = true if eval("best #{criterion}")
  end

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    my_s = s.select { |f, _v| keep.has_key?(f) }
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end
#set_classes(classes)
set classes
125 126 127 128 129 130 131 132 |
# File 'lib/fselector/algo_base/base.rb', line 125
#
# Replace the stored class labels.
# Terminates the process via abort unless +classes+ is exactly an Array.
#
# @param classes [Array] class labels to store in @classes
# @return [Array] the array just stored
def set_classes(classes)
  unless classes.class == Array
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " classes must be a Array object!"
  end

  @classes = classes
end
#set_data(data) ⇒ nil
set data and clean relevant variables in case of data change
249 250 251 252 253 254 255 256 257 258 259 260 |
# File 'lib/fselector/algo_base/base.rb', line 249
#
# Replace the internal data. When data was already present, clear_vars
# (defined elsewhere) is called first so values derived from the old data
# are discarded.
# Terminates the process via abort unless +data+ is exactly a Hash.
#
# @param data [Hash] new data structure
# @return [nil] always nil, which keeps irb from echoing the data
def set_data(data)
  unless data.class == Hash
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " data must be a Hash object!"
  end

  # drop anything derived from the previous data
  clear_vars if @data
  # install the new data structure
  @data = data

  nil
end
#set_feature_type(feature, type)
set feature name-type pair
217 218 219 220 |
# File 'lib/fselector/algo_base/base.rb', line 217
#
# Record the type of a feature in @types, creating the table on first use.
#
# @param feature [Object] feature name
# @param type [Object] type to associate with the feature
# @return [Object] the type just stored
def set_feature_type(feature, type)
  types = (@types ||= {})
  types[feature] = type
end
#set_features(features)
set features
185 186 187 188 189 190 191 192 |
# File 'lib/fselector/algo_base/base.rb', line 185
#
# Replace the stored features.
# Terminates the process via abort unless +features+ is exactly an Array.
#
# @param features [Array] features to store in @features
# @return [Array] the array just stored
def set_features(features)
  unless features.class == Array
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " features must be a Array object!"
  end

  @features = features
end
#set_opt(key, value)
set non-data information as a key-value pair
283 284 285 286 |
# File 'lib/fselector/algo_base/base.rb', line 283
#
# Store a piece of non-data information in @opts, creating the table on
# first use.
#
# @param key [Object] key to store under
# @param value [Object] value to store
# @return [Object] the value just stored
def set_opt(key, value)
  opts = (@opts ||= {})
  opts[key] = value
end