Class: FSelector::Base

Inherits:
Object
  • Object
show all
Includes:
FileIO, ReplaceMissingValues
Defined in:
lib/fselector/algo_base/base.rb

Overview

base class for a single feature selection algorithm

Class Attribute Summary collapse

Instance Method Summary collapse

Methods included from ReplaceMissingValues

#replace_by_fixed_value!, #replace_by_knn_value!, #replace_by_mean_value!, #replace_by_median_value!, #replace_by_most_seen_value!

Methods included from FileIO

#data_from_csv, #data_from_libsvm, #data_from_random, #data_from_url, #data_from_weka, #data_to_csv, #data_to_libsvm, #data_to_weka

Constructor Details

#initialize(data = nil) ⇒ Base

initialize from an existing data structure



30
31
32
# File 'lib/fselector/algo_base/base.rb', line 30

# Build a selector, optionally seeded with an existing data structure.
#
# @param data [Object, nil] stored as-is in @data; nothing is validated
#   or copied here
def initialize(data = nil)
  # keep a reference to the caller's object
  @data = data
end

Class Attribute Details

.algo_type

Note:

derived class (except for Base*** class) must set its own type with one of the following two:

  • :feature_weighting # when algo outputs weight for each feature
  • :feature_subset_selection # when algo outputs a subset of features

class-level instance variable, type of feature selection algorithm.



20
21
22
# File 'lib/fselector/algo_base/base.rb', line 20

# Reader for the class-level instance variable that holds the type of
# feature selection algorithm.
#
# @return [Object, nil] current value of @algo_type on the receiver
#   (nil when it has never been assigned)
def algo_type
  @algo_type
end

Instance Method Details

#algo_type

get the type of feature selection algorithm at class-level



24
25
26
# File 'lib/fselector/algo_base/base.rb', line 24

# Instance-level convenience reader: asks the receiver's class for its
# algorithm type.
#
# @return [Object] whatever the class-level algo_type reader returns
def algo_type
  klass = self.class
  klass.algo_type
end

#each_class

iterator for each class, a block must be given. e.g.

each_class do |k|
  puts k
end


42
43
44
45
46
47
48
49
# File 'lib/fselector/algo_base/base.rb', line 42

# Iterate over the class labels returned by get_classes.
#
# @yieldparam label [Object] one class label per iteration
# @return [Object] the result of get_classes.each
# @note calls abort (terminating the process) when no block is supplied
def each_class
  # guard: a block is mandatory
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  block must be given!"
  end

  get_classes.each { |label| yield label }
end

#each_feature

iterator for each feature, a block must be given. e.g.

each_feature do |f|
  puts f
end


59
60
61
62
63
64
65
66
# File 'lib/fselector/algo_base/base.rb', line 59

# Iterate over the features returned by get_features.
#
# @yieldparam feature [Object] one feature per iteration
# @return [Object] the result of get_features.each
# @note calls abort (terminating the process) when no block is supplied
def each_feature
  # guard: a block is mandatory
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  block must be given!"
  end

  get_features.each { |feature| yield feature }
end

#each_sample

iterator for each sample with class label, a block must be given. e.g.

each_sample do |k, s|
  print k
  s.each { |f, v| print " #{v}" }
  puts
end


79
80
81
82
83
84
85
86
87
88
# File 'lib/fselector/algo_base/base.rb', line 79

# Iterate over every sample together with the class label it is stored
# under. Walks the structure returned by get_data, whose entries are
# treated as (label, collection-of-samples) pairs.
#
# @yieldparam label [Object] class label of the sample
# @yieldparam sample [Object] one sample stored under that label
# @return [Object] the result of get_data.each
# @note calls abort (terminating the process) when no block is supplied
def each_sample
  # guard: a block is mandatory
  unless block_given?
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          " block must be given!"
  end

  get_data.each do |label, group|
    group.each { |sample| yield label, sample }
  end
end

#get_class_labels ⇒ Array<Symbol>

get class labels for all samples

Returns:

  • (Array<Symbol>)

    class labels for all samples, one per sample (same size as the number of samples)



107
108
109
110
111
112
113
114
115
116
117
# File 'lib/fselector/algo_base/base.rb', line 107

# Collect the class label of every sample, in each_sample order.
# The result is cached in @cv and reused on later calls.
#
# @return [Array] one label per sample
def get_class_labels
  return @cv if @cv # cached from an earlier call

  @cv = []
  each_sample { |label, _sample| @cv << label }
  @cv
end

#get_classes ⇒ Array<Symbol>

get (unique) class labels

Returns:

  • (Array<Symbol>)

    unique class labels



96
97
98
# File 'lib/fselector/algo_base/base.rb', line 96

# Unique class labels. Taken from the keys of @data on first use and
# cached in @classes afterwards (set_classes can also assign @classes).
#
# @return [Array] class labels
def get_classes
  @classes = @data.keys unless @classes
  @classes
end

#get_data ⇒ Hash

get internal data

Returns:

  • (Hash)

    internal data



228
229
230
# File 'lib/fselector/algo_base/base.rb', line 228

# Accessor for the internal data structure.
#
# @return [Object, nil] the very object held in @data (not a copy);
#   see get_data_copy for an independent copy
def get_data
  @data
end

#get_data_copy ⇒ Hash

get a copy of internal data, by means of the standard Marshal library

Returns:

  • (Hash)

    a copy of internal data



238
239
240
# File 'lib/fselector/algo_base/base.rb', line 238

# Deep copy of the internal data, produced by a Marshal dump/load
# round trip.
#
# @return [Object, nil] an independent copy of @data, or nil when @data
#   is not set
def get_data_copy
  return nil unless @data

  snapshot = Marshal.dump(@data)
  Marshal.load(snapshot)
end

#get_feature_ranks ⇒ Hash

get the ranked features based on their best scores

Returns:

  • (Hash)

    feature ranks



328
329
330
331
332
333
334
335
# File 'lib/fselector/algo_base/base.rb', line 328

# Feature ranks, computed lazily.
#
# When @ranks is not yet set, set_ranks_from_scores is invoked once; that
# helper is presumably responsible for filling @ranks (it is not visible
# here -- confirm).
#
# @return [Object, nil] the value of @ranks after the helper has run
def get_feature_ranks
  set_ranks_from_scores unless @ranks
  @ranks
end

#get_feature_scores ⇒ Hash

get scores of all features for all classes

Returns:

  • (Hash)

    { feature => { class_1 => score_1, class_2 => score_2, :BEST => score_best } }



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/fselector/algo_base/base.rb', line 305

# Scores of all features for all classes, computed lazily and cached in
# @scores.
#
# For every feature, calc_contribution(f) is called; that method is not
# visible here and is presumably what stores the per-class scores in
# @scores (confirm against the derived classes). Afterwards each
# feature's entry gets a :BEST key holding the maximum of its values
# (the larger, the better).
#
# @return [Hash] { feature => { class_1 => score_1, ..., :BEST => score_best } };
#   an empty Hash when no scores were produced (e.g. there are no features)
def get_feature_scores
  return @scores if @scores # already done

  each_feature { |f| calc_contribution(f) }

  # With no features nothing has initialised @scores; fall back to an
  # empty Hash rather than calling #each on nil.
  @scores ||= {}

  # best score for each feature: the larger, the better
  @scores.each_value do |ks|
    ks[:BEST] = ks.values.max
  end

  @scores
end

#get_feature_type(feature = nil)

get the feature type stored in @types

Parameters:

  • feature (Symbol) (defaults to: nil)

    feature of interest
    return all feature name-type pairs if nil, otherwise return the type for the feature of interest



202
203
204
205
206
207
208
# File 'lib/fselector/algo_base/base.rb', line 202

# Look up feature types recorded in @types (see set_feature_type).
#
# @param feature [Object, nil] feature of interest; nil/false asks for
#   the whole table
# @return [Object, nil] nil when no type has been recorded at all;
#   the whole @types Hash when feature is nil; otherwise @types[feature]
def get_feature_type(feature = nil)
  return nil unless @types
  return @types unless feature

  @types[feature]
end

#get_feature_values(f, mv = nil, ck = nil) ⇒ Array

get feature values

Parameters:

  • f (Symbol)

    feature of interest

  • mv (Symbol) (defaults to: nil)

    including missing feature values? don't include missing feature values (recorded as nils) if nil, include otherwise

  • ck (Symbol) (defaults to: nil)

    class of interest. return feature values for all classes if nil, otherwise return feature values for the specific class (ck)

Returns:

  • (Array)

    feature values



156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
# File 'lib/fselector/algo_base/base.rb', line 156

# Values of one feature across samples.
#
# On first request for a feature, all samples are scanned via each_sample
# and the values are cached per class in @fvs[f]; a sample that lacks the
# feature contributes nil so that missing values can be reported.
#
# @param f [Object] feature of interest
# @param mv [Object, nil] truthy: keep missing values (nils) in the
#   result; nil/false: drop them
# @param ck [Object, nil] class of interest; nil/false: values of all
#   classes concatenated
# @return [Array, nil] feature values; nil when ck names a class that has
#   no samples (previously this raised NoMethodError when mv was falsy,
#   while returning nil when mv was truthy)
def get_feature_values(f, mv=nil, ck=nil)
  @fvs ||= {}

  unless @fvs.key?(f)
    per_class = (@fvs[f] = {})

    each_sample do |k, s|
      # nil stands for a missing feature value
      (per_class[k] ||= []) << (s.key?(f) ? s[f] : nil)
    end
  end

  values = ck ? @fvs[f][ck] : @fvs[f].values.flatten

  # unknown class: nothing to compact, report nil in both modes
  return values if mv || values.nil?

  values.compact # don't include missing feature values
end

#get_features ⇒ Array<Symbol>

get (unique) features

Returns:

  • (Array<Symbol>)

    unique features



139
140
141
# File 'lib/fselector/algo_base/base.rb', line 139

# Unique features. On first use they are gathered from the keys of every
# sample in @data and cached in @features (set_features can also assign
# @features).
#
# @return [Array] unique features, in first-seen order
def get_features
  @features ||= begin
    keys_per_class = @data.map do |_label, samples|
      samples.map { |sample| sample.keys }
    end
    keys_per_class.flatten.uniq
  end
end

#get_opt(key = nil) ⇒ Any

Note:

return all non-data as a Hash if key == nil

get non-data information for a given key

Parameters:

  • key (Symbol) (defaults to: nil)

    key of non-data

Returns:

  • (Any)

    value of non-data, can be any type



271
272
273
274
275
276
277
# File 'lib/fselector/algo_base/base.rb', line 271

# Read non-data information stored through set_opt.
#
# @param key [Object, nil] key of interest; nil/false asks for everything
# @return [Object, nil] nil when nothing has been stored at all; the
#   whole @opts Hash when key is nil; otherwise @opts[key]
def get_opt(key = nil)
  return nil unless @opts
  return @opts unless key

  @opts[key]
end

#get_sample_size ⇒ Integer

number of samples

Returns:

  • (Integer)

    sample size



294
295
296
# File 'lib/fselector/algo_base/base.rb', line 294

# Total number of samples: the sizes of get_data[k] summed over every
# class k from get_classes. Cached in @sz after the first call.
#
# @return [Integer] sample size
def get_sample_size
  @sz ||= get_classes
            .map { |label| get_data[label].size }
            .inject(0) { |total, count| total + count }
end

#select_feature!

Note:

data structure will be altered. Derived class must implement its own get_feature_subset(). This is only available for the subset selection type of algorithms, see README

reconstruct data with selected features



345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
# File 'lib/fselector/algo_base/base.rb', line 345

# Rebuild the data so that every sample keeps only the features returned
# by get_feature_subset (which derived classes must implement).
#
# Samples left without any selected feature are dropped. The rebuilt
# structure is installed through set_data, so the internal data is
# altered.
#
# NOTE(review): the class-level documentation lists
# :feature_subset_selection as the subset-selection type, while this
# guard compares against :filter_by_feature_searching -- confirm which
# symbol derived classes actually set.
#
# @return [Object, nil] nil when the subset is empty (data untouched),
#   otherwise whatever set_data returns
# @note calls abort (terminating the process) when algo_type is not
#   :filter_by_feature_searching
def select_feature!
  unless self.algo_type == :filter_by_feature_searching
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  select_feature! is the interface for the type of feature subset selection algorithms only. \n" +
          "  please consider select_feature_by_score! or select_feature_by_rank!, \n" +
          "  which is the interface for the type of feature weighting algorithms"
  end

  # derived class must implement its own one
  subset = get_feature_subset
  return if subset.empty?

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    my_s = s.select { |f, _v| subset.include? f }
    # skip samples that lost all of their features
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end

#select_feature_by_rank!(criterion, my_ranks = nil)

Note:

data structure will be altered. This is only available for the weighting type of algorithms, see README

reconstruct data by feature rank satisfying criterion

Parameters:

  • criterion (String)

    valid criterion can be '>11', '>=10', '==1', '<=10' or '<20'

  • my_ranks (Hash) (defaults to: nil)

    user customized feature ranks



413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
# File 'lib/fselector/algo_base/base.rb', line 413

# Rebuild the data so that every sample keeps only the features whose
# rank satisfies +criterion+.
#
# Samples left without any selected feature are dropped. The rebuilt
# structure is installed through set_data, so the internal data is
# altered.
#
# SECURITY: +criterion+ (and the rank values) are interpolated into a
# string that is passed to eval. Only call this with trusted input; a
# caller-controlled criterion can execute arbitrary Ruby code.
#
# @param criterion [String] comparison appended to the rank, e.g.
#   '>11', '>=10', '==1', '<=10' or '<20'
# @param my_ranks [Hash, nil] user customized feature ranks
#   ({ feature => rank }); when nil, get_feature_ranks is used
# @return [Object, nil] nil when the ranks are empty (data untouched),
#   otherwise whatever set_data returns
# @note calls abort (terminating the process) when algo_type is not
#   :filter_by_feature_weighting
def select_feature_by_rank!(criterion, my_ranks=nil)
  unless self.algo_type == :filter_by_feature_weighting
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  select_feature_by_rank! is the interface for the type of feature weighting algorithms only. \n" +
          "  please consider select_feature!, \n" +
          "  which is the interface for the type of feature subset selection algorithms"
  end

  # user ranks or internal ranks
  ranks = my_ranks || get_feature_ranks
  return if ranks.empty?

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    # evaluates e.g. "3 <=10"; see the SECURITY note above
    my_s = s.select { |f, _v| eval("#{ranks[f]} #{criterion}") }
    # skip samples that lost all of their features
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end

#select_feature_by_score!(criterion, my_scores = nil)

Note:

data structure will be altered. This is only available for the weighting type of algorithms, see README

reconstruct data by feature score satisfying criterion

Parameters:

  • criterion (String)

    valid criterion can be '>0.5', '>=0.4', '==2.0', '<=1.0' or '<0.2'

  • my_scores (Hash) (defaults to: nil)

    user customized feature scores



379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
# File 'lib/fselector/algo_base/base.rb', line 379

# Rebuild the data so that every sample keeps only the features whose
# best score (the :BEST entry) satisfies +criterion+.
#
# Samples left without any selected feature are dropped. The rebuilt
# structure is installed through set_data, so the internal data is
# altered.
#
# SECURITY: +criterion+ (and the score values) are interpolated into a
# string that is passed to eval. Only call this with trusted input; a
# caller-controlled criterion can execute arbitrary Ruby code.
#
# @param criterion [String] comparison appended to the score, e.g.
#   '>0.5', '>=0.4', '==2.0', '<=1.0' or '<0.2'
# @param my_scores [Hash, nil] user customized feature scores
#   ({ feature => { :BEST => score, ... } }); when nil,
#   get_feature_scores is used
# @return [Object, nil] nil when the scores are empty (data untouched),
#   otherwise whatever set_data returns
# @note calls abort (terminating the process) when algo_type is not
#   :filter_by_feature_weighting
def select_feature_by_score!(criterion, my_scores=nil)
  unless self.algo_type == :filter_by_feature_weighting
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  select_feature_by_score! is the interface for the type of feature weighting algorithms only. \n" +
          "  please consider select_feature!, \n" +
          "  which is the interface for the type of feature subset selection algorithms"
  end

  # user scores or internal scores
  scores = my_scores || get_feature_scores
  return if scores.empty?

  my_data = {}

  each_sample do |k, s|
    my_data[k] ||= []
    # evaluates e.g. "0.7 >0.5"; see the SECURITY note above
    my_s = s.select { |f, _v| eval("#{scores[f][:BEST]} #{criterion}") }
    # skip samples that lost all of their features
    my_data[k] << my_s unless my_s.empty?
  end

  set_data(my_data)
end

#set_classes(classes)

set classes

Parameters:

  • classes (Array<Symbol>)

    source unique class labels



125
126
127
128
129
130
131
132
# File 'lib/fselector/algo_base/base.rb', line 125

# Replace the cached list of unique class labels (@classes).
#
# @param classes [Array] unique class labels; must be exactly an Array
#   instance (the check compares classes.class with Array)
# @return [Array] the assigned value
# @note calls abort (terminating the process) for nil or non-Array input
def set_classes(classes)
  acceptable = classes && classes.class == Array

  unless acceptable
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  classes must be a Array object!"
  end

  @classes = classes
end

#set_data(data) ⇒ nil

set data and clean relevant variables in case of data change

Parameters:

  • data (Hash)

    source data structure

Returns:

  • (nil)

    to suppress console echo of data in irb



249
250
251
252
253
254
255
256
257
258
259
260
# File 'lib/fselector/algo_base/base.rb', line 249

# Install a new data structure.
#
# When data was already present, clear_vars is called first; presumably
# it resets values cached from the old data (helper not visible here --
# confirm).
#
# @param data [Hash] source data structure; must be exactly a Hash
#   instance (the check compares data.class with Hash)
# @return [nil] always nil, to suppress console echo of data in irb
# @note calls abort (terminating the process) for nil or non-Hash input
def set_data(data)
  acceptable = data && data.class == Hash

  unless acceptable
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  data must be a Hash object!"
  end

  clear_vars if @data # only needed when replacing existing data
  @data = data

  nil
end

#set_feature_type(feature, type)

set feature name-type pair

Parameters:

  • feature (Symbol)

    feature name

  • type (Symbol)

    feature type



217
218
219
220
# File 'lib/fselector/algo_base/base.rb', line 217

# Record the type of one feature in @types, creating the table on first
# use.
#
# @param feature [Object] feature name
# @param type [Object] feature type
# @return [Object] the type that was stored
def set_feature_type(feature, type)
  table = (@types ||= {})
  table[feature] = type
end

#set_features(features)

set features

Parameters:

  • features (Array<Symbol>)

    source unique features



185
186
187
188
189
190
191
192
# File 'lib/fselector/algo_base/base.rb', line 185

# Replace the cached list of unique features (@features).
#
# @param features [Array] unique features; must be exactly an Array
#   instance (the check compares features.class with Array)
# @return [Array] the assigned value
# @note calls abort (terminating the process) for nil or non-Array input
def set_features(features)
  acceptable = features && features.class == Array

  unless acceptable
    abort "[#{__FILE__}@#{__LINE__}]: \n"+
          "  features must be a Array object!"
  end

  @features = features
end

#set_opt(key, value)

set non-data information as a key-value pair

Parameters:

  • key (Symbol)

    key of non-data

  • value (Any)

    value of non-data, can be any type



283
284
285
286
# File 'lib/fselector/algo_base/base.rb', line 283

# Store one piece of non-data information in @opts, creating the table
# on first use.
#
# @param key [Object] key of the non-data entry
# @param value [Object] value to store, can be any type
# @return [Object] the value that was stored
def set_opt(key, value)
  table = (@opts ||= {}) # holds all non-data information
  table[key] = value
end