Top Level Namespace
Defined Under Namespace
Modules: Eluka, GridSearch
Constant Summary collapse
- VERBOSE_MAX =
100
- VERBOSE_ITER =
3
- VERBOSE_GRID_TIME =
2
- VERBOSE_TIME =
1
Instance Method Summary collapse
- #arg_process ⇒ Object
-
#cal_feat_imp(label, sample) ⇒ Object
Calculates feature importance; returns fscore_dict and the features in descending order of score.
- #feat_num_try(f_tuple) ⇒ Object
-
#feat_num_try_half(max_index) ⇒ Object
Decide sizes of selected features #####.
-
#initlog(name) ⇒ Object
Log related #####.
- #random_shuffle(label, sample) ⇒ Object
-
#readdata(filename) ⇒ Object
svm data IO ######.
-
#select(sample, feat_v) ⇒ Object
select features and return new data.
- #value_cmpf(x) ⇒ Object
- #writedata(samples, labels, filename) ⇒ Object
- #writelog(str, vlevel = VERBOSE_MAX) ⇒ Object
Instance Method Details
#arg_process ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/fselect.rb', line 32
# Reads the training (and optional testing) file paths from ARGV and stores
# them, together with their basenames, in instance variables.
#
# Expects ARGV to hold 2 or 3 entries: ARGV[1] is the training file and
# ARGV[2] is the optional testing file. ARGV[0] is not read here.
# NOTE(review): Ruby's ARGV does not include the script name, so these
# indices look like a carry-over from Python's sys.argv — confirm how the
# script is invoked before shifting them.
#
# Sets @train_pathfile and @train_file, and, when a testing file is given,
# @test_pathfile and @test_file.
#
# Prints a usage line and exits when the argument count is wrong.
#
# @return [String, nil] the testing file basename, or nil when none was given
# @raise [RuntimeError] if a given file does not exist
def arg_process
  unless [2, 3].include?(ARGV.size)
    # Double quotes are required for the interpolation to take effect.
    puts "Usage: #{$PROGRAM_NAME} training_file [testing_file]"
    exit
  end

  @train_pathfile = ARGV[1]
  raise "training file not found" unless File.exist?(@train_pathfile)

  @train_file = File.basename(@train_pathfile)

  return unless ARGV.size == 3

  # The testing file is the entry after the training file.
  @test_pathfile = ARGV[2]
  raise "testing file not found" unless File.exist?(@test_pathfile)

  @test_file = File.basename(@test_pathfile)
end
#cal_feat_imp(label, sample) ⇒ Object
Calculates feature importance; returns fscore_dict and the features in descending order of score
99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 |
# File 'lib/fselect.rb', line 99
# Scores every feature and ranks the features from highest to lowest score.
#
# The scores come from cal_Fscore(label, sample), which is defined outside
# this block.
# NOTE(review): assumes cal_Fscore returns a Hash of feature => numeric
# score — confirm against its definition.
#
# Prints progress messages to standard output before and after scoring.
#
# @param label passed through to cal_Fscore
# @param sample passed through to cal_Fscore
# @return [Array] two elements: the score hash and the features ordered by
#   descending score
def cal_feat_imp(label, sample)
  puts("calculating fsc...")
  score_dict = cal_Fscore(label, sample)

  # Sort on the negated score for descending order. The original position is
  # the tie-breaker, so equal scores keep the hash's order (sort_by alone is
  # not stable).
  ranked = score_dict.sort_by.with_index { |(_feat, score), idx| [-score, idx] }
  feat_v = ranked.map(&:first)

  puts("fsc done")
  [score_dict, feat_v]
end
#feat_num_try(f_tuple) ⇒ Object
61 62 63 64 65 66 67 68 69 70 |
# File 'lib/fselect.rb', line 61
# Chooses the feature-set sizes to try for a ranked list of features.
#
# Counts the entries that come before the first one whose score (element 1)
# is below 1e-20, then passes that count to feat_num_try_half and keeps at
# most the first eight sizes it returns.
#
# @param f_tuple [Array<Array>] entries indexable as [feature, score]
# @return [Array] at most eight sizes, as produced by feat_num_try_half;
#   an empty f_tuple yields whatever feat_num_try_half(0) returns
def feat_num_try(f_tuple)
  # Index of the first negligible score equals the number of useful leading
  # features. When none is negligible (or the list is empty) use the size.
  usable = f_tuple.index { |pair| pair[1] < 1e-20 } || f_tuple.size

  # only take first eight numbers (>1%)
  feat_num_try_half(usable)[0...8]
end
#feat_num_try_half(max_index) ⇒ Object
Decide sizes of selected features #####
52 53 54 55 56 57 58 59 |
# File 'lib/fselect.rb', line 52
# Builds a list of sizes by halving max_index repeatedly.
#
# Starts at max_index and keeps dividing by two, collecting every value that
# is still greater than 1.
#
# @param max_index [Integer] the largest size to include
# @return [Array<Integer>] sizes in decreasing order; empty when max_index
#   is 1 or less
def feat_num_try_half(max_index)
  sizes = []
  remaining = max_index
  until remaining <= 1
    sizes << remaining
    remaining /= 2
  end
  sizes
end
#initlog(name) ⇒ Object
Log related #####
235 236 237 238 |
# File 'lib/fselect.rb', line 235
# Remembers the log file path in @logname and creates the file empty.
#
# @param name [String] path of the log file
# @return [nil]
def initlog(name)
  @logname = name
  # Opening in "w" mode creates the file, or truncates it if it exists.
  File.open(@logname, "w").close
end
#random_shuffle(label, sample) ⇒ Object
72 73 74 75 76 77 78 79 80 81 82 83 84 |
# File 'lib/fselect.rb', line 72
# Shuffles label and sample in place, applying the same permutation to both
# so that label[i] still belongs to sample[i] afterwards.
#
# Reseeds the global random number generator with 1 on every call, so the
# resulting permutation is the same each time for a given size.
#
# @param label [Array] modified in place
# @param sample [Array] modified in place; indexed up to label.size - 1
# @return [nil]
def random_shuffle(label, sample)
  srand(1)
  size = label.size

  size.times do |i|
    last = size - i - 1
    # Draw only from the not-yet-fixed prefix 0..last. Drawing from the whole
    # array at every step would make some permutations likelier than others.
    pick = rand(size - i)
    label[pick], label[last] = label[last], label[pick]
    sample[pick], sample[last] = sample[last], sample[pick]
  end

  nil
end
#readdata(filename) ⇒ Object
svm data IO ######
250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 |
# File 'lib/fselect.rb', line 250
# Reads a data file with one sample per line in the form
# "label index:value index:value ...".
#
# Blank lines and lines starting with "#" are skipped. The first
# whitespace-separated token of each line is the label; every following
# token is split on ":" into an integer feature index and a float value.
#
# @param filename [String] path of the file to read
# @return [Array] three elements: the labels (Array<Float>), the samples
#   (Array<Hash{Integer => Float}>, one per line), and the largest feature
#   index seen (0 when there are no features)
# @raise [ArgumentError] if a feature token contains no ":"
def readdata(filename)
  labels = []
  samples = []
  max_index = 0

  # File.foreach closes the file even if parsing raises.
  File.foreach(filename) do |raw|
    line = raw.strip
    next if line.empty? || line.start_with?("#")

    label, *pairs = line.split(" ")
    sample = {}
    pairs.each do |pair|
      feature, value = pair.split(":", 2)
      raise ArgumentError, "malformed feature #{pair.inspect} in #{filename}" if value.nil?

      index = feature.to_i
      sample[index] = value.to_f
      max_index = index if index > max_index
    end

    # Exactly one label and one sample per line, so the arrays stay aligned
    # even for a line that has a label but no features.
    labels.push(label.to_f)
    samples.push(sample)
  end

  [labels, samples, max_index]
end
#select(sample, feat_v) ⇒ Object
select features and return new data
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
# File 'lib/fselect.rb', line 121
# Projects every sample onto the chosen features and returns the new data.
#
# Each returned hash contains only the features from feat_v that the sample
# actually has, inserted in ascending feature order. Neither sample nor
# feat_v is modified.
#
# @param sample [Array<Hash>, Hash] the samples; when a Hash is given, its
#   values are treated as the samples
# @param feat_v [Array] the features to keep
# @return [Array<Hash>] one reduced hash per sample
def select(sample, feat_v)
  # Array#sort returns a new array, so its result has to be kept.
  wanted = feat_v.sort
  # Also accept a Hash of key => sample, which the earlier |key, s| block
  # form would have iterated.
  rows = sample.is_a?(Hash) ? sample.values : sample

  rows.map do |s|
    wanted.each_with_object({}) do |f, point|
      point[f] = s[f] if s.key?(f)
    end
  end
end
#value_cmpf(x) ⇒ Object
93 94 95 |
# File 'lib/fselect.rb', line 93
# Sort key: the negation of the element at index 1 of x.
#
# @param x an object indexable with [1] whose element responds to unary minus
# @return the negated element
def value_cmpf(x)
  score = x[1]
  -score
end
#writedata(samples, labels, filename) ⇒ Object
287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 |
# File 'lib/fselect.rb', line 287
# Writes samples, one per line, as "label index:value index:value ...".
#
# Feature indices are written in ascending order. When labels is nil or
# empty, "0" is written in place of each label.
#
# @param samples [Array<Hash>] one hash of feature => value per sample
# @param labels [Array, nil] labels aligned with samples
# @param filename [String, nil] destination path; standard output when nil
# @return [nil]
def writedata(samples, labels, filename)
  out = filename ? File.open(filename, "w") : $stdout
  has_labels = labels && !labels.empty?

  begin
    samples.each_with_index do |sample, i|
      out.print(has_labels ? labels[i] : "0")
      sample.keys.sort.each { |k| out.print(" #{k}:#{sample[k]}") }
      out.puts ""
    end
    out.flush
  ensure
    # Close only a file this method opened; $stdout must stay usable.
    out.close if filename
  end

  nil
end
#writelog(str, vlevel = VERBOSE_MAX) ⇒ Object
240 241 242 243 244 245 246 |
# File 'lib/fselect.rb', line 240
# Appends str to the log file named by @logname.
#
# The message is written only when vlevel is strictly greater than
# VERBOSE_ITER; otherwise nothing happens. No newline is added.
#
# @param str [String] text to append
# @param vlevel [Integer] verbosity level of the message
# @return [nil]
def writelog(str, vlevel = VERBOSE_MAX)
  return unless vlevel > VERBOSE_ITER

  # Block form closes the file even if the write raises.
  File.open(@logname, "a") { |log| log.print(str) }
  nil
end