Top Level Namespace

Defined Under Namespace

Modules: Eluka, GridSearch

Constant Summary collapse

VERBOSE_MAX =

VERBOSE_ITER =

VERBOSE_GRID_TIME =

VERBOSE_TIME =

Instance Method Summary collapse

#arg_process ⇒ Object
#cal_feat_imp(label, sample) ⇒ Object

Calculates the importance of each feature; returns fscore_dict and the features in descending order of score.
#feat_num_try(f_tuple) ⇒ Object
#feat_num_try_half(max_index) ⇒ Object

Decide sizes of selected features.
#initlog(name) ⇒ Object

Log related #####.
#random_shuffle(label, sample) ⇒ Object
#readdata(filename) ⇒ Object

svm data IO ######.
#select(sample, feat_v) ⇒ Object

select features and return new data.
#value_cmpf(x) ⇒ Object

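Sort key used when ranking features: returns the negated second element of x, so an ascending sort yields descending scores. It replaces the original two-argument comparator used in list.sort(): def value_cmpf(x,y): if x>y: return -1 if x<y: return 1 return 0.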
#writedata(samples, labels, filename) ⇒ Object
#writelog(str, vlevel = VERBOSE_MAX) ⇒ Object

Instance Method Details

#arg_process ⇒ `Object`

# File 'lib/fselect.rb', line 32

# Parses the command line into @train_pathfile / @train_file and, when a
# testing file is supplied, @test_pathfile / @test_file.
#
# Expects ARGV to hold 2 or 3 entries: the training file at ARGV[1] and the
# optional testing file at ARGV[2]. Prints a usage line and exits when the
# argument count is anything else.
#
# NOTE(review): Ruby's ARGV does not contain the script name, so ARGV[0] is an
# ordinary argument that is ignored here. The offsets are kept as they were so
# existing invocations keep working -- confirm with callers before shifting them.
#
# @raise [RuntimeError] if the training or testing file does not exist
def arg_process
  unless [2, 3].include?(ARGV.size)
    # Double quotes so the program name is interpolated instead of printed
    # as a literal "#{...}".
    puts "Usage: #{$PROGRAM_NAME} training_file [testing_file]"
    exit
  end

  @train_pathfile = ARGV[1]
  raise "training file not found" unless File.exist? @train_pathfile
  @train_file = File.basename(@train_pathfile)

  return unless ARGV.size == 3

  # The testing file is the third entry; reading ARGV[1] again would silently
  # reuse the training file as the test set.
  @test_pathfile = ARGV[2]
  raise "testing file not found" unless File.exist? @test_pathfile
  @test_file = File.basename(@test_pathfile)
end

#cal_feat_imp(label, sample) ⇒ `Object`

Calculates the importance of each feature; returns fscore_dict and the features in descending order of score.

# File 'lib/fselect.rb', line 99

# Scores every feature and ranks the features from highest to lowest score.
#
# Delegates the scoring to cal_Fscore(label, sample) and orders the resulting
# (feature, score) pairs with value_cmpf as the sort key.
# NOTE(review): assumes cal_Fscore returns a Hash of feature => numeric score;
# that method is not visible here -- confirm.
#
# @param label  passed through to cal_Fscore
# @param sample passed through to cal_Fscore
# @return [Array] two values: the score hash as returned by cal_Fscore, and an
#   Array of its keys ordered by ascending value_cmpf (i.e. descending score)
def cal_feat_imp(label, sample)
  puts("calculating fsc...")

  score_dict = cal_Fscore(label, sample)

  # sort_by is not guaranteed to be stable, so the original position is used
  # as a tie-breaker: features with equal scores keep the order they have in
  # score_dict.
  ranked = score_dict.each_with_index.sort_by do |pair, position|
    [value_cmpf(pair), position]
  end
  feat_v = ranked.map { |pair, _position| pair[0] }

  puts("fsc done")
  return score_dict, feat_v
end

#feat_num_try(f_tuple) ⇒ `Object`

# File 'lib/fselect.rb', line 61

# Chooses the candidate feature-subset sizes to try.
#
# Counts the leading entries of f_tuple whose score (element 1 of each entry)
# is at least 1e-20, stopping at the first entry below that threshold, and
# hands that count to feat_num_try_half. An empty f_tuple yields a count of 0
# instead of failing on an unset loop index.
#
# @param f_tuple [Array] entries indexable with [1] to obtain a numeric score
# @return [Array] at most the first eight sizes from feat_num_try_half
def feat_num_try(f_tuple)
  first_negligible = f_tuple.index { |entry| entry[1] < 1e-20 }
  usable_count = first_negligible || f_tuple.size

  # only take first eight numbers (>1%)
  feat_num_try_half(usable_count)[0...8]
end

#feat_num_try_half(max_index) ⇒ `Object`

Decide sizes of selected features

# File 'lib/fselect.rb', line 52

# Builds the list of sizes obtained by repeatedly halving max_index.
#
# Starting from max_index, each value greater than 1 is recorded and then
# divided by 2 (integer division for Integer input), until the value is no
# longer greater than 1.
#
# @param max_index the starting size
# @return [Array] the recorded sizes, largest first; empty when max_index <= 1
def feat_num_try_half(max_index)
  sizes = []
  current = max_index
  loop do
    break unless current > 1

    sizes << current
    current /= 2
  end
  sizes
end

#initlog(name) ⇒ `Object`

Log related #####

# File 'lib/fselect.rb', line 235

# Remembers the log file path in @logname and creates the file, truncating
# any existing content.
#
# @param name path of the log file
# @return [nil]
def initlog(name)
  @logname = name
  # Opening in "w" mode is enough to create/truncate; nothing is written.
  File.open(@logname, "w") {}
end

#random_shuffle(label, sample) ⇒ `Object`

# File 'lib/fselect.rb', line 72

# Shuffles label and sample in place, applying the same swaps to both so that
# label[i] and sample[i] stay paired.
#
# The global random generator is reseeded with 1 on every call, so the
# resulting order is the same each time for a given size.
#
# NOTE(review): the swap partner is drawn with rand(size) over the whole array
# rather than over the not-yet-placed prefix, so the permutation is not a
# uniform Fisher-Yates shuffle. Left as is to keep the seeded output unchanged.
#
# @param label  [Array] modified in place
# @param sample [Array] modified in place
def random_shuffle(label, sample)
  srand(1)
  count = label.size
  (0...label.size).each do |step|
    pick = rand(count)
    tail = count - step - 1
    label[pick], label[tail] = label[tail], label[pick]
    sample[pick], sample[tail] = sample[tail], sample[pick]
  end
end

#readdata(filename) ⇒ `Object`

svm data IO ######

# File 'lib/fselect.rb', line 250

# Reads a data file in sparse "label index:value index:value ..." format.
#
# Each non-comment line contributes one label (its first whitespace-separated
# token, converted with to_f) and one sample Hash mapping integer feature
# index => float value. Lines starting with "#" and blank lines are skipped so
# labels and samples always stay aligned one-to-one.
#
# @param filename path of the file to read
# @return [Array] three values: labels (Array of Float), samples (Array of
#   Hash), and the largest feature index seen (0 when there are no features)
def readdata(filename)
  labels = []
  samples = []
  max_index = 0

  # Block form closes the file even if parsing raises.
  File.foreach(filename) do |raw_line|
    line = raw_line.strip
    next if line.empty? || line.start_with?("#")

    label, *pairs = line.split(" ")
    labels.push(label.to_f)

    sample = {}
    pairs.each do |pair|
      feature, value = pair.split(":")
      index = feature.strip.to_i
      sample[index] = value.strip.to_f
      max_index = index if index > max_index
    end

    # One sample per line, pushed after all of its features are collected.
    samples.push(sample)
  end

  return labels, samples, max_index
end

#select(sample, feat_v) ⇒ `Object`

select features and return new data

# File 'lib/fselect.rb', line 121

# Projects every sample onto the given features and returns the new data.
#
# For each sample a fresh Hash is built containing only the features from
# feat_v for which the sample has a (truthy) value, inserted in ascending
# feature order. Neither sample nor feat_v is modified.
#
# @param sample [Array, Hash] the samples; when a Hash is given its values are
#   treated as the samples (the keys are ignored)
# @param feat_v [Array] features to keep
# @return [Array<Hash>] one reduced Hash per input sample, in input order
def select(sample, feat_v)
  # sort returns a new array; the result has to be kept to have any effect.
  wanted = feat_v.sort

  # An Array of sample hashes must be walked element by element: destructuring
  # each element into |key, s| would leave s nil.
  rows = sample.is_a?(Hash) ? sample.values : sample

  rows.map do |s|
    wanted.each_with_object({}) do |f, point|
      point[f] = s[f] if s[f]
    end
  end
end

#value_cmpf(x) ⇒ `Object`

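Sort key used when ranking features: returns the negated second element of x, so an ascending sort yields descending scores. It replaces the original two-argument comparator used in list.sort(): def value_cmpf(x,y): if x>y: return -1 if x<y: return 1 return 0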



93
94
95

# File 'lib/fselect.rb', line 93

# Sort key for (feature, score)-style entries: the negated element at index 1,
# so that sorting ascending by this key orders entries by descending score.
#
# @param x an object indexable with [1] whose element supports unary minus
# @return the negated x[1]
def value_cmpf(x)
  score = x[1]
  -score
end

#writedata(samples, labels, filename) ⇒ `Object`

# File 'lib/fselect.rb', line 287

# Writes samples in sparse "label index:value ..." format, one per line.
#
# Each line starts with labels[i] (or "0" when labels is nil/false), followed
# by " key:value" for every key of the sample in ascending key order.
#
# @param samples  [Array<Hash>] the samples to write
# @param labels   [Array, nil] labels matching samples by position, or nil
# @param filename path of the output file; when nil/false the data is printed
#   to $stdout, which is left open
# @return [nil]
def writedata(samples, labels, filename)
  fp = filename ? File.open(filename, "w") : $stdout

  begin
    samples.each_with_index do |sample, i|
      fp.print(labels ? labels[i] : "0")
      sample.keys.sort.each do |k|
        fp.print(" #{k}:#{sample[k]}")
      end
      fp.puts ""
    end
  ensure
    # Only close what this method opened; closing $stdout would break every
    # later write by the program.
    fp.close if filename
  end

  nil
end

#writelog(str, vlevel = VERBOSE_MAX) ⇒ `Object`

# File 'lib/fselect.rb', line 240

# Appends str to the log file named by @logname, but only when vlevel is
# strictly greater than VERBOSE_ITER; otherwise nothing is written.
#
# @param str    text to append (written with print, so no newline is added)
# @param vlevel verbosity level of this message; defaults to VERBOSE_MAX
# @return [nil]
def writelog(str, vlevel = VERBOSE_MAX)
  return unless vlevel > VERBOSE_ITER

  File.open(@logname, "a") { |log| log.print(str) }
end

Top Level Namespace

Defined Under Namespace

Constant Summary collapse

Instance Method Summary collapse

Instance Method Details

#arg_process ⇒ Object

#cal_feat_imp(label, sample) ⇒ Object

#feat_num_try(f_tuple) ⇒ Object

#feat_num_try_half(max_index) ⇒ Object

#initlog(name) ⇒ Object

#random_shuffle(label, sample) ⇒ Object

#readdata(filename) ⇒ Object

#select(sample, feat_v) ⇒ Object

#value_cmpf(x) ⇒ Object

#writedata(samples, labels, filename) ⇒ Object

#writelog(str, vlevel = VERBOSE_MAX) ⇒ Object