Class: SVMLab

Inherits:
Object
  • Object
show all
Defined in:
lib/svmlab.rb,
lib/svmlab-plot.rb,
lib/svmlab-optim.rb

Overview

An SVMLab object is created giving the configuration either as a file object or as a string. The configuration is in YAML format:


Feature:

<See SVMFeature class documentation>

SVM:

C: <parameter C>
g: <RBF kernel's gamma>
e: <epsilon for regression>
Scale:
  <Feature1>: 
  - <Scale1>
  - <Scale2>
  - ...
  - <ScaleN>
  <Feature2>: <Scale>

The Scale setup has to match the features given in Feature configuration and each scale can be given as scalar or as array.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(cfg) ⇒ SVMLab

All examples are centered and scaled, and the centered/scaled examples are stored in the instance variable @examples. Information about the centering/scaling is stored in the 'SVM' section of the @cfg configuration hash (its 'Scale' and 'Center' entries). There are three ways to initialize:

1) With an SVMLabConfig object
2) With a configuration file File object
3) With a string giving the configuration


46
47
48
49
50
51
52
53
54
55
56
57
58
# File 'lib/svmlab.rb', line 46

# Builds an SVMLab from a configuration and prepares its examples.
#
# cfg - an SVMLabConfig instance, which is used as-is; anything else is
#       handed to SVMLabConfig.new.
#
# Steps, in order: build the SVMFeature object from the 'Feature' section,
# fetch every example, run checkScales and checkOptimization on the
# configuration, call scaleExamples, and compute the cross-validation
# groups with setGroups.
def initialize(cfg)
  @cfg = cfg.is_a?(SVMLabConfig) ? cfg : SVMLabConfig.new(cfg)
  @features = SVMFeature.new(@cfg['Feature'].to_yaml)
  @examples = @features.getAllFeatures
  # Stays nil until #train sees its first example vector.
  @ndimensions = nil

  checkScales(@cfg)
  checkOptimization(@cfg)
  scaleExamples
  @groups = setGroups
end

Instance Attribute Details

#cfgObject (readonly)

Returns the value of attribute cfg.



37
38
39
# File 'lib/svmlab.rb', line 37

# Read-only accessor for the configuration object held in @cfg.
def cfg; @cfg; end

#featuresObject (readonly)

Returns the value of attribute features.



37
38
39
# File 'lib/svmlab.rb', line 37

# Read-only accessor for the feature object held in @features.
def features; @features; end

#pslogObject (readonly)

Returns the value of attribute pslog.



37
38
39
# File 'lib/svmlab.rb', line 37

# Read-only accessor for @pslog (nil until something assigns it).
def pslog; @pslog; end

Instance Method Details

#C=(arg) ⇒ Object

Set the penalty factor C.



91
92
93
# File 'lib/svmlab.rb', line 91

# Set the penalty factor C.
#
# arg - any object responding to #to_f; the converted Float is stored
#       under @cfg['SVM']['C'].
def C=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['C'] = arg.to_f
end

#crossvalidateObject

Cross-validation over the grouping made from “Groups” in cfg: each group in turn is left out of training and then predicted. Return values:

  • Predictions hash :

    key : example name
    value : 'truth' => the true value
             'pred' => the predicted value
    

– Remaining issues:

2) No of parallel computations should be in cfg

++



232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/svmlab.rb', line 232

# Leave-one-group-out cross-validation over @groups.
#
# For every group a model is trained on the members of all other groups,
# and each member of the held-out group that exists in @examples is then
# predicted with that model.
#
# Returns an SVMPrediction keyed by example name; each value is a hash
#   'truth' => the stored target value (rounded when 'PosClassFrom' is
#              configured, otherwise un-scaled and un-centered using the
#              'Scale' / 'Center' entries of the first configured feature)
#   'pred'  => the value returned by #predict for that example
def crossvalidate()
  # The folds run sequentially here; a forkoff-based parallel map over the
  # group keys was once used in place of this map.
  fold_results = @groups.keys.map do |held_out|
    held_out_members = @groups[held_out]
    training_names = []
    @groups.each do |group_name, group_members|
      training_names += group_members unless group_name == held_out
    end
    model = self.train(training_names)

    fold = {}
    held_out_members.each do |exname|
      # Names listed in a group but absent from @examples are skipped.
      next unless @examples[exname]
      truth =
        if @cfg['Feature']['PosClassFrom']
          @examples[exname][0].round
        else
          unscaled = @examples[exname][0] /
                     @cfg['SVM']['Scale'][@cfg['Feature']['Features'][0]][0]
          unscaled + @cfg['SVM']['Center'][@cfg['Feature']['Features'][0]][0]
        end
      fold[exname] = { 'truth' => truth,
                       'pred' => self.predict(exname, model) }
    end
    fold
  end

  merged = SVMPrediction.new
  fold_results.each do |fold|
    fold.each { |exname, phash| merged[exname] = phash }
  end
  merged
end

#dist(a, b) ⇒ Object



129
130
131
132
# File 'lib/svmlab.rb', line 129

# Euclidean distance between two equally sized numeric sequences.
#
# a, b - sequences of numbers with the same size.
#
# Returns a Float. Raises RuntimeError ("Cannot calculate distance") when
# the sizes differ.
def dist(a,b)
  unless a.size == b.size
    raise "Cannot calculate distance"
  end
  squared = 0
  a.zip(b).each do |ai, bi|
    squared += (ai - bi).abs**2
  end
  Math.sqrt(squared)
end

#e=(arg) ⇒ Object

Set epsilon for Support Vector Regression.



96
97
98
# File 'lib/svmlab.rb', line 96

# Set epsilon for Support Vector Regression.
#
# arg - any object responding to #to_f; the converted Float is stored
#       under @cfg['SVM']['e'].
def e=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['e'] = arg.to_f
end

#featurecorrelationplot(feature, file = '', title = 'Feature correlation') ⇒ Object

— featurecorrelationplot — Plots target feature on the Y axis vs. selected feature on the X axis



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/svmlab-plot.rb', line 88

# Scatter-plots the target value (index 0 of every example vector) on the
# Y axis against one selected vector column on the X axis. Both values are
# mapped back with value / @scale[i] + @center[i] before plotting.
#
# feature - Integer index into the example vectors.
# file    - handed to genericplot unchanged.
# title   - handed to genericplot unchanged.
#
# Raises RuntimeError when feature is not a valid index for some example.
def featurecorrelationplot( feature, file = '', title = 'Feature correlation')
  xs = []
  ys = []
  @examples.each do |_name, val|
    unless (0...val.size) === feature
      raise "#{feature} outside feature range"
    end
    xs.push(val[feature] / @scale[feature] + @center[feature])
    ys.push(val[0] / @scale[0] + @center[0])
  end
  series = Gnuplot::DataSet.new( [xs,ys] ) do |ds|
    ds.using = '1:2'
    ds.with = "points"
    ds.title = "Feature #{feature} vs target feature"
    ds.linewidth = 1
    ds.matrix = nil
  end
  genericplot([ series ], file, title, "Feature #{feature}", "Target feature")
end

#g=(arg) ⇒ Object

Set gamma for the RBF kernel.



101
102
103
# File 'lib/svmlab.rb', line 101

# Set gamma for the RBF kernel.
#
# arg - any object responding to #to_f; the converted Float is stored
#       under @cfg['SVM']['g'].
def g=(arg)
  svm_settings = @cfg['SVM']
  svm_settings['g'] = arg.to_f
end

#getNeighbors(example, n = 1) ⇒ Object

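Returns the entries of the @examples hash closest to example, as formatted strings. Note that n + 1 entries are returned, and the nearest one is normally example itself at distance 0. Possibly broken - check the @features object if an example looks erroneous.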



107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
# File 'lib/svmlab.rb', line 107

# Describes the stored examples nearest to +example+.
#
# Examples are ranked by Euclidean distance over everything but the first
# vector element (index 0 holds the target value). The first n + 1 ranked
# entries are reported, so the query example itself is normally included
# at distance 0.
#
# example - name (key into @examples) of the query example.
# n       - last rank index to include (default 1).
#
# Returns an Array of Strings, one per reported example: a header line
# "<name> : <distance>" followed by one line per configured feature giving
# the per-feature distance and that example's feature values. The first
# configured feature is flagged with ' *** ', all others with ' --- '.
def getNeighbors(example, n = 1)
  arr = @examples[example]
  # Everything after the target value.
  tail = lambda { |vec| vec[1...vec.size] }
  ranked = @examples.sort_by do |_name, vec|
    dist(tail.call(arr), tail.call(vec))
  end
  ranked[0..n].map do |name, vec|
    # Running offset of the current feature inside the example vector;
    # it starts at 0, i.e. the first feature covers the target slot.
    offset = 0
    header = name + ' : ' +
             format("%.3f \n", dist(tail.call(arr), tail.call(vec)))
    breakdown = ''
    @cfg['Feature']['Features'].each do |feature|
      nvector = @features.getExFeature(name, feature)
      span = offset...(offset + nvector.size)
      featdist = dist(arr[span], @examples[name][span])
      offset += nvector.size
      pretty = feature == @cfg['Feature']['Features'][0] ? ' *** ' : ' --- '
      breakdown += pretty + format("(%.2f)", featdist) + pretty +
                   feature + " :  " + nvector.join(' ') + "\n"
    end
    header + breakdown
  end
end

#getOutliers(n = (1..1), n2 = 3, predictions = nil) ⇒ Object

Finds the examples whose predictions are farthest from the correct value. Returns a string listing those examples along with their closest neighbors.



137
138
139
140
141
142
143
144
145
146
147
148
149
150
# File 'lib/svmlab.rb', line 137

# Reports the worst-predicted examples together with their neighbours.
#
# n           - which outliers to report, counted from 1 (1 = largest
#               absolute error). Either a single Integer or a Range.
# n2          - passed to getNeighbors as its n argument.
# predictions - name => {'pred' => ..., 'truth' => ...} mapping; when
#               omitted, a fresh #crossvalidate run supplies it.
#
# Returns one String: per requested outlier a block naming the example,
# its predicted and true value, and the getNeighbors output, with blocks
# separated by a newline.
def getOutliers(n = (1..1), n2 = 3, predictions = nil)
  predictions ||= self.crossvalidate
  # Largest absolute error first.
  sortedpred = predictions.sort_by { |(_name, v)|
    - (v['pred'] - v['truth']).abs }
  # Integer instead of Fixnum: Fixnum was removed in Ruby 3.2, and Integer
  # matches the same values on every Ruby version.
  ranks = n.is_a?(Integer) ? (n..n) : n
  ranks.map do |i|
    entry = sortedpred[i-1]
    "OUTLIER %d : \n"%i +
      entry[0] + " was predicted %.3f"%entry[1]['pred'] +
      " but the truth is %.3f :\n"%entry[1]['truth'] +
      getNeighbors(entry[0],n2).join('')
  end.join("\n")
end

#onefeatureplot(file = '', title = 'SVM Prediction') ⇒ Object

— onefeatureplot —



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
# File 'lib/svmlab-plot.rb', line 125

# Plots the model's prediction curve over a single input feature together
# with the stored examples.
#
# The curve is sampled at 1001 evenly spaced x values between the smallest
# and largest value found at index 1 of the example vectors; each sample is
# @model.predict([x]) / @scale[0] + @center[0]. The examples are drawn as
# points (index 1 on X, index 0 on Y).
#
# file  - output file name. A name ending in "png" selects the png
#         terminal, one ending in "ps" selects postscript; anything else
#         leaves gnuplot's terminal and output untouched.
# title - plot title.
#
# NOTE(review): @model, @scale and @center are read here but are not
# assigned anywhere in the code shown alongside this method - confirm they
# are set before calling.
def onefeatureplot(file='', title = 'SVM Prediction')
  # Four separate arrays on purpose: xp = yp = [] would alias one array.
  xp = []
  yp = []
  xt = []
  yt = []
  @examples.each do |_example, features|
    xt.push(features[1].to_f)
    yt.push(features[0].to_f)
  end
  # Computed once instead of on each of the 1001 sampling steps.
  xmin = xt.min
  xmax = xt.max
  (0..1000).each do |i|
    x = (i * (xmax - xmin) / 1000 + xmin).to_f
    xp.push(x)
    yp.push(@model.predict([x]) / @scale[0] + @center[0])
  end
  # The original guard /(png)|(ps)$/ left the "png" alternative unanchored,
  # so any name merely containing "png" got an output file without a
  # terminal. Both suffixes are now anchored at the end of the name.
  terminal = case file
             when /png$/ then "png size 800,600 small"
             when /ps$/ then "postscript"
             end
  Gnuplot.open do |gp| # This could be either a file or the gnuplot process that we pipe to
    Gnuplot::Plot.new( gp ) do |plot|
      plot.title  title
      plot.xlabel "Truth"
      plot.ylabel "Prediction"
      plot.set "grid"
      if terminal
        plot.terminal terminal
        plot.output file
      end
      plot.data = [
                   Gnuplot::DataSet.new( [xp,yp] ) { |ds|
                     ds.using = '1:2'
                     ds.with = "lines"
                     ds.title = "SVM prediction"
                     ds.linewidth = 1
                     ds.matrix = nil
                   },
                   Gnuplot::DataSet.new( [xt, yt] ) { |ds|
                     ds.using = '1:2'
                     ds.with = "points"
                     ds.title = "Correct prediction"
                     ds.linewidth = 1
                     ds.matrix = nil
                   }
                  ]
    end
  end
end

#predict(examples, model = nil) ⇒ Object

An outer binding for the RubySVM predict function. This binding introduces inverse centering and scaling of the predicted value. This in order to give a real prediction value.



162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/svmlab.rb', line 162

# Predicts one or several examples and maps the raw model output back to
# real target values.
#
# examples - a single example name (String) or a collection of names.
# model    - object responding to #predict(vector); when omitted a model
#            is trained on the full data set via #train.
#
# For a name found in @examples the stored vector is used. Otherwise the
# features are computed from a deep copy of the 'Feature' configuration
# (without 'DataSet', with 'Example' set to the name) and passed through
# scaleExample. The first vector element is dropped before prediction.
#
# With 'PosClassFrom' configured the model output is rounded; otherwise it
# is divided by the 'Scale' and shifted by the 'Center' entry of the first
# configured feature.
#
# Returns the single prediction when exactly one example was given,
# otherwise an Array of predictions. Any StandardError raised while
# handling an example is caught and the exception object itself takes the
# place of that example's prediction.
def predict(examples, model = nil)
  model = self.train unless model
  names = examples.is_a?(String) ? [ examples ] : examples
  results = names.map do |example|
    begin
      vector = @examples[example]
      unless vector
        # Marshal round-trip gives a deep copy so @cfg is left untouched.
        fcfg = Marshal.load(Marshal.dump(@cfg['Feature']))
        fcfg.delete('DataSet')
        fcfg['Example'] = example
        vector = scaleExample(SVMFeature.new(fcfg.to_yaml).getExAllFeatures(example))
      end
      if @cfg['Feature']['PosClassFrom']
        model.predict(vector[1..-1]).round
      else
        unscaled = model.predict(vector[1..-1]) /
                   @cfg['SVM']['Scale'][ @cfg['Feature']['Features'][0] ][0]
        unscaled + @cfg['SVM']['Center'][ @cfg['Feature']['Features'][0] ][0]
      end
    rescue => error
      error
    end
  end
  results.size == 1 ? results[0] : results
end

#predplotgroups(predarr, file = '', legends = [], title = 'SVM Prediction', err = nil) ⇒ Object

— predplotgroups —



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
# File 'lib/svmlab-plot.rb', line 105

# Calls predplot once per example group, restricted to that group.
#
# predarr - Array of prediction collections (name => prediction).
# file    - base output name. When non-empty the group name is inserted
#           before the last dot-separated part ("out.png" => "out.<group>.png").
# legends - handed to predplot unchanged.
# title   - base title; " on <group>" is appended for each group.
# err     - handed to predplot unchanged.
#
# The group of an example is the part of its name selected by the
# 'Groups' configuration string.
#
# NOTE(review): that string is run through eval for every name, so it must
# come from a trusted configuration. The locals substr, group, k and v keep
# their original names because the evaluated string can see them.
def predplotgroups(predarr, file = '', legends = [], title = 'SVM Prediction', err = nil)
  substr = @cfg['Feature']['Groups']
  groups = @examples.map { |k,v| k[(eval substr)] }.uniq
  groups.each do |group|
    # Keep only this group's entries of every prediction collection.
    predarr2 = predarr.map do |preds|
      subset = {}
      preds.each do |k,v|
        subset[k] = v if k[(eval substr)] == group
      end
      subset
    end
    outfile =
      if file.size == 0
        file
      else
        parts = file.split(/\./)
        [parts[0...-1], group, parts.last].join('.')
      end
    predplot(predarr2, outfile, legends, title + " on #{group}", err)
  end
end

#printExamplesObject

Returns a String of all examples with features.



153
154
155
156
157
# File 'lib/svmlab.rb', line 153

# Renders every example vector as one line of text.
#
# Returns a String with one line per entry of @examples: the vector's
# elements converted with #to_s and separated by single spaces, each line
# terminated by "\n". Example names are not included. Returns an empty
# String when there are no examples.
def printExamples
  lines = @examples.map do |_exname, vector|
    vector.map(&:to_s).join(' ') + "\n"
  end
  lines.join
end

#publish_crossvalidate(path) ⇒ Object

Same as crossvalidate, but also outputs configuration and result to a file.



259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
# File 'lib/svmlab.rb', line 259

# Same as crossvalidate, but also writes configuration and result to a
# YAML file.
#
# path - destination file. Missing parent directories are created.
#
# The file holds a hash with the keys 'Time' (DateTime of the run),
# 'Evaluation' ('RMSD' and 'CC' as reported by the prediction object),
# 'Configuration' (@cfg) and 'Predictions'. Further metrics (MeanErr, AUC,
# PBRMSD, WRMSD, F1) were already disabled in the original code.
#
# Returns the prediction object produced by crossvalidate.
def publish_crossvalidate(path)
  # Required here because this method is the only user of FileUtils.
  require 'fileutils'

  predictions = self.crossvalidate
  time = DateTime.now
  info = {
    'Time' => time,
    'Evaluation' => {
      'RMSD' => predictions.rmsd,
      'CC' => predictions.cc },
    'Configuration' => @cfg,
    'Predictions' => predictions.predictions
  }
  # The original mkdir loop never created the first component of a relative
  # path and relied on File.exists?, which Ruby 3.2 removed. mkdir_p creates
  # every missing level and is a no-op for existing directories.
  FileUtils.mkdir_p(File.dirname(path))
  File.open(path,'w') { |f| YAML.dump(info,f) }
  predictions
end

#setGroupsObject

— setGroups — Return value:

groups hash:
   key : group name
   value : array of example names

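Returns nil if @cfg[‘Feature’][‘Groups’] is set but matches neither the range syntax nor any group file. If it is not set, every example becomes its own group (leave-one-out).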



66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
# File 'lib/svmlab.rb', line 66

# Builds the cross-validation groups from @cfg['Feature']['Groups'].
#
# Three cases, depending on that setting:
#
# * not set - every example forms its own group (leave-one-out).
# * "(a..b)" or "(a...b)" with single digits a and b - examples are
#   grouped by that slice of their name.
# * anything else - treated as a file name prefix: every file in
#   @cfg['Feature']['BaseDir'] named <prefix><digits> defines one group,
#   keyed by the digits, whose members are the lines of that file.
#
# Returns a Hash of group name => Array of example names, or nil when
# 'Groups' is set but matches neither syntax.
def setGroups()
  groups = @cfg['Feature']['Groups']

  unless groups
    # If no groups set, use leave-one-out crossvalidation
    singles = {}
    @examples.each { |key, _value| singles[key] = [ key ] }
    return singles
  end

  # \A and \z anchor the whole string; the original ^...$ anchors were
  # per-line, so a multi-line setting could pass the check and be eval'ed.
  # The range is now built from the captured digits, without eval.
  range_syntax = /\A\((\d)(\.{2,3})(\d)\)\z/.match(groups)
  if range_syntax
    name_slice = Range.new(range_syntax[1].to_i, range_syntax[3].to_i,
                           range_syntax[2].size == 3)
    by_slice = {}
    @examples.each do |exname, _val|
      (by_slice[exname[name_slice]] ||= []) << exname
    end
    return by_slice
  end

  # File prefix syntax. The prefix is escaped so that regex metacharacters
  # in it are matched literally.
  basedir = @cfg['Feature']['BaseDir']
  gfiles = Dir.entries(basedir).grep(/\A#{Regexp.escape(groups)}\d+\z/)
  return nil if gfiles.empty?

  by_file = {}
  gfiles.each do |file|
    key = file[groups.size..-1] # the digits following the prefix
    by_file[key] = File.read(File.join(basedir, file)).split(/\n/)
  end
  by_file
end

#train(examples = nil) ⇒ Object

An outer binding for the RubySVM training function. If no training examples given, it will train on all data in the dataset.



193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
# File 'lib/svmlab.rb', line 193

# An outer binding for the RubySVM training function.
#
# examples - collection of example names to train on; names that are not
#            in @examples are skipped. When nil, every stored example is
#            used.
#
# For each training example the first vector element is the target and the
# remaining elements are the inputs. @ndimensions is set from the first
# vector seen, unless it is already set.
#
# Parameters are taken from @cfg['SVM']: 'C' and 'e' are applied only when
# present; 'g' defaults to 1.0 / @ndimensions. STDOUT and STDERR are
# redirected to the null device while the model is built and restored
# afterwards.
#
# Returns the SVM::Model.
def train(examples = nil)
  svm = SVM::Problem.new
  if examples
    examples.each do |exname|
      vector = @examples[exname]
      # The guard now comes before any use of the vector; the original read
      # its size first and raised NoMethodError on an unknown name.
      next unless vector
      @ndimensions ||= vector.size - 1
      svm.addExample(vector[0], vector[1..-1])
    end
  else
    @examples.each do |_name, vector|
      @ndimensions ||= vector.size - 1
      svm.addExample(vector[0], vector[1..-1])
    end
  end

  saved_err = STDERR.dup
  saved_out = STDOUT.dup
  begin
    # File::NULL instead of a hard-coded '/dev/null'; the block form closes
    # the handle, which the original left open on every call.
    File.open(File::NULL, 'w') do |sink|
      STDERR.reopen(sink)
      STDOUT.reopen(sink)
      @par = SVM::Parameter.new
      # NOTE(review): 0 and 3 are presumably libsvm's C_SVC and EPSILON_SVR
      # type codes - confirm against the RubySVM binding.
      @par.svm_type = @cfg['Feature']['PosClassFrom'] ? 0 : 3
      c = @cfg['SVM']['C']
      @par.C = c.to_f if c
      # NOTE(review): 'e' is documented as the regression epsilon but is
      # assigned to eps; in libsvm eps is the stopping tolerance and p the
      # epsilon of the loss function - confirm which one is intended.
      e = @cfg['SVM']['e']
      @par.eps = e.to_f if e
      g = @cfg['SVM']['g']
      @par.gamma = g ? g.to_f : 1.0 / @ndimensions
      SVM::Model.new(svm, @par)
    end
  ensure
    STDERR.reopen(saved_err)
    STDOUT.reopen(saved_out)
    # Release the duplicated descriptors once the streams are restored.
    saved_err.close
    saved_out.close
  end
end