Module: OpenTox::Algorithm::Neighbors

Defined in:
lib/utils.rb,
lib/algorithm.rb

Overview

neighbors

Class Method Summary collapse

Class Method Details

.get_confidence(params) ⇒ Object

Get confidence, computed as the mean of the neighbor similarities. Parameters: params (Hash) with required keys :sims, :acts. Returns: the confidence value.



500
501
502
503
504
505
# File 'lib/utils.rb', line 500

# Get confidence as the arithmetic mean of the neighbor similarities.
# @param [Hash] params Required keys: :sims (Array of numeric similarities), :acts (accepted, but not read here)
# @return [Float] Confidence; 0.0 when :sims is empty
def self.get_confidence(params)
  sims = params[:sims]
  confidence =
    if sims.empty?
      0.0 # no neighbors, no evidence (a block-less inject would give nil and nil/0 would raise)
    else
      # Seeding with 0.0 forces Float division, so Integer similarities are not truncated.
      sims.inject(0.0) { |sum, x| sum + x } / sims.size
    end
  LOGGER.debug "Confidence is: '" + confidence.to_s + "'."
  confidence
end

.local_svm_classification(params) ⇒ Numeric

Local support vector classification from neighbors

Parameters:

  • params (Hash)

    Keys `:props, :acts, :sims, :min_train_performance` are required

Returns:

  • (Hash)

    A Hash with the keys :prediction (the prediction value) and :confidence.



381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# File 'lib/algorithm.rb', line 381

# Local support vector classification from neighbors.
# @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
# @return [Hash] `:prediction` (the predicted label as a String with the "Val" marker removed,
#   or nil if no prediction could be made) and `:confidence` (0.0 whenever the prediction is nil)
def self.local_svm_classification(params)

  confidence = 0.0
  prediction = nil

  begin
    LOGGER.debug "Local SVM."
    if params[:acts].size>0
      # Work on copies of the inputs. A block-less #collect is not a copy on Ruby >= 1.9
      # (it returns an Enumerator), hence the explicit dup.
      props = params[:props] ? [ params[:props][0].dup, params[:props][1].dup ] : nil
      acts = params[:acts].collect{|v| "Val" + v.to_s} # Convert to string for R to recognize classification
      prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
      prediction = prediction.sub(/Val/,"") if prediction # Strip the marker again; the label stays a String
      LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
      # A missing (censored) prediction must keep confidence 0.0, so only ask for a
      # confidence once a prediction exists.
      confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) unless prediction.nil?
    end
  rescue Exception => e
    # NOTE(review): kept as broad as the original; the R bridge's error classes may not
    # descend from StandardError -- confirm before narrowing.
    LOGGER.debug "#{e.class}: #{e.message}"
    LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
    prediction = nil
    confidence = 0.0
  end

  # Always hand back the result Hash, also after a failure.
  {:prediction => prediction, :confidence => confidence}
end

.local_svm_prop(props, acts, min_train_performance) ⇒ Numeric

Local support vector prediction from neighbors. Uses propositionalized setting. Not to be called directly (use local_svm_regression or local_svm_classification).

Parameters:

  • props, (Array)

    propositionalization of neighbors and query structure, e.g. [ two-nested-Arrays_for_n, Array_for_q ]

  • acts, (Array)

    activities for neighbors.

  • min_train_performance, (Float)

    parameter to control censoring

Returns:

  • (Numeric)

    A prediction value.



419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
# File 'lib/algorithm.rb', line 419

# Local support vector prediction from neighbors.
# Uses propositionalized setting.
# Not to be called directly (use local_svm_regression or local_svm_classification).
# @param [Array] props propositionalization of neighbors and query structure: [ two-nested-Arrays_for_n, Array_for_q ]
# @param [Array] acts activities for neighbors.
# @param [Float] min_train_performance parameter to control censoring
# @return [Object, nil] A prediction value. nil when the training performance is NaN or below
#   min_train_performance, when training did not succeed, or when anything failed on the R side.
def self.local_svm_prop(props, acts, min_train_performance)

  LOGGER.debug "Local SVM (Propositionalization / Kernlab Kernel)."
  n_prop = props[0] # is a matrix, i.e. two nested Arrays.
  q_prop = props[1] # is an Array.

  # Without variance in the activities the first one is returned directly; R is not started.
  return acts[0] if Algorithm::zero_variance?(acts)

  prediction = nil
  #LOGGER.debug gram_matrix.to_yaml
  @r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
  begin
    @r.eval "suppressPackageStartupMessages(library('caret'))" # requires R packages "caret" and "kernlab"
    @r.eval "suppressPackageStartupMessages(library('doMC'))" # requires R packages "multicore"
    @r.eval "registerDoMC()" # switch on parallel processing
    @r.eval "set.seed(1)"

    # set data
    LOGGER.debug "Setting R data ..."
    @r.n_prop = n_prop.flatten
    @r.n_prop_x_size = n_prop.size
    @r.n_prop_y_size = n_prop[0].size
    @r.y = acts
    @r.q_prop = q_prop
    #@r.eval "y = matrix(y)"
    @r.eval "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
    @r.eval "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"

    # prepare data
    LOGGER.debug "Preparing R data ..."
    @r.eval <<-EOR
        weights=NULL
        if (class(y) == 'character') { 
          y = factor(y)
          suppressPackageStartupMessages(library('class')) 
          #weights=unlist(as.list(prop.table(table(y))))
        }
    EOR

    # drop near-zero-variance and highly correlated columns, from training matrix and query alike
    @r.eval <<-EOR
        rem = nearZeroVar(prop_matrix)
        if (length(rem) > 0) {
          prop_matrix = prop_matrix[,-rem,drop=F]
          q_prop = q_prop[,-rem,drop=F]
        }
        rem = findCorrelation(cor(prop_matrix))
        if (length(rem) > 0) {
          prop_matrix = prop_matrix[,-rem,drop=F]
          q_prop = q_prop[,-rem,drop=F]
        }
    EOR

    # model + support vectors
    LOGGER.debug "Creating R SVM model ..."
    train_success = @r.eval <<-EOR
        # AM: TODO: evaluate class weight effect by altering:
        # AM: comment in 'weights' above run and class.weights=weights vs. class.weights=1-weights
        # AM: vs
        # AM: comment out 'weights' above (status quo), thereby disabling weights
        model = train(prop_matrix,y,
                       method="svmradial",
                       preProcess=c("center", "scale"),
                       class.weights=weights,
                       trControl=trainControl(method="LGOCV",number=10),
                       tuneLength=8
                     )
        perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
    EOR


    # prediction
    LOGGER.debug "Predicting ..."
    @r.eval "p = predict(model,q_prop)"
    @r.eval "if (class(y)!='numeric') p = as.character(p)"
    prediction = @r.p

    # censoring
    prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance )
    prediction = nil unless train_success
    LOGGER.debug "Performance: #{sprintf("%.2f", @r.perf)}"
  rescue Exception => e
    # NOTE(review): kept as broad as the original; RinRuby's error classes may not
    # descend from StandardError -- confirm before narrowing.
    LOGGER.debug "#{e.class}: #{e.message}"
    LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
    # The censoring above may not have completed, so an already fetched value
    # must not escape unchecked.
    prediction = nil
  ensure
    @r.quit # free R, on every path out of this method
  end
  prediction
end

.local_svm_regression(params) ⇒ Numeric

Local support vector regression from neighbors

Parameters:

  • params (Hash)

    Keys `:props, :acts, :sims, :min_train_performance` are required

Returns:

  • (Hash)

    A Hash with the keys :prediction (the prediction value) and :confidence.



349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
# File 'lib/algorithm.rb', line 349

# Local support vector regression from neighbors.
# @param [Hash] params Keys `:props, :acts, :sims, :min_train_performance` are required
# @return [Hash] `:prediction` (the predicted value, or nil if no usable prediction could be made)
#   and `:confidence` (0.0 whenever the prediction is nil)
def self.local_svm_regression(params)

  confidence = 0.0
  prediction = nil

  begin
    LOGGER.debug "Local SVM."
    if params[:acts].size>0
      # Work on copies of the inputs. A block-less #collect is not a copy on Ruby >= 1.9
      # (it returns an Enumerator), hence the explicit dup.
      props = params[:props] ? [ params[:props][0].dup, params[:props][1].dup ] : nil
      acts = params[:acts].dup
      prediction = local_svm_prop( props, acts, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
      # Infinity and NaN are not usable predictions. Only Floats can hold them, and
      # the type check keeps other return values from raising here.
      prediction = nil if prediction.is_a?(Float) && (prediction.nan? || prediction.infinite?)
      LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
      confidence = get_confidence({:sims => params[:sims][1], :acts => params[:acts]}) unless prediction.nil?
    end
  rescue Exception => e
    # NOTE(review): kept as broad as the original; the R bridge's error classes may not
    # descend from StandardError -- confirm before narrowing.
    LOGGER.debug "#{e.class}: #{e.message}"
    LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
    prediction = nil
    confidence = 0.0
  end

  # Always hand back the result Hash, also after a failure.
  {:prediction => prediction, :confidence => confidence}
end

.weighted_majority_vote(params) ⇒ Numeric

Classification with majority vote from neighbors weighted by similarity

Parameters:

  • params (Hash)

    Keys `:acts, :sims, :value_map` are required

Returns:

  • (Hash)

    A Hash with the keys :prediction (the prediction value) and :confidence.



305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
# File 'lib/algorithm.rb', line 305

# Classification with majority vote from neighbors weighted by similarity.
# With a two-entry :value_map, activity 1 votes against and activity 2 votes for; the
# sign of the weighted vote sum decides between 1 and 2. Otherwise the prediction is the
# rounded similarity-weighted mean of the activities.
# @param [Hash] params Keys `:acts, :sims, :value_map` are required
# @return [Hash] `:prediction` (nil without neighbors, or when the weights of a
#   multinomial vote do not give a finite mean) and `:confidence`
def self.weighted_majority_vote(params)

  neighbor_contribution = 0.0
  confidence_sum = 0.0
  confidence = 0.0
  prediction = nil
  acts = params[:acts]
  binary = params[:value_map].size == 2 # AM: provide compat to binary classification: 1=>false 2=>true

  LOGGER.debug "Weighted Majority Vote Classification."

  acts.each_index do |idx|
    neighbor_weight = params[:sims][1][idx]
    neighbor_contribution += acts[idx] * neighbor_weight
    if binary
      case acts[idx]
      when 1
        confidence_sum -= neighbor_weight
      when 2
        confidence_sum += neighbor_weight
      end
    else
      confidence_sum += neighbor_weight
    end
  end

  unless acts.empty?
    if binary
      if confidence_sum >= 0.0
        prediction = 2
      elsif confidence_sum < 0.0
        prediction = 1
      end
    else
      # AM: new multinomial prediction
      weighted_mean = neighbor_contribution/confidence_sum
      # A zero weight sum makes this NaN or infinite, and rounding either would raise
      # FloatDomainError; report "no prediction" instead.
      prediction = weighted_mean.round if weighted_mean.finite?
    end
    confidence = (confidence_sum/acts.size).abs
  end

  LOGGER.debug "Prediction is: '" + prediction.to_s + "'." unless prediction.nil?
  LOGGER.debug "Confidence is: '" + confidence.to_s + "'." unless prediction.nil?
  {:prediction => prediction, :confidence => confidence}
end