Class: OpenTox::Transform::ModelTransformer

Inherits:
Object
  • Object
show all
Defined in:
lib/transform.rb

Overview

Attaches transformations to an OpenTox::Model. Stores props and sims, and performs similarity calculations.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(model) ⇒ ModelTransformer

@param model the model to transform



322
323
324
325
# File 'lib/transform.rb', line 322

# Binds the transformer to a model.
#
# @param model the model to transform; it must respond to
#   +similarity_algorithm+. The value returned by that call is cached in
#   @similarity_algorithm at construction time.
def initialize(model)
  @model = model
  @similarity_algorithm = model.similarity_algorithm
end

Instance Attribute Details

#actsObject

Returns the value of attribute acts.



319
320
321
# File 'lib/transform.rb', line 319

# Reader for @acts: the activity values collected by get_matrices (one per
# matrix row) and later narrowed by transform to the selected neighbors.
# Returns nil until one of those methods has run.
def acts; @acts; end

#modelObject

Returns the value of attribute model.



319
320
321
# File 'lib/transform.rb', line 319

# Reader for @model, the model object handed to the constructor.
def model; @model; end

#similarity_algorithmObject

Returns the value of attribute similarity_algorithm.



319
320
321
# File 'lib/transform.rb', line 319

# Reader for @similarity_algorithm, the algorithm name cached from the model
# in the constructor (transform uses it when building the gram matrix).
def similarity_algorithm; @similarity_algorithm; end

#simsObject

Returns the value of attribute sims.



319
320
321
# File 'lib/transform.rb', line 319

# Reader for @sims. While neighbors are being collected this is a flat list
# of similarity values (appended by add_neighbor); once transform has
# finished it is the pair [gram_matrix, neighbor_similarities].
def sims; @sims; end

Instance Method Details

#add_neighbor(training_props, idx) ⇒ Object

Adds a neighbor to @model.neighbors if it passes the similarity threshold, and appends its index to @ids to signal that the row survives as a neighbor.



420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
# File 'lib/transform.rb', line 420

# Registers the training row at +idx+ as a neighbor of the query when it is
# similar enough.
#
# The similarity between +training_props+ and the query is computed via
# #similarity. If it is strictly greater than the model parameter "min_sim"
# and the model has activities for the row's compound, one entry per activity
# is appended to @model.neighbors, and +sim+ / +idx+ are appended to @sims /
# @ids once per activity.
#
# NOTE(review): get_matrices already emits one matrix row per activity, so a
# compound with k activities ends up with k*k neighbor entries here -- confirm
# that this is intended.
#
# @param training_props feature values of one training row
# @param idx index of that row in @cmpds / @fps
# @return the compound's activity list when entries were added, otherwise nil
def add_neighbor(training_props, idx)
  sim = similarity(training_props)
  return unless sim > @model.parameter("min_sim")

  compound = @cmpds[idx]
  activities = @model.activities[compound]
  return unless activities

  activities.each do |activity|
    @model.neighbors << {
      :compound => compound,
      :similarity => sim,
      :features => @fps[idx].keys,
      :activity => activity
    }
    @sims << sim
    @ids << idx
  end
end

#convert_nilsObject

Replaces nils by zeroes in n_prop and q_prop. Enables the use of Tanimoto similarities with arrays (rows of n_prop and q_prop).



475
476
477
478
# File 'lib/transform.rb', line 475

# Replaces every nil cell in @n_prop (all rows) and in @q_prop with 0,
# modifying the arrays in place. Only nil is replaced; other values
# (including false) are left as they are.
#
# @return [Array] @q_prop
def convert_nils
  zero_for_nil = lambda { |cell| cell.nil? ? 0 : cell }
  @n_prop.each { |row| row.map!(&zero_for_nil) }
  @q_prop.map!(&zero_for_nil)
end

#get_matricesObject

Converts fingerprints to a matrix; rows follow the order of @model.compounds, and nil values are allowed. The query compound's fingerprints are converted the same way.



489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
# File 'lib/transform.rb', line 489

# Builds the training matrix and the query vector from the model.
#
# For every compound in @model.compounds that has activities, one row of
# feature values (in @model.features order, nil where the compound's
# fingerprint lacks the feature) is appended to @n_prop per activity, together
# with the compound in @cmpds and a deep copy of its fingerprint in @fps. The
# activities themselves are collected in @acts. Compounds without activities
# are skipped with a warning. @q_prop receives the query's value for each
# feature from @model.compound_fingerprints.
#
# Resets @cmpds, @fps, @acts, @n_prop and @q_prop on every call.
#
# @return the model's feature list (result of the final +each+)
def get_matrices
  @cmpds = []; @fps = []; @acts = []; @n_prop = []; @q_prop = []

  # Iterate over @model.compounds rather than the fingerprints hash: the hash
  # is unordered, and row order must be stable.
  @model.compounds.each do |cmpd|
    fp = @model.fingerprints[cmpd]
    acts = @model.activities[cmpd]
    unless acts
      LOGGER.warn "No activity found for compound '#{cmpd}' in model '#{@model.uri}'"
      next
    end

    @acts += acts
    LOGGER.debug "#{acts.size} activities for '#{cmpd}'" if acts.size > 1
    row = @model.features.collect { |f| fp[f] } # nils for non-existent f's
    acts.size.times do # multiple additions for multiple activities
      # Store an independent Array per row. A block-less +collect+ returns an
      # Enumerator on Ruby >= 1.9, which breaks the in-place row edits done by
      # convert_nils / remove_nils.
      @n_prop << row.dup
      @cmpds << cmpd
      @fps << Marshal.load(Marshal.dump(fp))
    end
  end

  @model.features.each { |f| @q_prop << @model.compound_fingerprints[f] } # query structure
end

#neighborsObject

Finds neighbors and stores them in @model.neighbors; every row of the training matrix is checked.



410
411
412
413
414
415
# File 'lib/transform.rb', line 410

# Resets @model.neighbors to an empty list and offers every row of @n_prop,
# together with its index, to add_neighbor.
#
# @return [Array] @n_prop
def neighbors
  @model.neighbors = []
  # AM: access all compounds
  @n_prop.each_with_index { |row, row_idx| add_neighbor(row, row_idx) }
end

#propsObject



515
516
517
# File 'lib/transform.rb', line 515

# Returns the pair [@n_prop, @q_prop] when the model parameter
# "propositionalized" is truthy, and nil otherwise.
def props
  [ @n_prop, @q_prop ] if @model.parameter("propositionalized")
end

#remove_nilsObject

Removes nil entries from n_prop and q_prop. Matrix is a nested two-dimensional array. Removes iteratively rows or columns with the highest fraction of nil entries, until all nil entries are removed. Tie break: columns take precedence. Deficient input such as [[nil],] will not be completely reduced, as the algorithm terminates if any matrix dimension (x or y) is zero. Enables the use of cosine similarity / SVD



446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
# File 'lib/transform.rb', line 446

def remove_nils
 return @n_prop if (@n_prop.length == 0 || @n_prop[0].length == 0)
  col_nr_nils = (Matrix.rows(@n_prop)).column_vectors.collect{ |cv| (cv.to_a.count(nil) / cv.size.to_f) }
  row_nr_nils = (Matrix.rows(@n_prop)).row_vectors.collect{ |rv| (rv.to_a.count(nil) / rv.size.to_f) }
  m_cols = col_nr_nils.max
  m_rows = row_nr_nils.max
  idx_cols = col_nr_nils.index(m_cols)
  idx_rows = row_nr_nils.index(m_rows)
  while ((m_cols > 0) || (m_rows > 0)) do
    if m_cols >= m_rows
      @n_prop.each { |row| row.slice!(idx_cols) }
      @q_prop.slice!(idx_cols)
    else
      @n_prop.slice!(idx_rows)
      @ids.slice!(idx_rows)
    end
    break if (@n_prop.length == 0) || (@n_prop[0].length == 0)
    col_nr_nils = Matrix.rows(@n_prop).column_vectors.collect{ |cv| (cv.to_a.count(nil) / cv.size.to_f) }
    row_nr_nils = Matrix.rows(@n_prop).row_vectors.collect{ |rv| (rv.to_a.count(nil) / rv.size.to_f) }
    m_cols = col_nr_nils.max
    m_rows = row_nr_nils.max
    idx_cols= col_nr_nils.index(m_cols)
    idx_rows = row_nr_nils.index(m_rows)
  end
end

#similarity(training_props) ⇒ Object

Executes model similarity_algorithm



482
483
484
# File 'lib/transform.rb', line 482

# Computes the similarity between one training row and the query vector
# @q_prop by calling the method named by @model.similarity_algorithm inside
# the OpenTox::Algorithm namespace.
#
# NOTE(review): the call is assembled as a string and passed to eval, so the
# algorithm name is executed as Ruby code -- confirm that
# @model.similarity_algorithm can never come from untrusted input.
#
# @param training_props feature values of one training row
# @return whatever the selected similarity method returns
def similarity(training_props)
  call_source = "OpenTox::Algorithm::#{@model.similarity_algorithm}(training_props, @q_prop)"
  eval(call_source)
end

#transformObject



327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
# File 'lib/transform.rb', line 327

# Runs the complete preprocessing and neighbor-selection pipeline.
#
# Steps, in order:
# 1. get_matrices builds @n_prop (training rows), @q_prop (query row), @acts,
#    @cmpds and @fps from the model.
# 2. Preprocessing depends on @model.similarity_algorithm:
#    - "Similarity.cosine": nil cells are removed (first the columns where the
#      query is nil, then remove_nils), @fps / @cmpds / @acts are realigned to
#      the surviving rows, and the data is passed through AutoScale and SVD.
#    - anything else: nil cells are replaced by 0 via convert_nils.
# 3. neighbors fills @model.neighbors, @sims and @ids; @n_prop and @acts are
#    then reduced to the rows listed in @ids.
# 4. Unless the model parameter "propositionalized" is set, a symmetric matrix
#    of pairwise similarities between the selected rows is built, with 1.0 on
#    the diagonal.
# 5. If SVD was applied, @n_prop / @q_prop are restored from the backups taken
#    before scaling, and @n_prop is again reduced to the rows in @ids.
#
# Returns (and stores in @sims) the two-element array
# [gram_matrix, neighbor_similarities]; gram_matrix is [] when the model is
# propositionalized.
def transform
  get_matrices # creates @n_prop, @q_prop, @acts from ordered fps
  @ids = (0..((@n_prop.length)-1)).to_a # surviving compounds; become neighbors

  # Preprocessing
  if (@model.similarity_algorithm == "Similarity.cosine")
    # truncate nil-columns and -rows
    # NOTE(review): the debug lines in this branch read @n_prop[0].size and
    # therefore raise if @n_prop is empty.
    LOGGER.debug "O: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
    # Drop every feature column for which the query itself has no value.
    while @q_prop.size>0
      idx = @q_prop.index(nil)
      break if idx.nil?
      @q_prop.slice!(idx)
      @n_prop.each { |r| r.slice!(idx) }
    end
    LOGGER.debug "Q: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
    remove_nils  # removes nil cells (for cosine); alters @n_prop, @q_prop, cuts down @ids to survivors
    LOGGER.debug "M: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"

    # adjust rest
    # Keep only the entries whose rows survived remove_nils, so that index i
    # in @fps / @cmpds / @acts again refers to row i of @n_prop.
    fps_tmp = []; @ids.each { |idx| fps_tmp << @fps[idx] }; @fps = fps_tmp
    cmpds_tmp = []; @ids.each { |idx| cmpds_tmp << @cmpds[idx] }; @cmpds = cmpds_tmp
    acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp

    # scale and svd
    nr_cases, nr_features = @n_prop.size, @n_prop[0].size
    gsl_n_prop = GSL::Matrix.alloc(@n_prop.flatten, nr_cases, nr_features); gsl_n_prop_orig = gsl_n_prop.clone # make backup
    gsl_q_prop = GSL::Matrix.alloc(@q_prop.flatten, 1, nr_features); gsl_q_prop_orig = gsl_q_prop.clone # make backup
    # Column by column: replace the training column with autoscaler.vs and
    # apply the same autoscaler to the query's cell.
    (0...nr_features).each { |i|
       autoscaler = OpenTox::Transform::AutoScale.new(gsl_n_prop.col(i))
       gsl_n_prop.col(i)[0..nr_cases-1] = autoscaler.vs
       gsl_q_prop.col(i)[0..0] = autoscaler.transform gsl_q_prop.col(i)
    }
    # NOTE(review): AutoScale is referenced as OpenTox::Transform::AutoScale
    # but SVD as OpenTox::Algorithm::Transform::SVD -- confirm both namespaces
    # exist. The meaning of the 0.0 argument is not visible here.
    svd = OpenTox::Algorithm::Transform::SVD.new(gsl_n_prop, 0.0)
    @n_prop = svd.data_transformed_matrix.to_a
    @q_prop = svd.transform(gsl_q_prop).row(0).to_a
    LOGGER.debug "S: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}"
  else
    convert_nils # convert nil cells (for tanimoto); leave @n_prop, @q_prop, @ids untouched
  end

  # neighbor calculation
  @ids = [] # surviving compounds become neighbors
  @sims = [] # calculated by neighbor routine
  neighbors
  # From here on @ids holds the row indices appended by add_neighbor.
  n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp # select neighbors from matrix
  acts_tmp = []; @ids.each { |idx| acts_tmp << @acts[idx] }; @acts = acts_tmp


  # Sims between neighbors, if necessary
  gram_matrix = []
  if !@model.parameter("propositionalized") # need gram matrix for standard setting (n. prop.)
    @n_prop.each_index do |i|
      gram_matrix[i] = [] unless gram_matrix[i]
      @n_prop.each_index do |j|
        # Only the upper triangle is computed; the value is mirrored below.
        if (j>i)
          # NOTE(review): this uses the algorithm name cached in
          # @similarity_algorithm, while the preprocessing branch above and
          # #similarity read @model.similarity_algorithm. The string is run
          # through eval -- confirm it cannot come from untrusted input.
          sim = eval("OpenTox::Algorithm::#{@similarity_algorithm}(@n_prop[i], @n_prop[j])")
          gram_matrix[i][j] = sim
          gram_matrix[j] = [] unless gram_matrix[j]
          gram_matrix[j][i] = gram_matrix[i][j]
        end
      end
      gram_matrix[i][i] = 1.0
    end
  end

  # reclaim original data (if svd was performed)
  # svd is a local assigned only inside the cosine branch; it is nil otherwise.
  if svd
    @n_prop = gsl_n_prop_orig.to_a 
    n_prop_tmp = []; @ids.each { |idx| n_prop_tmp << @n_prop[idx] }; @n_prop = n_prop_tmp
    @q_prop = gsl_q_prop_orig.row(0).to_a
  end

  LOGGER.debug "F: #{@n_prop.size}x#{@n_prop[0].size}; R: #{@q_prop.size}" if (@n_prop && @n_prop[0] && @q_prop)
  LOGGER.debug "Sims: #{@sims.size}, Acts: #{@acts.size}"

  # Final shape of @sims (also the return value): [gram_matrix, neighbor sims].
  @sims = [ gram_matrix, @sims ] 

end