Class: OpenTox::Serializer::Spreadsheets

Inherits:
Object
  • Object
show all
Defined in:
lib/serializer.rb

Overview

Serializer for spreadsheet formats

Instance Method Summary collapse

Constructor Details

#initialize(dataset) ⇒ Spreadsheets

Create a new spreadsheet serializer

Parameters:



459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
# File 'lib/serializer.rb', line 459

# Create a new spreadsheet serializer.
#
# Builds @rows as an array of arrays: a header row ("SMILES" followed by one
# column per exported feature) and, for every compound that has data entries,
# one row per value index of its features.
#
# @param dataset object responding to #features, #compounds and #data_entries
#   (presumably an OpenTox dataset -- confirm against callers)
# @raise [RuntimeError] if the features of one compound carry differing
#   numbers of values
def initialize(dataset)
  @rows = [["SMILES"]]

  features = dataset.features.keys

  # prepare for subgraphs: SMARTS strings replace the feature ids in the
  # header only when the substructure check yields the same truthy answer
  # for every feature
  features_smarts = nil
  have_substructures = features.collect { |id| dataset.features[id][RDF.type].include? OT.Substructure }.compact.uniq
  if have_substructures.size == 1 && have_substructures[0]
    features_smarts = features.collect { |id| "'" + dataset.features[id][OT.smarts] + "'" }
  end

  # drop features whose type list mentions "MissingFeature"
  features = features.reject do |id|
    dataset.features[id][RDF.type].any? { |typestr| typestr.include? "MissingFeature" }
  end

  # detect nr duplicates per compound; compounds without any data entries
  # (nil or empty) are skipped instead of failing on a missing size list
  compound_sizes = {}
  dataset.compounds.each do |compound|
    entries = dataset.data_entries[compound]
    next unless entries && !entries.empty?

    sizes = entries.collect { |_feature, values| values.size }.uniq
    raise "Inappropriate data for CSV export for compound #{compound}" if sizes.size > 1
    compound_sizes[compound] = sizes.first
  end

  # get headers
  @rows.first.concat(features_smarts || features)

  # feature positions pre-calculated (+1 because column 0 holds the compound id)
  feature_positions = {}
  features.each_with_index { |feature, idx| feature_positions[feature] = idx + 1 }

  # serialize to csv
  dataset.compounds.each do |compound|
    entries = dataset.data_entries[compound]
    next unless entries && !entries.empty?

    # NOTE: the first column is headed "SMILES" but is filled with the
    # URL-encoded InChI of the compound
    inchi = URI.encode_www_form_component(Compound.new(compound).to_inchi)
    row_count = compound_sizes[compound]

    # allocate container: one row per value index, as wide as the header
    row_container = Array.new(row_count) do
      row = Array.new(@rows.first.size)
      row[0] = inchi
      row
    end

    # fill entries
    entries.each do |feature, values|
      column = feature_positions[feature]
      row_count.times { |i| row_container[i][column] = values[i] }
    end

    # fill zeroes for subgraphs: cells without a value become 0
    if features_smarts
      row_container.each { |row| row.collect! { |cell| cell ? cell : 0 } }
    end

    @rows.concat(row_container)
  end
end

Instance Method Details

#to_csv ⇒ String

Convert to CSV string

Returns:



541
542
543
544
545
546
# File 'lib/serializer.rb', line 541

# Convert to CSV string.
#
# The first row of @rows is treated as the header; each header cell is cut
# down to the text after its last '/' (only the feature name). Cells are
# joined with commas without any quoting or escaping, rows with newlines.
# @rows itself is left unmodified.
#
# @return [String] header line, a newline, then the data rows
def to_csv
  # Destructure instead of `@rows.collect` + `shift`: without a block,
  # `collect` returns an Enumerator on Ruby >= 1.9, which has no `shift`.
  header, *body = @rows
  header_line = header.collect { |f| f.split('/').last }.join(",") # only feature name
  body_lines = body.collect { |r| r.join(",") }.join("\n")
  "#{header_line}\n#{body_lines}"
end

#to_spreadsheet(sheetname = "sheet1") ⇒ Spreadsheet::Workbook

Convert to spreadsheet workbook

Returns:

  • (Spreadsheet::Workbook)

    Workbook object (use the spreadsheet gem to write a file)



550
551
552
553
554
555
556
557
558
559
560
561
562
563
# File 'lib/serializer.rb', line 550

# Convert to spreadsheet workbook.
#
# Creates a workbook with a single worksheet named after +sheetname+ and
# copies every cell of @rows into it, row by row.
#
# @param sheetname name given to the worksheet (interpolated into a string)
# @return [Spreadsheet::Workbook] the populated workbook
def to_spreadsheet(sheetname="sheet1")
  Spreadsheet.client_encoding = 'UTF-8'
  workbook = Spreadsheet::Workbook.new
  worksheet = workbook.create_worksheet(:name => "#{sheetname}")
  worksheet.column(0).width = 100 # widen the first column
  @rows.each_with_index do |cells, row_index|
    cells.each { |cell| worksheet.row(row_index).push cell }
  end
  workbook
end