Class: OpenTox::Serializer::Spreadsheets

Inherits:

Object

Object
OpenTox::Serializer::Spreadsheets

show all

Defined in: lib/serializer.rb

Overview

Serializer for spreadsheet formats

Instance Method Summary collapse

#initialize(dataset) ⇒ Spreadsheets constructor

Create a new spreadsheet serializer.
#to_csv ⇒ String

Convert to CSV string.
#to_spreadsheet(sheetname = "sheet1") ⇒ Spreadsheet::Workbook

Convert to spreadsheet workbook.

Constructor Details

#initialize(dataset) ⇒ `Spreadsheets`

Create a new spreadsheet serializer

Parameters:

dataset (OpenTox::Dataset) —

Dataset object

# File 'lib/serializer.rb', line 459

# Create a new spreadsheet serializer.
#
# Fills @rows with a header row followed by data rows. The header is
# "SMILES" plus one column per feature (the quoted SMARTS strings when every
# feature is an OT.Substructure, the feature ids otherwise). Each compound
# that has data entries contributes one row per value index; the first cell
# of a data row is the URL-encoded InChI of the compound (the column label
# is nevertheless "SMILES").
#
# Features whose RDF.type contains "MissingFeature" get no column; data
# entries that refer to such a feature are skipped.
#
# @param [OpenTox::Dataset] dataset Dataset object
# @raise [RuntimeError] if the features of one compound carry differing
#   numbers of values
def initialize(dataset)
  @rows = []
  @rows << ["SMILES"]

  features = dataset.features.keys

  # prepare for subgraphs: SMARTS headers are used only if there is at least
  # one feature and every feature is a substructure
  all_substructures = !features.empty? &&
    features.all? { |id| dataset.features[id][RDF.type].include? OT.Substructure }

  # drop features flagged as missing
  features = features.reject { |id|
    dataset.features[id][RDF.type].any? { |typestr| typestr.include? "MissingFeature" }
  }

  # build the SMARTS headers from the remaining features only, so that the
  # header stays aligned with the column positions computed below
  features_smarts = nil
  if all_substructures
    features_smarts = features.collect { |id| "'" + dataset.features[id][OT.smarts] + "'" }
  end

  # detect nr duplicates per compound
  compound_sizes = {}
  dataset.compounds.each do |compound|
    entries = dataset.data_entries[compound]
    next unless entries && !entries.empty? # nothing to export for this compound

    sizes = entries.collect { |_feature, values| values.size }.uniq
    raise "Inappropriate data for CSV export for compound #{compound}" if sizes.size > 1
    compound_sizes[compound] = sizes.first
  end

  # get headers
  @rows.first.concat(features_smarts || features)

  # feature positions pre-calculated
  feature_positions = {}
  features.each_with_index { |f, i| feature_positions[f] = i + 1 } # +1 due to ID

  # serialize to csv
  dataset.compounds.each do |compound|
    nr_rows = compound_sizes[compound]
    next unless nr_rows # compound had no data entries

    entries = dataset.data_entries[compound]
    inchi = URI.encode_www_form_component(Compound.new(compound).to_inchi)

    # allocate container: one row per value index, ID in the first cell
    row_container = Array.new(nr_rows) do
      row = Array.new(@rows.first.size)
      row[0] = inchi
      row
    end

    # fill entries
    entries.each do |feature, values|
      position = feature_positions[feature]
      next unless position # feature has no column (e.g. flagged as missing)

      nr_rows.times { |i| row_container[i][position] = values[i] }
    end

    # fill zeroes for subgraphs
    if features_smarts
      row_container.each { |row| row.collect! { |x| x || 0 } }
    end

    @rows.concat(row_container)
  end
end

Instance Method Details

#to_csv ⇒ `String`

Convert to CSV string

Returns:

(String) —

CSV string

# File 'lib/serializer.rb', line 541

# Convert to CSV string.
#
# The first line is the header row, with every header cell reduced to the
# part after its last '/' (only the feature name). The remaining rows follow
# one per line, cells joined by ','. No quoting or escaping is applied, and
# there is no trailing newline after the last data row. @rows is not
# modified.
#
# @return [String] CSV string
def to_csv
  # Destructure instead of `@rows.collect` + `shift`: without a block,
  # `collect` returns an Enumerator on Ruby >= 1.9, which has no `shift`.
  header, *data = @rows
  header_line = header.collect { |f| f.to_s.split('/').last }.join(",") # only feature name
  header_line + "\n" + data.collect { |r| r.join(",") }.join("\n")
end

#to_spreadsheet(sheetname = "sheet1") ⇒ `Spreadsheet::Workbook`

Convert to spreadsheet workbook

Returns:

(Spreadsheet::Workbook) —

Workbook object (use the spreadsheet gem to write a file)

# File 'lib/serializer.rb', line 550

# Convert to spreadsheet workbook.
#
# Sets the Spreadsheet client encoding to UTF-8, creates a workbook with a
# single worksheet and copies @rows into it cell by cell. The width of the
# first column is set to 100.
#
# @param [String] sheetname name of the worksheet (converted with to_s)
# @return [Spreadsheet::Workbook] workbook holding one worksheet
def to_spreadsheet(sheetname="sheet1")
  Spreadsheet.client_encoding = 'UTF-8'
  workbook = Spreadsheet::Workbook.new
  worksheet = workbook.create_worksheet(:name => sheetname.to_s)
  worksheet.column(0).width = 100
  @rows.each_with_index do |cells, row_index|
    cells.each { |cell| worksheet.row(row_index).push cell }
  end
  workbook
end

Class: OpenTox::Serializer::Spreadsheets

Overview

Instance Method Summary collapse

Constructor Details

#initialize(dataset) ⇒ Spreadsheets

Instance Method Details

#to_csv ⇒ String

#to_spreadsheet(sheetname = "sheet1") ⇒ Spreadsheet::Workbook

#initialize(dataset) ⇒ `Spreadsheets`

#to_csv ⇒ `String`

#to_spreadsheet(sheetname = "sheet1") ⇒ `Spreadsheet::Workbook`