Class: OpenTox::Serializer::Spreadsheets
- Inherits: Object
- Inheritance chain: Object → OpenTox::Serializer::Spreadsheets
- Defined in:
- lib/serializer.rb
Overview
Serializer for spreadsheet formats
Instance Method Summary
-
#initialize(dataset) ⇒ Spreadsheets
constructor
Create a new spreadsheet serializer.
-
#to_csv ⇒ String
Convert to CSV string.
-
#to_spreadsheet(sheetname = "sheet1") ⇒ Spreadsheet::Workbook
Convert to spreadsheet workbook.
Constructor Details
#initialize(dataset) ⇒ Spreadsheets
Create a new spreadsheet serializer
459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 |
# File 'lib/serializer.rb', line 459
# Create a new spreadsheet serializer.
#
# Builds @rows: a header row (["SMILES", <one column per feature>]) followed
# by the data rows of every compound that has data entries. A compound whose
# features each carry N values contributes N rows.
#
# When every feature of the dataset is typed OT.Substructure, the header
# columns hold the quoted OT.smarts of each feature instead of the feature
# id, and empty cells are filled with 0.
#
# NOTE(review): the first column is labelled "SMILES" but is filled with the
# URL-encoded result of Compound#to_inchi -- confirm the label is intended.
#
# @param dataset [#features, #compounds, #data_entries] dataset to serialize
# @raise [RuntimeError] if the features of one compound carry differing
#   numbers of values
def initialize(dataset)
  all_features = dataset.features.keys

  # Substructure mode applies only when there is at least one feature and
  # all of them (before filtering) are substructures.
  substructures = !all_features.empty? && all_features.all? do |id|
    dataset.features[id][RDF.type].include?(OT.Substructure)
  end

  # Features typed as missing never get a column.
  features = all_features.reject do |id|
    dataset.features[id][RDF.type].any? { |type| type.include?("MissingFeature") }
  end

  # Header columns are derived from the *filtered* feature list so that they
  # line up with the column positions computed below.
  header = ["SMILES"]
  if substructures
    header.concat(features.map { |id| "'" + dataset.features[id][OT.smarts] + "'" })
  else
    header.concat(features)
  end
  @rows = [header]

  # Number of rows per compound; all features of a compound must agree.
  # The whole dataset is validated before any row is produced.
  row_counts = {}
  dataset.compounds.each do |compound|
    entries = dataset.data_entries[compound]
    next if entries.nil? || entries.empty?

    counts = entries.map { |_feature, values| values.size }.uniq
    raise "Inappropriate data for CSV export for compound #{compound}" if counts.size > 1

    row_counts[compound] = counts.first
  end

  # Column index of each feature; +1 because column 0 holds the compound id.
  column_of = {}
  features.each_with_index { |feature, index| column_of[feature] = index + 1 }

  dataset.compounds.each do |compound|
    count = row_counts[compound]
    next unless count

    inchi = URI.encode_www_form_component(Compound.new(compound).to_inchi)
    rows = Array.new(count) do
      row = Array.new(header.size)
      row[0] = inchi
      row
    end

    dataset.data_entries[compound].each do |feature, values|
      column = column_of[feature]
      next unless column # feature has no column (e.g. dropped as missing)

      count.times { |i| rows[i][column] = values[i] }
    end

    # Substructure sheets report absent matches as 0 rather than blank.
    rows.each { |row| row.map! { |cell| cell || 0 } } if substructures

    @rows.concat(rows)
  end
end
Instance Method Details
#to_csv ⇒ String
Convert to CSV string
541 542 543 544 545 546 |
# File 'lib/serializer.rb', line 541
# Convert to CSV string.
#
# The first row of @rows is the header: each header entry is reduced to the
# text after its last "/" (so a feature URI becomes the bare feature name).
# All remaining rows are written as-is. Cells are joined with "," verbatim;
# no quoting or escaping is applied. The header line is always terminated by
# a newline; the last data line is not. @rows itself is left untouched.
#
# NOTE(review): header entries that legitimately contain "/" are truncated
# by the same rule -- confirm this is acceptable for all header values.
#
# @return [String] the CSV text, or "" when there are no rows at all
def to_csv
  # Destructure instead of `@rows.collect` + `shift`: a block-less `collect`
  # returns an Enumerator, which has no `shift`.
  header, *body = @rows
  return "" unless header

  header_line = header.map { |field| field.split('/').last }.join(",") # only feature name
  body_lines = body.map { |row| row.join(",") }.join("\n")
  "#{header_line}\n#{body_lines}"
end
#to_spreadsheet(sheetname = "sheet1") ⇒ Spreadsheet::Workbook
Convert to spreadsheet workbook
550 551 552 553 554 555 556 557 558 559 560 561 562 563 |
# File 'lib/serializer.rb', line 550
# Convert to spreadsheet workbook.
#
# Sets the Spreadsheet client encoding to UTF-8, creates a workbook with a
# single worksheet, widens its first column, and appends every cell of every
# row in @rows to the worksheet row with the same index.
#
# @param sheetname [#to_s] name given to the worksheet
# @return [Spreadsheet::Workbook] the populated workbook
def to_spreadsheet(sheetname="sheet1")
  Spreadsheet.client_encoding = 'UTF-8'
  workbook = Spreadsheet::Workbook.new
  worksheet = workbook.create_worksheet(:name => "#{sheetname}")
  worksheet.column(0).width = 100
  @rows.each_with_index do |cells, row_index|
    cells.each { |cell| worksheet.row(row_index).push cell }
  end
  workbook
end