Class: OpenTox::Parser::Sdf
- Inherits:
-
Object
- Object
- OpenTox::Parser::Sdf
- Defined in:
- lib/parser.rb
Overview
Quick hack to enable SDF import via CSV; should be refactored.
Instance Attribute Summary collapse
-
#dataset ⇒ Object
Returns the value of attribute dataset.
Instance Method Summary collapse
-
#initialize ⇒ Sdf
constructor
A new instance of Sdf.
- #load_sdf(sdf) ⇒ Object
Constructor Details
#initialize ⇒ Sdf
Returns a new instance of Sdf.
633 634 635 636 637 638 639 |
# File 'lib/parser.rb', line 633

# Sets up the empty bookkeeping state of a new Sdf parser.
#
# @dataset is not assigned here even though load_sdf reads it, so it has to
# be provided by other means before load_sdf is called.
def initialize
  # Not referenced by the other methods shown in this listing.
  @data = {}

  # Messages for records whose structure could not be converted (see load_sdf).
  @compound_errors = []
  # Replaced by load_sdf with whatever Table#clean_features returns.
  @activity_errors = []
  # InChI string => record numbers in which that structure was seen.
  @duplicates = {}
end
Instance Attribute Details
#dataset ⇒ Object
Returns the value of attribute dataset.
631 632 633 |
# File 'lib/parser.rb', line 631

# Returns the value of attribute dataset.
#
# @return [Object] the current @dataset (nil if it has never been assigned)
def dataset
  @dataset
end
Instance Method Details
#load_sdf(sdf) ⇒ Object
641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 |
# File 'lib/parser.rb', line 641

# Imports an SDF document into @dataset.
#
# Each record is converted to InChI with OpenBabel, turned into a Compound,
# and its data fields are stored as one table row keyed by the compound URI.
# Records whose conversion raises are skipped and reported. Structures seen
# in more than one record are kept but reported as duplicates. The collected
# HTML warning text is stored on the dataset under OT.Warnings.
#
# @dataset must already be set when this method is called; it is not created
# here or in the constructor.
#
# @param sdf [String] SDF text; records are split on "$$$$" followed by a newline
# @return [Object] @dataset
def load_sdf(sdf)
  obconversion = OpenBabel::OBConversion.new
  obmol = OpenBabel::OBMol.new
  obconversion.set_in_and_out_formats "sdf", "inchi"

  table = Table.new

  # Any line containing "<" is taken as a data-field header; the angle
  # brackets and surrounding whitespace are stripped to obtain the name.
  properties = sdf.each_line.grep(/</).map { |line| line.gsub(/<|>/, '').strip }.uniq.sort

  sdf.split(/\$\$\$\$\r*\n/).each_with_index do |s, index|
    rec = index + 1 # 1-based record number used in the warnings
    obconversion.read_string obmol, s
    begin
      inchi = obconversion.write_string(obmol).gsub(/\s/, '')
      (@duplicates[inchi] ||= []) << rec
      compound = Compound.from_inchi inchi
    rescue StandardError
      @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec}) have been ignored! \n#{s}"
      next
    end

    row = {}
    obmol.get_data.each do |d|
      row[d.get_attribute] = d.get_value if properties.include?(d.get_attribute)
    end
    table.data[compound.uri] = row
  end

  # find and remove ignored_features
  @activity_errors = table.clean_features
  table.add_to_dataset @dataset

  warnings = ''
  unless @compound_errors.empty?
    warnings << "<p>Incorrect structures (ignored):</p>" << @compound_errors.join("<br/>")
  end
  unless @activity_errors.empty?
    warnings << "<p>Irregular activities (ignored):</p>" << @activity_errors.join("<br/>")
  end

  duplicate_warnings = ''
  @duplicates.each_value do |records|
    duplicate_warnings << "<p>#{records.join('<br/>')}</p>" if records.size > 1
  end
  unless duplicate_warnings.empty?
    warnings << "<p>Duplicated structures (all structures/activities used for model building, please make sure, that the results were obtained from <em>independent</em> experiments):</p>" << duplicate_warnings
  end

  # NOTE(review): the listing read "@dataset.[OT.Warnings] = warnings", which
  # is not valid Ruby. "metadata" is presumably the accessor that was lost --
  # confirm against lib/parser.rb.
  @dataset.metadata[OT.Warnings] = warnings
  @dataset
end