Class: OpenTox::Parser::Sdf

Inherits:

Object

Object
OpenTox::Parser::Sdf

show all

Defined in: lib/parser.rb

Overview

Quick hack to enable SDF import via CSV; should be refactored.

Instance Attribute Summary collapse

#dataset ⇒ Object

Returns the value of attribute dataset.

Instance Method Summary collapse

#initialize ⇒ Sdf constructor

A new instance of Sdf.
#load_sdf(sdf) ⇒ Object

Constructor Details

#initialize ⇒ `Sdf`

Returns a new instance of Sdf.

# File 'lib/parser.rb', line 633

# Creates a parser with empty bookkeeping containers.
#
# @data            - empty hash (not referenced by the other methods shown here)
# @compound_errors - messages for records whose structure could not be converted
# @activity_errors - replaced by the result of Table#clean_features in #load_sdf
# @duplicates      - InChI string => list of record numbers sharing that InChI
def initialize
  @data = {}
  # Parallel assignment still yields two distinct arrays.
  @compound_errors, @activity_errors = [], []
  @duplicates = {}
end

Instance Attribute Details

#dataset ⇒ `Object`

Returns the value of attribute dataset.



631
632
633

# File 'lib/parser.rb', line 631

# Reader for the dataset that #load_sdf fills (via Table#add_to_dataset) and returns.
#
# @return [Object] the current value of @dataset (nil if it was never assigned)
def dataset; @dataset; end

Instance Method Details

#load_sdf(sdf) ⇒ `Object`

# File 'lib/parser.rb', line 641

# Imports the records of an SDF string into @dataset.
#
# Every record (records are separated by "$$$$" lines) is converted to an
# InChI with OpenBabel and turned into a Compound; its SDF data items are
# stored as one row in a Table keyed by the compound URI. The table is then
# cleaned (Table#clean_features) and added to @dataset, and an HTML summary
# of ignored structures, irregular activities and duplicated structures is
# stored in @dataset.metadata[OT.Warnings].
#
# @param sdf [String] complete SDF file content
# @return [Object] @dataset (must have been assigned before calling)
def load_sdf(sdf)

  obconversion = OpenBabel::OBConversion.new
  obmol = OpenBabel::OBMol.new
  obconversion.set_in_and_out_formats "sdf", "inchi"

  table = Table.new

  # Property names are taken from the data header lines only. The name is the
  # text between the first "<" and the last ">", so headers carrying a field
  # number or registry id ("> 25 <NAME>", "> <NAME> (MD-1)") yield "NAME".
  properties = sdf.scan(/^>.*?<(.*)>/).flatten.collect { |name| name.strip }.uniq

  sdf.split(/\$\$\$\$\r*\n/).each_with_index do |s, index|
    rec = index + 1
    # Whitespace after the final "$$$$" is not a record.
    next if s.strip.empty?

    inchi = nil
    compound = nil
    begin
      # A failed read must not fall through to write_string: obmol could still
      # hold the molecule of the previous record.
      if obconversion.read_string(obmol, s)
        inchi = obconversion.write_string(obmol).gsub(/\s/, '')
        compound = Compound.from_inchi inchi
      end
    rescue
      compound = nil
    end

    unless compound
      # NOTE(review): the raw record text is embedded unescaped in the HTML warnings.
      @compound_errors << "Could not convert structure to InChI, all entries for this compound (record #{rec}) have been ignored! \n#{s}"
      next
    end

    # Only successfully converted records take part in duplicate detection.
    (@duplicates[inchi] ||= []) << rec

    row = {}
    obmol.get_data.each do |d|
      attribute = d.get_attribute
      row[attribute] = d.get_value if properties.include?(attribute)
    end
    # NOTE(review): rows are keyed by compound URI; if Table#data is a plain
    # Hash, a later duplicate replaces the earlier row - confirm this matches
    # the "all structures/activities used" wording of the duplicate warning.
    table.data[compound.uri] = row
  end

  # find and remove ignored_features
  @activity_errors = table.clean_features
  table.add_to_dataset @dataset

  warnings = ''
  unless @compound_errors.empty?
    warnings << "<p>Incorrect structures (ignored):</p>" << @compound_errors.join("<br/>")
  end
  unless @activity_errors.empty?
    warnings << "<p>Irregular activities (ignored):</p>" << @activity_errors.join("<br/>")
  end

  duplicate_warnings = ''
  @duplicates.each_value do |records|
    duplicate_warnings << "<p>#{records.join('<br/>')}</p>" if records.size > 1
  end
  unless duplicate_warnings.empty?
    warnings << "<p>Duplicated structures (all structures/activities used for model building, please  make sure, that the results were obtained from <em>independent</em> experiments):</p>" << duplicate_warnings
  end

  @dataset.metadata[OT.Warnings] = warnings
  @dataset

end

Class: OpenTox::Parser::Sdf

Overview

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Sdf

Instance Attribute Details

#dataset ⇒ Object

Instance Method Details

#load_sdf(sdf) ⇒ Object

#initialize ⇒ `Sdf`

#dataset ⇒ `Object`

#load_sdf(sdf) ⇒ `Object`