Class: OpenTox::Dataset

Inherits:
Object
  • Object
show all
Defined in:
lib/dataset.rb

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(owl = nil) ⇒ Dataset

Returns a new instance of Dataset.



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
# File 'lib/dataset.rb', line 7

# Builds an empty dataset, or a dataset backed by an OpenTox::Owl object.
#
# Use Dataset.find( <uri> ) to load a dataset from an rdf-supporting
# dataset service. Feature values are not loaded here because that is
# time consuming; only compound- and feature-uris are read from the owl.
#
# owl:: optional OpenTox::Owl instance; any other non-nil argument raises
#       "invalid param"
def initialize( owl=nil )
  @compounds = []
  @features = []
  @data = {}
  return unless owl

  raise "invalid param" unless owl.is_a?(OpenTox::Owl)

  @title = owl.get("title")
  @creator = owl.get("creator")
  @uri = owl.uri

  # fills the (still empty) compound and feature lists in place
  owl.load_dataset(@compounds, @features)

  # every feature starts out dirty: the values of a feature are read from
  # the rdf the first time one of its values is requested
  @dirty_features = @features.dclone

  # kept so the feature values can be loaded later
  @owl = owl
end

Instance Attribute Details

#compounds ⇒ Object

Returns the value of attribute compounds.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the compounds attribute (the @compounds instance variable).
def compounds
  return @compounds
end

#creator ⇒ Object

Returns the value of attribute creator.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the creator attribute (the @creator instance variable).
def creator
  return @creator
end

#data ⇒ Object

Returns the value of attribute data.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the data attribute (the @data instance variable).
def data
  return @data
end

#features ⇒ Object

Returns the value of attribute features.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the features attribute (the @features instance variable).
def features
  return @features
end

#title ⇒ Object

Returns the value of attribute title.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the title attribute (the @title instance variable).
def title
  return @title
end

#uri ⇒ Object

Returns the value of attribute uri.



5
6
7
# File 'lib/dataset.rb', line 5

# Reader for the uri attribute (the @uri instance variable).
def uri
  return @uri
end

Class Method Details

.find(uri, accept_header = nil) ⇒ Object



29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
# File 'lib/dataset.rb', line 29

# Loads the dataset found at +uri+.
#
# uri::           dataset location; converted with to_s and stripped of
#                 surrounding whitespace before any use
# accept_header:: "application/x-yaml" or "application/rdf+xml"; when nil,
#                 yaml is chosen if the uri's host is listed in
#                 @@config[:yaml_hosts], rdf otherwise
#
# Returns the object deserialized from the yaml response, or a Dataset
# built from the owl representation.
# Raises a RuntimeError for any other accept header.
def self.find(uri, accept_header=nil)
  # normalize once so that the host lookup, the request and the uri stored
  # on the dataset all use the same value
  uri = uri.to_s.strip

  unless accept_header
    if @@config[:yaml_hosts].include?(URI.parse(uri).host)
      accept_header = 'application/x-yaml'
    else
      accept_header = "application/rdf+xml"
    end
  end

  case accept_header
  when "application/x-yaml"
    # NOTE(review): YAML.load deserializes whatever the remote service
    # sends back; this is only safe for trusted dataset services
    d = YAML.load RestClientWrapper.get(uri, :accept => 'application/x-yaml').to_s
    d.uri = uri unless d.uri
  when "application/rdf+xml"
    owl = OpenTox::Owl.from_uri(uri, "Dataset")
    d = Dataset.new(owl)
  else
    raise "cannot get datset with accept header: "+accept_header.to_s
  end
  d
end

.owl_to_yaml(owl_data, uri) ⇒ Object

Converts a dataset represented in OWL to YAML (uses a temporary dataset). Note: to_yaml is overridden and loads all of the OWL dataset's values.



55
56
57
58
59
# File 'lib/dataset.rb', line 55

# Converts a dataset given as owl data into its yaml representation.
#
# A temporary Dataset is built from an OpenTox::Owl object created with
# Owl.from_data, and its to_yaml result is returned.
def self.owl_to_yaml( owl_data, uri)
  Dataset.new(OpenTox::Owl.from_data(owl_data, uri, "Dataset")).to_yaml
end

Instance Method Details

#create_new_dataset(new_compounds, new_features, new_title, new_creator) ⇒ Object

Creates a new dataset using only the compounds specified in new_compounds. Returns the URI of the new dataset.



63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/dataset.rb', line 63

# Creates and saves a new dataset restricted to the given compounds and
# features.
#
# new_compounds:: compounds to copy; must be non-nil and non-empty
# new_features::  features whose values are copied for every compound
# new_title::     title assigned to the new dataset
# new_creator::   creator assigned to the new dataset
#
# Returns the value of dataset.save for the new dataset.
# Raises a RuntimeError if no compounds are selected or if a selected
# compound has no data in this dataset.
def create_new_dataset( new_compounds, new_features, new_title, new_creator )

  # validate first: the log line below calls new_compounds.size and would
  # otherwise fail with a NoMethodError for nil
  raise "no new compounds selected" unless new_compounds && !new_compounds.empty?
  LOGGER.debug "create new dataset with "+new_compounds.size.to_s+"/"+compounds.size.to_s+" compounds"

  # load required features that have not been read from the owl yet
  # (the intersection is a new array, so removing dirty flags while loading
  # does not disturb the iteration)
  if defined? @dirty_features
    (@dirty_features & new_features).each{|f| load_feature_values(f)}
  end

  dataset = OpenTox::Dataset.new
  dataset.title = new_title
  dataset.creator = new_creator
  dataset.features = new_features
  dataset.compounds = new_compounds

  # Copy dataset data for compounds and features
  # PENDING: why storing feature values in an array?
  new_compounds.each do |c|
    raise "no data for compound '"+c.to_s+"'" if @data[c].nil?
    dataset.data[c] = @data[c].map do |d|
      m = {}
      # features missing from an entry are copied as nil
      new_features.each { |f| m[f] = d[f] }
      m
    end
  end
  dataset.save
end

#get_predicted_class(compound, feature) ⇒ Object

returns classification value



97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/dataset.rb', line 97

# Returns the classification value stored for compound and feature.
#
# The value is obtained with get_value. If it is a Hash, the entry under
# the first key matching /classification/ is returned, or the string
# "no classification key" when no key matches. An Array value raises a
# RuntimeError. Any other value (including nil) is returned unchanged.
def get_predicted_class(compound, feature)
  v = get_value(compound, feature)
  if v.is_a?(Hash)
    # grep(...).first is nil when no key matches, so test for nil
    k = v.keys.grep(/classification/).first
    return "no classification key" if k.nil?
    return v[k]
  elsif v.is_a?(Array)
    raise "predicted class value is an array\n"+
      "value "+v.to_s+"\n"+
      "value-class "+v.class.to_s+"\n"+
      "dataset "+@uri.to_s+"\n"+
      "compound "+compound.to_s+"\n"+
      "feature "+feature.to_s+"\n"
  else
    return v
  end
end

#get_predicted_regression(compound, feature) ⇒ Object

returns regression value



120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/dataset.rb', line 120

# Returns the regression value stored for compound and feature.
#
# The value is obtained with get_value. If it is a Hash, the entry under
# the first key matching /regression/ is returned, or the string
# "no regression key" when no key matches. An Array value raises a
# RuntimeError. Any other value (including nil) is returned unchanged.
def get_predicted_regression(compound, feature)
  v = get_value(compound, feature)
  if v.is_a?(Hash)
    # grep(...).first is nil when no key matches, so test for nil
    k = v.keys.grep(/regression/).first
    return "no regression key" if k.nil?
    return v[k]
  elsif v.is_a?(Array)
    raise "predicted regression value is an array\n"+
      "value "+v.to_s+"\n"+
      "value-class "+v.class.to_s+"\n"+
      "dataset "+@uri.to_s+"\n"+
      "compound "+compound.to_s+"\n"+
      "feature "+feature.to_s+"\n"
  else
    return v
  end
end

#get_prediction_confidence(compound, feature) ⇒ Object

returns prediction confidence if available



142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/dataset.rb', line 142

# Returns the prediction confidence stored for compound and feature.
#
# The value is obtained with get_value. If it is a Hash, the absolute value
# of the entry under the first key matching /confidence/ is returned; a
# Hash without such a key raises a RuntimeError ("no confidence key").
# For any non-Hash value a warning is logged and 1 is returned.
def get_prediction_confidence(compound, feature)
  v = get_value(compound, feature)
  if v.is_a?(Hash)
    # grep(...).first is nil when no key matches, so test for nil
    k = v.keys.grep(/confidence/).first
    # PENDING: return nil instead of raising an exception
    raise "no confidence key" if k.nil?
    return v[k].abs
  else
    LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s
    return 1
  end
end

#get_value(compound, feature) ⇒ Object

return compound-feature value



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/dataset.rb', line 161

# Looks up the value stored for the given compound and feature.
#
# If the feature is still listed in @dirty_features, its values are loaded
# first. Returns nil when the compound has no data at all or when none of
# its entries contains the feature. Raises a RuntimeError when the data
# stored for the compound is not an Array of Hashes.
def get_value(compound, feature)
  load_feature_values(feature) if defined?(@dirty_features) && @dirty_features.include?(feature)

  entries = @data[compound]
  return nil if entries.nil? # missing values for all features

  unless entries.is_a?(Array)
    raise "value is not an array\n"+
          "value "+entries.to_s+"\n"+
          "value-class "+entries.class.to_s+"\n"+
          "dataset "+@uri.to_s+"\n"+
          "compound "+compound.to_s+"\n"+
          "feature "+feature.to_s+"\n"
  end

  # PENDING: why using an array here?
  entries.each do |entry|
    raise "invalid internal value type" unless entry.is_a?(Hash)
    return entry[feature] if entry.key?(feature)
  end
  nil # missing value
end

#load_feature_values(feature = nil) ⇒ Object

loads specified feature and removes dirty-flag, loads all features if feature is nil



191
192
193
194
195
196
197
198
199
200
201
# File 'lib/dataset.rb', line 191

# Loads feature values through @owl and removes the matching dirty flags.
#
# feature:: a feature that is still listed in @dirty_features, or nil to
#           load every feature that is still dirty
#
# Raises "feature already loaded" when the given feature is not dirty.
def load_feature_values(feature=nil)
  unless feature
    # no feature given: load everything that is still dirty
    @data ||= {}
    @owl.load_dataset_feature_values(@compounds, @data, @dirty_features)
    return @dirty_features.clear
  end

  raise "feature already loaded" unless @dirty_features.include?(feature)
  @owl.load_dataset_feature_values(@compounds, @data, [feature])
  @dirty_features.delete(feature)
end

#save ⇒ Object

Saves the dataset (including any changes) as a new dataset in the dataset service and returns its URI. Uses the to_yaml method (which is overridden).



222
223
224
# File 'lib/dataset.rb', line 222

# Posts the yaml representation of this dataset to the dataset service
# configured under @@config[:services]["opentox-dataset"] and returns the
# stripped response.
def save
  service_uri = @@config[:services]["opentox-dataset"]
  headers = {:content_type =>  "application/x-yaml"}
  response = OpenTox::RestClientWrapper.post(service_uri, headers, self.to_yaml)
  response.strip
end

#to_yaml ⇒ Object

Overrides to_yaml; if the dataset was loaded from OWL:

  • load all values



206
207
208
209
210
211
212
# File 'lib/dataset.rb', line 206

# Serializes the dataset to yaml via the inherited to_yaml, after loading
# the values of every feature that is still marked as dirty.
def to_yaml
  pending = defined?(@dirty_features) && @dirty_features.size > 0
  load_feature_values if pending
  super
end

#to_yaml_properties ⇒ Object

  • remove @owl from yaml, not necessary



215
216
217
# File 'lib/dataset.rb', line 215

# Returns the inherited list of yaml properties without "@owl", which is
# not needed in the yaml output.
def to_yaml_properties
  properties = super
  properties - ["@owl"]
end