Class: OpenTox::Algorithm::StructuralClustering

Inherits:
Object
  • Object
show all
Defined in:
lib/algorithm.rb

Overview

Structural graph clustering by TU Munich. Finds clusters similar to a query structure in a given training dataset. May be queried for cluster membership of an unknown compound.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(training_dataset_uri, training_threshold = 0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering") ⇒ StructuralClustering

@param training_dataset_uri Training dataset URI. @param training_threshold Similarity threshold for training (optional). @param cluster_service_uri Cluster service URI (no AA).



206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
# File 'lib/algorithm.rb', line 206

# Trains a cluster model on the given cluster service and indexes the datasets
# extracted from the model by their numeric cluster id.
#
# @param training_dataset_uri [String] URI of the training dataset
# @param training_threshold [Numeric, String] similarity threshold for training, within 0..1 (optional)
# @param cluster_service_uri [String] URI of the cluster service (no AA)
# @raise [RuntimeError] "Invalid URI." if either URI does not match URI::regexp
# @raise [RuntimeError] "Training threshold out of bounds." if the threshold is
#   not numeric or lies outside 0..1
def initialize training_dataset_uri, training_threshold=0.8, cluster_service_uri = "http://opentox-dev.informatik.tu-muenchen.de:8080/OpenTox/algorithm/StructuralClustering"

  if (training_dataset_uri =~ URI::regexp).nil? || (cluster_service_uri =~ URI::regexp).nil?
    raise "Invalid URI."
  end
  @training_dataset_uri = training_dataset_uri
  # The argument of numeric? must be parenthesized: without parentheses the whole
  # `a || b || c` chain is parsed as the argument and the range checks never run.
  # Compare on the Float value so numeric strings are range-checked as well.
  if !self.numeric?(training_threshold) || training_threshold.to_f < 0 || training_threshold.to_f > 1
    raise "Training threshold out of bounds."
  end
  @training_threshold = training_threshold.to_f

  # Train a cluster model
  params = {:dataset_uri => @training_dataset_uri, :threshold => @training_threshold }
  @cluster_model_uri = OpenTox::RestClientWrapper.post cluster_service_uri, params
  cluster_model_rdf = OpenTox::RestClientWrapper.get @cluster_model_uri
  @datasets = OpenTox::Parser::Owl.from_rdf cluster_model_rdf, OT.Dataset, true # must extract OT.Datasets from model

  # Process parsed OWL objects: map cluster id => dataset URI
  @clusterid_dataset_map = Hash.new
  @datasets.each { |d|
    begin
      # Strip the fixed prefix in place so only the cluster id remains
      # (must parse in metadata for string, not elegant). String#[]= raises
      # IndexError when the prefix is absent, which skips that entry below.
      d.metadata[OT.hasSource]["Structural Clustering cluster "] = ""
      @clusterid_dataset_map[d.metadata[OT.hasSource].to_i] = d.uri
    rescue StandardError
      # Ignore entries that are not cluster datasets. StandardError (not
      # Exception) so that signals and exit requests are not swallowed.
    end
  }
end

Instance Attribute Details

#query_dataset_uri ⇒ Object

Returns the value of attribute query_dataset_uri.



201
202
203
# File 'lib/algorithm.rb', line 201

# Reader for @query_dataset_uri, which get_clusters assigns from the result of
# saving the query dataset.
def query_dataset_uri
  @query_dataset_uri
end

#query_threshold ⇒ Object

Returns the value of attribute query_threshold.



201
202
203
# File 'lib/algorithm.rb', line 201

# Reader for @query_threshold, which get_clusters assigns as query_threshold.to_f.
def query_threshold
  @query_threshold
end

#target_clusters_array ⇒ Object

Returns the value of attribute target_clusters_array.



201
202
203
# File 'lib/algorithm.rb', line 201

# Reader for @target_clusters_array, which get_clusters rebuilds on each call
# with the @clusterid_dataset_map entries of the clusters marked present.
def target_clusters_array
  @target_clusters_array
end

#training_dataset_uri ⇒ Object

Returns the value of attribute training_dataset_uri.



201
202
203
# File 'lib/algorithm.rb', line 201

# Reader for @training_dataset_uri, which the constructor assigns from its
# training_dataset_uri argument.
def training_dataset_uri
  @training_dataset_uri
end

#training_threshold ⇒ Object

Returns the value of attribute training_threshold.



201
202
203
# File 'lib/algorithm.rb', line 201

# Reader for @training_threshold, which the constructor assigns as
# training_threshold.to_f.
def training_threshold
  @training_threshold
end

Instance Method Details

#get_clusters(query_compound_uri, query_threshold = 0.5) ⇒ Object

Instance query: clusters for a compound. @param query_compound_uri Query compound. @param query_threshold Similarity threshold for query to clusters (optional).



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
# File 'lib/algorithm.rb', line 243

# Instance query: determines the clusters of the trained model that a compound
# belongs to. The matching entries of @clusterid_dataset_map are collected in
# @target_clusters_array (readable through #target_clusters_array).
#
# @param query_compound_uri [String] URI of the query compound
# @param query_threshold [Numeric, String] similarity threshold for query to clusters, within 0..1 (optional)
# @raise [RuntimeError] "Query threshold out of bounds." if the threshold is
#   not numeric or lies outside 0..1
# @raise [RuntimeError] "Number of predicted compounds is != 1." if the
#   clustering result does not contain exactly one compound
def get_clusters query_compound_uri, query_threshold = 0.5

  # The argument of numeric? must be parenthesized: without parentheses the whole
  # `a || b || c` chain is parsed as the argument and the range checks never run.
  # Compare on the Float value so numeric strings are range-checked as well.
  if !self.numeric?(query_threshold) || query_threshold.to_f < 0 || query_threshold.to_f > 1
    raise "Query threshold out of bounds."
  end
  @query_threshold = query_threshold.to_f


  # Preparing a query dataset
  query_dataset = OpenTox::Dataset.new
  @query_dataset_uri = query_dataset.save
  query_dataset = OpenTox::Dataset.find @query_dataset_uri
  query_dataset.add_compound query_compound_uri
  @query_dataset_uri = query_dataset.save

  # Obtaining a clustering for query compound
  params = { :dataset_uri => @query_dataset_uri, :threshold => @query_threshold }
  cluster_query_dataset_uri = OpenTox::RestClientWrapper.post @cluster_model_uri, params
  cluster_query_dataset = OpenTox::Dataset.new cluster_query_dataset_uri
  cluster_query_dataset.load_all

  # Reading cluster ids for features from metadata
  feature_clusterid_map = Hash.new
  pattern="Prediction feature for cluster assignment " # must parse for string in metadata (not elegant)
  cluster_query_dataset.features.each { |feature_uri, metadata|
    # Strip the fixed prefix in place so only the cluster id remains.
    metadata[DC.title][pattern]=""
    feature_clusterid_map[feature_uri] = metadata[DC.title].to_i
  }

  # Integrity check
  unless cluster_query_dataset.compounds.size == 1
    raise "Number of predicted compounds is != 1."
  end

  # Process data entry
  query_compound_uri = cluster_query_dataset.compounds[0]
  @target_clusters_array = Array.new
  cluster_query_dataset.features.keys.each { |cluster_membership_feature|

    # Getting dataset URI for cluster
    target_cluster = feature_clusterid_map[cluster_membership_feature]
    dataset = @clusterid_dataset_map[target_cluster]

    # Finally look up presence
    data_entry = cluster_query_dataset.data_entries[query_compound_uri]
    present = data_entry[cluster_membership_feature][0]

    # Store result
    @target_clusters_array << dataset if present > 0.5 # 0.0 for absence, 1.0 for presence
  }
end

#trained? ⇒ Boolean

Whether a model has been trained

Returns:

  • (Boolean)


236
237
238
# File 'lib/algorithm.rb', line 236

# Whether a model has been trained, judged by the presence of a cluster model URI.
#
# @return [Boolean] false while @cluster_model_uri is nil, true otherwise
def trained?
  return false if @cluster_model_uri.nil?

  true
end