Class: OpenTox::Algorithm::Fminer

Inherits:
Object
  • Object
show all
Includes:
OpenTox::Algorithm
Defined in:
lib/algorithm.rb

Overview

Fminer algorithms (github.com/amaunz/fminer2)

Direct Known Subclasses

BBRC, LAST

Instance Attribute Summary collapse

Attributes included from OpenTox

#metadata, #uri

Instance Method Summary collapse

Methods included from OpenTox::Algorithm

effect, gauss, get_cdk_descriptors, get_jl_descriptors, get_ob_descriptors, isnull_or_singular?, load_ds_csv, min_frequency, numeric?, pc_descriptors, #run, sum_size, #to_rdfxml, zero_variance?

Methods included from OpenTox

#add_metadata, all, #delete, #initialize, #load_metadata, sign_in, text_to_html, #to_rdfxml

Instance Attribute Details

#all_activities ⇒ Object

Returns the value of attribute all_activities.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @all_activities.
# add_fminer_data stores the activity of every successfully added compound
# here, keyed by the id it handed to fminer.
def all_activities
  @all_activities
end

#compounds ⇒ Object

Returns the value of attribute compounds.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @compounds.
# add_fminer_data stores the compound of every successfully added entry
# here, keyed by the id it handed to fminer.
def compounds
  @compounds
end

#db_class_sizes ⇒ Object

Returns the value of attribute db_class_sizes.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @db_class_sizes.
# For classification features add_fminer_data counts, at index
# (activity - 1), how many values of each class it has processed.
def db_class_sizes
  @db_class_sizes
end

#minfreq ⇒ Object

Returns the value of attribute minfreq.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @minfreq.
# check_params sets it either directly from params[:min_frequency] or from
# OpenTox::Algorithm.min_frequency.
def minfreq
  @minfreq
end

#prediction_feature ⇒ Object

Returns the value of attribute prediction_feature.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @prediction_feature.
# check_params assigns the result of OpenTox::Feature.find for
# params[:prediction_feature].
def prediction_feature
  @prediction_feature
end

#smi ⇒ Object

Returns the value of attribute smi.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @smi.
# add_fminer_data stores the SMILES string of every successfully added
# compound here, keyed by the id it handed to fminer.
def smi
  @smi
end

#training_dataset ⇒ Object

Returns the value of attribute training_dataset.



55
56
57
# File 'lib/algorithm.rb', line 55

# Reader for @training_dataset.
# check_params assigns the result of OpenTox::Dataset.find for
# params[:dataset_uri].
def training_dataset
  @training_dataset
end

Instance Method Details

#add_fminer_data(fminer_instance, value_map) ⇒ Object



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
# File 'lib/algorithm.rb', line 104

# Walks the training dataset and registers every usable activity of the
# prediction feature, both with fminer and in this object's bookkeeping
# (@all_activities, @compounds, @smi, keyed by a running id that starts at 1,
# and @db_class_sizes for classification).
#
# Compounds without a resolvable SMILES, missing values, and class values
# that are not part of value_map are skipped with a warning.
#
# @param fminer_instance [Object, nil] receiver of AddCompound(smiles, id) and
#   AddActivity(activity, id); when nil/false only the bookkeeping is filled
# @param value_map [Hash] its inverse maps a dataset value to the class
#   number (1..n); only consulted for "classification" features
# @raise [RuntimeError] if the features of one compound carry differing
#   (non-zero) numbers of values
def add_fminer_data(fminer_instance, value_map)
  # detect nr duplicates per compound
  compound_sizes = {}
  @training_dataset.compounds.each do |compound|
    # a compound may be listed without any data entries
    entries = @training_dataset.data_entries[compound] || {}
    sizes = entries.map { |_feature, values| values.size }.reject { |size| size == 0 }.uniq
    raise "Inappropriate data for fminer" if sizes.size > 1
    # integer instead of array; 0 when the compound has no values at all, so
    # the range below is empty rather than `0...nil`
    compound_sizes[compound] = sizes.first.to_i
  end

  id = 1 # fminer start id is not 0
  class_ids = nil # value => class number, inverted lazily (classification only)

  @training_dataset.compounds.each do |compound|
    entry = @training_dataset.data_entries[compound] || {}
    begin
      smiles = OpenTox::Compound.new(compound).to_smiles
    rescue
      LOGGER.warn "No resource for #{compound.to_s}"
      next
    end
    if smiles.nil? || smiles == ''
      LOGGER.warn "Cannot find smiles for #{compound.to_s}."
      next
    end

    entry.each do |feature, values|
      next unless feature == @prediction_feature.uri

      (0...compound_sizes[compound]).each do |i|
        if values[i].nil?
          LOGGER.warn "No #{feature} activity for #{compound.to_s}."
          next
        end

        if @prediction_feature.feature_type == "classification"
          class_ids ||= value_map.invert
          activity = class_ids[values[i]].to_i # activities are mapped to 1..n
          if activity < 1
            # An unmapped value would become class 0 and be counted at index -1,
            # i.e. in the slot of the last class.
            LOGGER.warn "Unknown class value #{values[i]} for #{compound.to_s}, skipping."
            next
          end
          @db_class_sizes[activity - 1] = @db_class_sizes[activity - 1].to_i + 1 # AM effect
        elsif @prediction_feature.feature_type == "regression"
          activity = values[i].to_f
        end

        begin
          fminer_instance.AddCompound(smiles, id) if fminer_instance
          fminer_instance.AddActivity(activity, id) if fminer_instance
          @all_activities[id] = activity # DV: insert global information
          @compounds[id] = compound
          @smi[id] = smiles
          id += 1
        rescue StandardError => e
          # Only ordinary errors are tolerated; Interrupt, SystemExit etc. propagate.
          LOGGER.warn "Could not add " + smiles + "\t" + values[i].to_s + " to fminer"
          LOGGER.warn e.backtrace
        end
      end
    end
  end
end

#check_params(params, per_mil, subjectid = nil) ⇒ Object



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/algorithm.rb', line 57

# Validates the request parameters and initialises @training_dataset,
# @prediction_feature and @minfreq from them.
#
# When params[:prediction_feature] is missing and the dataset has exactly one
# feature, that feature is used and written back into params.
#
# params[:min_frequency] may be a plain number (used directly as @minfreq), a
# percentage ("<n>pc") or a per-mil value ("<n>pm"). For the latter two, and
# when the parameter is absent, @minfreq is obtained from
# OpenTox::Algorithm.min_frequency unless @minfreq is already set.
#
# @param params [Hash] request parameters (:dataset_uri, :prediction_feature,
#   :min_frequency)
# @param per_mil [Object] per-mil value passed to
#   OpenTox::Algorithm.min_frequency when params[:min_frequency] does not
#   override it
# @param subjectid [Object, nil] passed through to the Dataset/Feature lookups
# @raise [OpenTox::NotFoundError] if the dataset URI or the prediction feature
#   is missing, or the feature is not part of the dataset
# @raise [OpenTox::BadRequestError] if params[:min_frequency] is not numeric
def check_params(params, per_mil, subjectid = nil)
  raise OpenTox::NotFoundError.new("Please submit a dataset_uri.") unless params[:dataset_uri]
  @training_dataset = OpenTox::Dataset.find params[:dataset_uri].to_s, subjectid

  unless params[:prediction_feature] # try to read prediction_feature from dataset
    raise OpenTox::NotFoundError.new("Please provide a prediction_feature parameter") unless @training_dataset.features.size == 1
    # use the subjectid argument; no @subjectid is assigned in this method
    prediction_feature = OpenTox::Feature.find(@training_dataset.features.keys.first, subjectid)
    params[:prediction_feature] = prediction_feature.uri
  end
  @prediction_feature = OpenTox::Feature.find params[:prediction_feature], subjectid

  unless @training_dataset.features && @training_dataset.features.include?(params[:prediction_feature])
    raise OpenTox::NotFoundError.new("No feature #{params[:prediction_feature]} in dataset #{params[:dataset_uri]}")
  end

  unless params[:min_frequency].nil?
    # to_s so that a numeric (non-String) parameter is accepted as well
    min_frequency = params[:min_frequency].to_s
    bad_request = false
    # check for percentage
    if min_frequency.include? "pc"
      per_mil = min_frequency.gsub(/pc/, "")
      if OpenTox::Algorithm.numeric? per_mil
        per_mil = per_mil.to_i * 10
      else
        bad_request = true
      end
    # check for per-mil
    elsif min_frequency.include? "pm"
      per_mil = min_frequency.gsub(/pm/, "")
      if OpenTox::Algorithm.numeric? per_mil
        per_mil = per_mil.to_i
      else
        bad_request = true
      end
    # set minfreq directly
    else
      if OpenTox::Algorithm.numeric? min_frequency
        @minfreq = min_frequency.to_i
        LOGGER.debug "min_frequency #{@minfreq}"
      else
        bad_request = true
      end
    end
    # NOTE(review): the message promises n > 0, but only numeric-ness is checked here.
    raise OpenTox::BadRequestError.new("Minimum frequency must be integer [n], or a percentage [n]pc, or a per-mil [n]pm , with n greater 0") if bad_request
  end
  if @minfreq.nil?
    @minfreq = OpenTox::Algorithm.min_frequency(@training_dataset, per_mil)
    LOGGER.debug "min_frequency #{@minfreq} (input was #{per_mil} per-mil)"
  end
end