Class: OpenTox::Transform::PCA
- Inherits:
-
Object
- Object
- OpenTox::Transform::PCA
- Defined in:
- lib/transform.rb
Overview
Principal Components Analysis.
Instance Attribute Summary collapse
-
#autoscaler ⇒ Object
Returns the value of attribute autoscaler.
-
#data_matrix ⇒ Object
Returns the value of attribute data_matrix.
-
#data_transformed_matrix ⇒ Object
Returns the value of attribute data_transformed_matrix.
-
#eigenvalue_sums ⇒ Object
Returns the value of attribute eigenvalue_sums.
-
#eigenvector_matrix ⇒ Object
Returns the value of attribute eigenvector_matrix.
Instance Method Summary collapse
-
#initialize(data_matrix, compression = 0.05, maxcols = (1.0/0.0)) ⇒ GSL::Matrix
constructor
Creates a transformed dataset as GSL::Matrix.
-
#restore ⇒ GSL::Matrix
Restores data in the original feature space (possibly with compression loss).
-
#transform(values) ⇒ GSL::Matrix
Transforms data to feature space found by PCA.
Constructor Details
#initialize(data_matrix, compression = 0.05, maxcols = (1.0/0.0)) ⇒ GSL::Matrix
Creates a transformed dataset as GSL::Matrix.
112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 |
# File 'lib/transform.rb', line 112
# Runs a Principal Components Analysis on +data_matrix+ and stores the
# transformed data in @data_transformed_matrix.
#
# Steps:
# 1. Keep only columns for which Algorithm::zero_variance? is false; their
#    original (zero-based) indices are remembered in @cols.
# 2. Centre every kept column (AutoScale values multiplied back by stdev).
# 3. Build a correlation matrix with Statsample and run its PCA.
# 4. Keep eigenvectors while the cumulative eigenvalue sum stays within
#    (1 - compression) * number_of_kept_columns, always keeping at least
#    one and never more than +maxcols+.
#
# Any error raised on the way is logged via LOGGER.debug and swallowed, so
# the object may be left partially initialised.
#
# @param data_matrix [GSL::Matrix] input data; needs at least two columns
# @param compression [#to_f] share of the eigenvalue mass that may be dropped
# @param maxcols [Numeric] upper bound on the number of kept eigenvectors
def initialize(data_matrix, compression = 0.05, maxcols = (1.0/0.0))
  @data_matrix = data_matrix.clone
  @compression = compression.to_f
  @mean = []
  @autoscaler = []
  @cols = []
  @maxcols = maxcols

  # Objective Feature Selection
  raise "Error! PCA needs at least two dimensions." if data_matrix.size2 < 2
  @data_matrix_selected = nil
  (0...@data_matrix.size2).each do |i|
    next if Algorithm::zero_variance?(@data_matrix.col(i).to_a)
    if @data_matrix_selected.nil?
      # First informative column: allocate a one-column matrix and fill it.
      @data_matrix_selected = GSL::Matrix.alloc(@data_matrix.size1, 1)
      @data_matrix_selected.col(0)[0..@data_matrix.size1-1] = @data_matrix.col(i)
    else
      # Further columns are appended on the right.
      @data_matrix_selected = @data_matrix_selected.horzcat(GSL::Matrix.alloc(@data_matrix.col(i).to_a, @data_matrix.size1, 1))
    end
    @cols << i
  end
  raise "Error! PCA needs at least two dimensions." if @data_matrix_selected.nil? || @data_matrix_selected.size2 < 2

  # PCA uses internal centering on 0
  @data_matrix_scaled = GSL::Matrix.alloc(@data_matrix_selected.size1, @cols.size)
  (0...@cols.size).each do |i|
    as = OpenTox::Transform::AutoScale.new(@data_matrix_selected.col(i))
    @data_matrix_scaled.col(i)[0..@data_matrix.size1-1] = as.vs * as.stdev # re-adjust by stdev
    @mean << as.mean
    @autoscaler << as
  end

  # PCA
  data_matrix_hash = {}
  (0...@cols.size).each do |i|
    column_view = @data_matrix_scaled.col(i)
    data_matrix_hash[i] = column_view.to_scale
  end
  dataset_hash = data_matrix_hash.to_dataset # see http://goo.gl/7XcW9
  cor_matrix = Statsample::Bivariate.correlation_matrix(dataset_hash)
  pca = Statsample::Factor::PCA.new(cor_matrix)

  # Select best eigenvectors
  pca.eigenvalues.each { |ev| raise "PCA failed!" if ev.nan? }
  @eigenvalue_sums = []
  (0...@cols.size).each do |i|
    # Cumulative sum of the first i+1 eigenvalues.
    @eigenvalue_sums << pca.eigenvalues[0..i].inject { |sum, ev| sum + ev }
  end
  eigenvectors_selected = []
  pca.eigenvectors.each_with_index do |ev, i|
    if (@eigenvalue_sums[i] <= ((1.0 - @compression) * @cols.size)) || eigenvectors_selected.empty?
      eigenvectors_selected << ev.to_a unless @maxcols <= eigenvectors_selected.size
    end
  end
  # Rows of the allocated matrix are eigenvectors; transpose so they become columns.
  @eigenvector_matrix = GSL::Matrix.alloc(eigenvectors_selected.flatten, eigenvectors_selected.size, @cols.size).transpose
  @data_transformed_matrix = (@eigenvector_matrix.transpose * @data_matrix_scaled.transpose).transpose
rescue Exception => e
  # NOTE(review): rescuing Exception also swallows interrupts and exits;
  # narrowing to StandardError looks right, but confirm first that the
  # GSL/Statsample error classes derive from StandardError.
  LOGGER.debug "#{e.class}: #{e.message}"
  LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
Instance Attribute Details
#autoscaler ⇒ Object
Returns the value of attribute autoscaler.
105 106 107 |
# File 'lib/transform.rb', line 105
# Returns the value of attribute autoscaler.
#
# @return [Object] the current value of @autoscaler
def autoscaler
  @autoscaler
end
#data_matrix ⇒ Object
Returns the value of attribute data_matrix.
105 106 107 |
# File 'lib/transform.rb', line 105
# Returns the value of attribute data_matrix.
#
# @return [Object] the current value of @data_matrix
def data_matrix
  @data_matrix
end
#data_transformed_matrix ⇒ Object
Returns the value of attribute data_transformed_matrix.
105 106 107 |
# File 'lib/transform.rb', line 105
# Returns the value of attribute data_transformed_matrix.
#
# @return [Object] the current value of @data_transformed_matrix
def data_transformed_matrix
  @data_transformed_matrix
end
#eigenvalue_sums ⇒ Object
Returns the value of attribute eigenvalue_sums.
105 106 107 |
# File 'lib/transform.rb', line 105
# Returns the value of attribute eigenvalue_sums.
#
# @return [Object] the current value of @eigenvalue_sums
def eigenvalue_sums
  @eigenvalue_sums
end
#eigenvector_matrix ⇒ Object
Returns the value of attribute eigenvector_matrix.
105 106 107 |
# File 'lib/transform.rb', line 105
# Returns the value of attribute eigenvector_matrix.
#
# @return [Object] the current value of @eigenvector_matrix
def eigenvector_matrix
  @eigenvector_matrix
end
Instance Method Details
#restore ⇒ GSL::Matrix
Restores data in the original feature space (possibly with compression loss).
200 201 202 203 204 205 206 207 208 209 210 211 212 |
# File 'lib/transform.rb', line 200
# Restores data in the original feature space (possibly with compression
# loss).
#
# Multiplies the transformed data back through @eigenvector_matrix and then
# adds the stored per-column mean (@mean) to every column. Only the columns
# listed in @cols are restored.
#
# Any error is logged via LOGGER.debug and swallowed; in that case the
# return value is whatever the last LOGGER.debug call returns.
#
# @return [GSL::Matrix] the restored data matrix
def restore
  data_matrix_restored = (@eigenvector_matrix * @data_transformed_matrix.transpose).transpose # reverse pca
  # reverse scaling
  @cols.each_index do |i|
    data_matrix_restored.col(i)[0..data_matrix_restored.size1-1] += @mean[i]
  end
  data_matrix_restored
rescue Exception => e
  # NOTE(review): rescuing Exception also swallows interrupts and exits;
  # confirm whether StandardError would be sufficient here.
  LOGGER.debug "#{e.class}: #{e.message}"
  LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end
#transform(values) ⇒ GSL::Matrix
Transforms data to feature space found by PCA.
181 182 183 184 185 186 187 188 189 190 191 192 193 194 |
# File 'lib/transform.rb', line 181
# Transforms data to feature space found by PCA.
#
# For every column index stored in @cols the matching column of +values+ is
# run through the stored autoscaler, multiplied by that autoscaler's stdev,
# and the resulting matrix is projected onto @eigenvector_matrix.
#
# Any error is logged via LOGGER.debug and swallowed; in that case the
# return value is whatever the last LOGGER.debug call returns.
#
# @param values [GSL::Matrix] data laid out like the matrix given to the
#   constructor (same column positions)
# @return [GSL::Matrix] the projected data
def transform(values)
  vs = values.clone
  # @cols holds zero-based column indices, so the highest index needs
  # size2 to be strictly greater than it.
  raise "Error! Too few columns for transformation." if vs.size2 <= @cols.max
  data_matrix_scaled = GSL::Matrix.alloc(vs.size1, @cols.size)
  @cols.each_with_index do |i, j|
    data_matrix_scaled.col(j)[0..data_matrix_scaled.size1-1] = @autoscaler[j].transform(vs.col(i).to_a) * @autoscaler[j].stdev
  end
  (@eigenvector_matrix.transpose * data_matrix_scaled.transpose).transpose
rescue Exception => e
  # NOTE(review): rescuing Exception also swallows interrupts and exits;
  # confirm whether StandardError would be sufficient here.
  LOGGER.debug "#{e.class}: #{e.message}"
  LOGGER.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
end