Class: Mspire::Lipid::Search
- Inherits:
-
Object
- Object
- Mspire::Lipid::Search
- Defined in:
- lib/mspire/lipid/search.rb,
lib/mspire/lipid/search/bin.rb,
lib/mspire/lipid/search/hit.rb,
lib/mspire/lipid/search/query.rb,
lib/mspire/lipid/search/db_isobar_group.rb,
lib/mspire/lipid/search/probability_distribution.rb
Defined Under Namespace
Classes: Bin, DBIsobarGroup, Hit, HitGroup, ProbabilityDistribution, Query
Constant Summary collapse
- STANDARD_MODIFICATIONS =
{ :proton => [1,2], :ammonium => [1], :lithium => [1], :water => [1,2], }
- STANDARD_SEARCH =
{ :units => :ppm, :query_min_count_per_bin => 500, # min number of peaks per bin :num_rand_samples_per_bin => 1000, :num_nearest => 2, :return_order => :as_given, # or :sorted }
Instance Attribute Summary collapse
-
#options ⇒ Object
Returns the value of attribute options.
-
#search_function ⇒ Object
Returns the value of attribute search_function.
Class Method Summary collapse
-
.generate_simple_queries(lipids, mods = STANDARD_MODIFICATIONS, gain_and_loss = false) ⇒ Object
will generate PossibleLipid objects and return a new search object uses only one kind of loss at a time and one type of gain at a time will also do the combination of a gain and a loss if gain_and_loss is true.
Instance Method Summary collapse
-
#create_db_isobar_spectrum(ions) ⇒ Object
returns a DB isobar spectrum where the m/z values are all the m/z values to search for and the intensities each an array corresponding to all the lipid ions matching that m/z value.
-
#create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm = true) ⇒ Object
use_ppm uses ppm or amu if false returns the search_bins.
- #create_search_bins(db_isobar_spectrum, min_n_per_bin) ⇒ Object
- #create_search_function(ions, opt) ⇒ Object
-
#initialize(ions = [], opts = {}) ⇒ Search
constructor
ions are Mspire::Lipid::Ion objects each one should give a non-nil m/z value.
- #qvalues!(hit_groups, opts) ⇒ Object
-
#search(search_queries, opts = {}) ⇒ Object
returns an array of HitGroup and a parallel array of BH derived q-values (will switch to Storey soon enough).
Constructor Details
#initialize(ions = [], opts = {}) ⇒ Search
ions are Mspire::Lipid::Ion objects each one should give a non-nil m/z value
57 58 59 60 61 |
# File 'lib/mspire/lipid/search.rb', line 57 def initialize(ions=[], opts={}) @options = STANDARD_SEARCH.merge(opts) @db_isobar_spectrum = (ions) @search_function = create_search_function(ions, @options) end |
Instance Attribute Details
#options ⇒ Object
Returns the value of attribute options.
26 27 28 |
# File 'lib/mspire/lipid/search.rb', line 26 def @options end |
#search_function ⇒ Object
Returns the value of attribute search_function.
27 28 29 |
# File 'lib/mspire/lipid/search.rb', line 27 def search_function @search_function end |
Class Method Details
.generate_simple_queries(lipids, mods = STANDARD_MODIFICATIONS, gain_and_loss = false) ⇒ Object
will generate PossibleLipid objects and return a new search object uses only one kind of loss at a time and one type of gain at a time will also do the combination of a gain and a loss if gain_and_loss is true
33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 |
# File 'lib/mspire/lipid/search.rb', line 33 def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false) possible_lipids = [] real_mods_and_cnts = mods.map {|name, cnts| [Mspire::Lipid::Modification.new(name), cnts] } # one of each real_mods_and_cnts.each do |mod, counts| counts.each do |cnt| possible_lipids << Mspire::Lipid::Search::Query.new(lipid, Array.new(cnt, mod)) end end if gain_and_loss # one of each gain + one of each loss (gain_mod_cnt_pairs, loss_mod_cnt_pairs) = real_mods_and_cnts.partition {|mod, count| mod.gain } gain_mod_cnt_pairs.each do |mod, cnt| lipids.each do |lipid| #### need to implement still (use combinations or something...) get_this_working! end end end self.new(possible_lipids) end |
Instance Method Details
#create_db_isobar_spectrum(ions) ⇒ Object
returns a DB isobar spectrum where the m/z values are all the m/z values to search for and the intensities each an array corresponding to all the lipid ions matching that m/z value
144 145 146 147 148 149 |
# File 'lib/mspire/lipid/search.rb', line 144 def (ions) mzs = [] ; query_groups = [] pairs = ions.group_by(&:mz).sort_by(&:first) pairs.each {|mz, ar| mzs << mz ; query_groups << ar } Mspire::Spectrum.new([mzs, query_groups]) end |
#create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm = true) ⇒ Object
use_ppm uses ppm or amu if false returns the search_bins
153 154 155 156 157 158 159 160 161 162 163 164 165 166 |
# File 'lib/mspire/lipid/search.rb', line 153 def create_probability_distribution_for_search_bins!(search_bins, , num_rand_samples_per_bin, use_ppm=true) search_bins.each do |search_bin| rng = Random.new random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range) } # find the deltas diffs = random_mzs.map do |random_mz| nearest_random_mz = .find_nearest(random_mz) delta = (random_mz - nearest_random_mz).abs use_ppm ? delta./(nearest_random_mz).*(1e6) : delta end search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs) end search_bins end |
#create_search_bins(db_isobar_spectrum, min_n_per_bin) ⇒ Object
168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
# File 'lib/mspire/lipid/search.rb', line 168 def create_search_bins(, min_n_per_bin) # make sure we get the right bin size based on the input ss = .mzs.size ; optimal_num_groups = 1 (1..ss).each do |divisions| if (ss.to_f / divisions) >= min_n_per_bin optimal_num_groups = divisions else ; break end end mz_ranges = [] prev = nil groups = .points.in_groups(optimal_num_groups,false).to_a case groups.size when 0 raise 'I think you need some data in your query spectrum!' when 1 group = groups.first [ Mspire::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), ) ] else search_bins = groups.each_cons(2).map do |points1, points2| bin = Mspire::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), ) prev = points2 bin end _range = Range.new(prev.first.first, prev.last.first) search_bins << Mspire::Lipid::Search::Bin.new(_range, ) # inclusive end end |
#create_search_function(ions, opt) ⇒ Object
120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 |
# File 'lib/mspire/lipid/search.rb', line 120 def create_search_function(ions, opt) = (ions) search_bins = create_search_bins(, opt[:query_min_count_per_bin]) create_probability_distribution_for_search_bins!(search_bins, , opt[:num_rand_samples_per_bin], opt[:ppm]) # create the actual search function # returns an array of hit_groups lambda do |search_queries, num_nearest_hits| Bin.bin(search_bins, search_queries, &:mz) search_bins_with_data = search_bins.reject {|bin| bin.data.empty? } hit_groups = search_bins_with_data.map {|bin| bin.queries_to_hit_groups!(opt[:num_nearest]) }.flatten(1) end end |
#qvalues!(hit_groups, opts) ⇒ Object
81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/mspire/lipid/search.rb', line 81 def qvalues!(hit_groups, opts) # from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va # but I've already coded this up before, too, in multiple ways... prev_bh_value = 0 num_total_tests = hit_groups.size #hit_groups.each {|hg| p [hg.first.pvalue, hg] } # calculate Q-values BH style for now: # first hit is the best hit in the group pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] } if pval_hg_index_tuples.any? {|pair| pair.first.nan? } $stderr.puts "pvalue of NaN!" $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<" raise end sorted_pval_index_tuples = pval_hg_index_tuples.sort sorted_pval_index_tuples.each_with_index do |tuple,i| pval = tuple.first bh_value = pval * num_total_tests / (i + 1) # Sometimes this correction can give values greater than 1, # so we set those values at 1 bh_value = [bh_value, 1].min # To preserve monotonicity in the values, we take the # maximum of the previous value or this one, so that we # don't yield a value less than the previous. bh_value = [bh_value, prev_bh_value].max prev_bh_value = bh_value tuple.last.first.qvalue = bh_value # give the top hit the q-value end sorted_pval_index_tuples.map(&:last) end |
#search(search_queries, opts = {}) ⇒ Object
returns an array of HitGroup and a parallel array of BH derived q-values (will switch to Storey soon enough). The HitGroups are returned in the order in which the mz_values are given. assumes search_queries are in ascending m/z order
67 68 69 70 71 72 73 74 75 76 77 78 79 |
# File 'lib/mspire/lipid/search.rb', line 67 def search(search_queries, opts={}) opt = @options.merge( opts ) hit_groups = @search_function.call(search_queries, opt[:num_nearest]) sorted_hit_groups = qvalues!(hit_groups, opt) case opts[:return_order] when :given hit_groups when :sorted sorted_hit_groups else raise ArgumentError, "invalid :return_order" end end |