Class: MS::Lipid::Search

Inherits:
Object
  • Object
show all
Defined in:
lib/ms/lipid/search.rb,
lib/ms/lipid/search/bin.rb,
lib/ms/lipid/search/hit.rb,
lib/ms/lipid/search/query.rb,
lib/ms/lipid/search/db_isobar_group.rb,
lib/ms/lipid/search/probability_distribution.rb

Defined Under Namespace

Classes: Bin, DBIsobarGroup, Hit, HitGroup, ProbabilityDistribution, Query

Constant Summary collapse

STANDARD_MODIFICATIONS =
{
  :proton => [1,2],
  :ammonium => [1],
  :lithium => [1],
  :water => [1,2],
}
STANDARD_SEARCH =
{
  :units => :ppm,
  :query_min_count_per_bin => 500,  # min number of peaks per bin
  :num_rand_samples_per_bin => 1000,
  :num_nearest => 2,
  :return_order => :as_given,  # or :sorted 
}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(ions = [], opts = {}) ⇒ Search

ions are MS::Lipid::Ion objects each one should give a non-nil m/z value



57
58
59
60
61
# File 'lib/ms/lipid/search.rb', line 57

def initialize(ions=[], opts={})
  @options = STANDARD_SEARCH.merge(opts)
  @db_isobar_spectrum = create_db_isobar_spectrum(ions)
  @search_function = create_search_function(ions, @options)
end

Instance Attribute Details

#optionsObject

Returns the value of attribute options.



26
27
28
# File 'lib/ms/lipid/search.rb', line 26

def options
  @options
end

#search_functionObject

Returns the value of attribute search_function.



27
28
29
# File 'lib/ms/lipid/search.rb', line 27

def search_function
  @search_function
end

Class Method Details

.generate_simple_queries(lipids, mods = STANDARD_MODIFICATIONS, gain_and_loss = false) ⇒ Object

will generate PossibleLipid objects and return a new search object uses only one kind of loss at a time and one type of gain at a time will also do the combination of a gain and a loss if gain_and_loss is true



33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# File 'lib/ms/lipid/search.rb', line 33

def self.generate_simple_queries(lipids, mods=STANDARD_MODIFICATIONS, gain_and_loss=false)
  possible_lipids = []
  real_mods_and_cnts = mods.map {|name, cnts| [MS::Lipid::Modification.new(name), cnts] }
  # one of each
  real_mods_and_cnts.each do |mod, counts|
    counts.each do |cnt|
      possible_lipids << MS::Lipid::Search::Query.new(lipid, Array.new(cnt, mod))
    end
  end
  if gain_and_loss
    # one of each gain + one of each loss
    (gain_mod_cnt_pairs, loss_mod_cnt_pairs) = real_mods_and_cnts.partition {|mod, count| mod.gain }
    gain_mod_cnt_pairs.each do |mod, cnt|
      lipids.each do |lipid|
        #### need to implement still (use combinations or something...)
        get_this_working!
      end
    end
  end
  self.new(possible_lipids)
end

Instance Method Details

#create_db_isobar_spectrum(ions) ⇒ Object

returns a DB isobar spectrum where the m/z values are all the m/z values to search for and the intensities each an array corresponding to all the lipid ions matching that m/z value



142
143
144
145
146
147
# File 'lib/ms/lipid/search.rb', line 142

def create_db_isobar_spectrum(ions)
  mzs = [] ; query_groups = []
  pairs = ions.group_by(&:mz).sort_by(&:first)
  pairs.each {|mz, ar| mzs << mz ; query_groups << ar }
  MS::Spectrum.new([mzs, query_groups])
end

#create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm = true) ⇒ Object

use_ppm uses ppm or amu if false returns the search_bins



151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/ms/lipid/search.rb', line 151

def create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, num_rand_samples_per_bin, use_ppm=true)
  search_bins.each do |search_bin| 
    rng = Random.new
    random_mzs = num_rand_samples_per_bin.times.map { rng.rand(search_bin.to_range)  }
    # find the deltas
    diffs = random_mzs.map do |random_mz| 
      nearest_random_mz = db_isobar_spectrum.find_nearest(random_mz)
      delta = (random_mz - nearest_random_mz).abs
      use_ppm ? delta./(nearest_random_mz).*(1e6) : delta
    end
    search_bin.probability_distribution = ProbabilityDistribution.deviations_to_probability_distribution((use_ppm ? :ppm : :amu), diffs)
  end
  search_bins
end

#create_search_bins(db_isobar_spectrum, min_n_per_bin) ⇒ Object



166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/ms/lipid/search.rb', line 166

def create_search_bins(db_isobar_spectrum, min_n_per_bin)
  # make sure we get the right bin size based on the input
  ss = db_isobar_spectrum.mzs.size ; optimal_num_groups = 1
  (1..ss).each do |divisions|
    if  (ss.to_f / divisions) >= min_n_per_bin
      optimal_num_groups = divisions
    else ; break
    end
  end

  mz_ranges = []
  prev = nil

  groups = db_isobar_spectrum.points.in_groups(optimal_num_groups,false).to_a

  case groups.size
  when 0
    raise 'I think you need some data in your query spectrum!'
  when 1
    group = groups.first
    [ MS::Lipid::Search::Bin.new( Range.new(group.first.first, group.last.first), db_isobar_spectrum ) ]
  else
    search_bins = groups.each_cons(2).map do |points1, points2|
      bin = MS::Lipid::Search::Bin.new( Range.new(points1.first.first, points2.first.first, true), db_isobar_spectrum )
      prev = points2
      bin
    end
    _range = Range.new(prev.first.first, prev.last.first)
    search_bins << MS::Lipid::Search::Bin.new(_range, db_isobar_spectrum) # inclusive
  end
end

#create_search_function(ions, opt) ⇒ Object



118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# File 'lib/ms/lipid/search.rb', line 118

def create_search_function(ions, opt)

  db_isobar_spectrum = create_db_isobar_spectrum(ions)

  search_bins = create_search_bins(db_isobar_spectrum, opt[:query_min_count_per_bin])

  create_probability_distribution_for_search_bins!(search_bins, db_isobar_spectrum, opt[:num_rand_samples_per_bin], opt[:ppm])

  # create the actual search function
  # returns an array of hit_groups
  lambda do |search_queries, num_nearest_hits|
    Bin.bin(search_bins, search_queries, &:mz)
    search_bins_with_data = search_bins.reject {|bin| bin.data.empty? }
    hit_groups = search_bins_with_data.map {|bin| bin.queries_to_hit_groups!(opt[:num_nearest]) }.flatten(1)
  end
end

#qvalues!(hit_groups, opts) ⇒ Object



79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/ms/lipid/search.rb', line 79

def qvalues!(hit_groups, opts)

  # from http://stats.stackexchange.com/questions/870/multiple-hypothesis-testing-correction-with-benjamini-hochberg-p-values-or-q-va
  # but I've already coded this up before, too, in multiple ways...
  prev_bh_value = 0
  num_total_tests = hit_groups.size

  #hit_groups.each {|hg| p [hg.first.pvalue, hg] }

  # calculate Q-values BH style for now:
  # first hit is the best hit in the group
  pval_hg_index_tuples = hit_groups.each_with_index.map {|hg,i| [hg.pvalue, hg.delta.abs, hg.ppm.abs, i, hg] }

  if pval_hg_index_tuples.any? {|pair| pair.first.nan? }
    $stderr.puts "pvalue of NaN!"
    $stderr.puts ">>> Consider increasing query_min_count_per_bin or setting ppm to false <<<"
    raise
  end

  sorted_pval_index_tuples = pval_hg_index_tuples.sort

  sorted_pval_index_tuples.each_with_index do |tuple,i|
    pval = tuple.first
    bh_value = pval * num_total_tests / (i + 1)
    # Sometimes this correction can give values greater than 1,
    # so we set those values at 1
    bh_value = [bh_value, 1].min

    # To preserve monotonicity in the values, we take the
    # maximum of the previous value or this one, so that we
    # don't yield a value less than the previous.
    bh_value = [bh_value, prev_bh_value].max
    prev_bh_value = bh_value
    tuple.last.first.qvalue = bh_value # give the top hit the q-value
  end

  sorted_pval_index_tuples.map(&:last)
end

#search(search_queries, opts = {}) ⇒ Object

returns an array of HitGroup and a parallel array of BH derived q-values (will switch to Storey soon enough). The HitGroups are returned in the order in which the mz_values are given. assumes search_queries are in ascending m/z order



67
68
69
70
71
72
73
74
75
76
77
# File 'lib/ms/lipid/search.rb', line 67

def search(search_queries, opts={})
  opt = @options.merge( opts )
  hit_groups = @search_function.call(search_queries, opt[:num_nearest])
  sorted_hit_groups = qvalues!(hit_groups, opt)
  case opts[:return_order]
  when :as_given
    hit_groups
  when :sorted
    sorted_hit_groups
  end
end