Class: Mspire::Peaklist

Inherits:
Array
  • Object
show all
Defined in:
lib/mspire/peaklist.rb

Overview

a collection of Peak objects. At a minimum, each peak should respond to :x and :y

Constant Summary collapse

DEFAULT_MERGE =
{
  :bin_width => 5,
  :bin_unit => :ppm,
  :normalize => true,
  :return_data => false,
  :split => :share,
  :centroided => true,
}

Class Method Summary collapse

Instance Method Summary collapse

Methods inherited from Array

#in_groups

Class Method Details

.[](*peaks) ⇒ Object

creates a new Mspire::Peaklist and coerces each peak into an Mspire::Peak. If your peaks already behave like peaks you should use .new



44
45
46
# File 'lib/mspire/peaklist.rb', line 44

def [](*peaks)
  self.new( peaks.map {|peak| Mspire::Peak.new(peak) } )
end

.create_bins(peaklists, opts) ⇒ Object



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/mspire/peaklist.rb', line 48

def create_bins(peaklists, opts)
  min, max = min_max_x(peaklists)

  divisions = []
  bin_width = opts[:bin_width]
  use_ppm = (opts[:bin_unit] == :ppm)

  puts "using bin width: #{bin_width}" if $VERBOSE
  puts "using ppm for bins: #{use_ppm}" if $VERBOSE

  current_x = min
  loop do
    if current_x >= max
      divisions << max
      break
    else
      divisions << current_x
      current_x += ( use_ppm ? current_x./(1e6).*(bin_width) : bin_width )
    end
  end
  # make each bin exclusive so there is no overlap
  bins = divisions.each_cons(2).map {|pair| Mspire::Bin.new(*pair, true) }
  # make the last bin *inclusive* of the terminating value
  bins[-1] = Mspire::Bin.new(bins.last.begin, bins.last.end)
  bins
end

.merge(peaklists, opts = {}) ⇒ Object

returns a new peaklist which has been merged with the others. opts) and then segment according to monotonicity (sharing intensity between abutting points). The final m/z is the weighted averaged of all the m/z’s in each peak. Valid opts (with default listed first):

:bin_width => 5 
:bin_unit => :ppm|:amu        interpret bin_width as ppm or amu
:bins => array of Mspire::Bin objects  for custom bins (overides other bin options)
:normalize => true              if true, divides total intensity by 
                                number of spectra
:return_data => false           returns a parallel array containing
                                the peaks associated with each returned peak
:only_data => false           only returns the binned peaks
:split => :zero|:greedy_y|:share  see Mspire::Peak#split
:centroided => true             treat the data as centroided

The binning algorithm is roughly the fastest possible algorithm that would allow for arbitrary, non-constant bin widths (a ratcheting algorithm O(n + m))

Assumes the peaklists are already sorted by m/z.

Note that the peaks themselves will be altered if using the :share split method.



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/mspire/peaklist.rb', line 172

def merge(peaklists, opts={})
  opts = DEFAULT_MERGE.merge(opts)

  (peaklist, returned_data) =  
    if opts[:centroided]
      merge_centroids(peaklists, opts.dup)
    else
      raise NotImplementedError, "need to implement profile merging"
    end

  if opts[:only_data]
    returned_data
  elsif opts[:return_data]
    [peaklist, returned_data]
  else
    peaklist
  end
end

.merge_and_deconvolve(peaklists, opts = {}) ⇒ Object

takes an array of peaklists, merges them together (like merge), then returns an array of doublets. Each doublet is an m/z and an array of values (parallel to input) of intensities]

If you are already using tagged peak objects, then you should set have_tagged_peaks to false (default is true). The peaks will be deconvolved based on the sample_id in this case.

:have_tagged_peaks => false|true


201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/mspire/peaklist.rb', line 201

def merge_and_deconvolve(peaklists, opts={})
  unless htp=opts.delete(:have_tagged_peaks)
    peaklists.each_with_index do |peaklist,i|
      peaklist.map! do |peak|
        TaggedPeak.new(peak, i)
      end
    end
  end

  # make sure we get the data back out
  opts[:return_data] = true
  opts[:only_data] = false

  sample_ids = peaklists.map {|pl| pl.first.sample_id }
  peaks, data = merge(peaklists, opts)
  data.zip(peaks).map do |bucket_of_peaks, meta_peak|
    signal_by_sample_id = Hash.new {|h,k| h[k] = 0.0 }
    bucket_of_peaks.each do |peak|
      signal_by_sample_id[peak.sample_id] += peak.y
    end
    [meta_peak.x, sample_ids.map {|sample_id| signal_by_sample_id[sample_id] } ]
  end
end

.merge_centroids(peaklists, opts = {}) ⇒ Object



86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/mspire/peaklist.rb', line 86

def merge_centroids(peaklists, opts={})
  opts[:return_data] = true if opts[:only_data]

  # Create Mspire::Bin objects
  bins = opts[:bins] ? opts[:bins] : create_bins(peaklists, opts)
  puts "created #{bins.size} bins" if $VERBOSE

  peaklists.each do |peaklist|
    Mspire::Bin.bin(bins, peaklist, &:x)
  end

  pseudo_peaks = bins.map do |bin|
    Mspire::Peak.new( [bin, bin.data.reduce(0.0) {|sum,peak| sum + peak.y }] )
  end

  pseudo_peaklist = Mspire::Peaklist.new(pseudo_peaks)

  separate_peaklists = pseudo_peaklist.split(opts[:split])

  normalize_factor = opts[:normalize] ? peaklists.size : 1

  return_data = []
  final_peaklist = Mspire::Peaklist.new unless opts[:only_data]

  separate_peaklists.each do |pseudo_peaklist|
    data_peaklist = Mspire::Peaklist.new
    weight_x = 0.0
    tot_intensity = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.y }
    #puts "TOT INTENSITY:"
    #p tot_intensity
    calc_from_lil_bins = pseudo_peaklist.inject(0.0) {|sum, bin_peak| sum + bin_peak.x.data.map(&:y).reduce(:+) }
    #puts "LILBINS: "
    #p calc_from_lil_bins
    pseudo_peaklist.each do |bin_peak|

      # For the :share method, the psuedo_peak intensity may have been
      # adjusted, but the individual peaks were not.  Correct this.             
      if opts[:split] == :share
        post_scaled_y = bin_peak.y
        pre_scaled_y = bin_peak.x.data.reduce(0.0) {|sum,peak| sum + peak.last }
        #puts "PRESCALED Y:"
        #p pre_scaled_y
        if (post_scaled_y - pre_scaled_y).abs.round(10) != 0.0
          correction = post_scaled_y / pre_scaled_y
          bin_peak.x.data.each {|peak| peak.y = (peak.y * correction) }
        end
      end

      unless opts[:only_data]
        bin_peak.x.data.each do |peak|
          weight_x += peak.x * ( peak.y.to_f / tot_intensity)
        end
      end
      (data_peaklist.push( *bin_peak.x.data )) if opts[:return_data]
    end
    final_peaklist << Mspire::Peak.new([weight_x, tot_intensity / normalize_factor]) unless opts[:only_data]
    return_data << data_peaklist if opts[:return_data]
  end
  [final_peaklist, return_data]
end

.min_max_x(peaklists) ⇒ Object



75
76
77
78
79
80
81
82
83
84
# File 'lib/mspire/peaklist.rb', line 75

def min_max_x(peaklists)
  # find the min and max across all spectra
  first_peaklist = peaklists.first
  min = first_peaklist.first.x; max = first_peaklist.last.x
  peaklists.each do |peaklist|
    min = peaklist.lo_x if peaklist.lo_x < min
    max = peaklist.hi_x if peaklist.hi_x > max
  end
  [min, max]
end

Instance Method Details

#hi_xObject



13
14
15
# File 'lib/mspire/peaklist.rb', line 13

def hi_x
  self.last[0]
end

#lo_xObject



9
10
11
# File 'lib/mspire/peaklist.rb', line 9

def lo_x
  self.first[0]
end

#peak_boundaries(gt = 0.0) ⇒ Object

returns an array with the indices outlining each peak. The first index is the start of the peak, the last index is the last of the peak. Interior indices represent local minima. So, peaks that have only two indices have no local minima.



231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
# File 'lib/mspire/peaklist.rb', line 231

def peak_boundaries(gt=0.0)
  in_peak = false
  prev_y = gt
  prev_prev_y = gt
  peak_inds = []
  self.each_with_index do |peak, index|
    curr_y = peak.y
    if curr_y > gt
      if !in_peak
        in_peak = true
        peak_inds << [index]
      else
        # if on_upslope
        if prev_y < curr_y
          # If we were previously on a downslope and we are now on an upslope
          # then the previous index is a local min
          # on_downslope(prev_previous_y, prev_y)
          if prev_prev_y > prev_y
            # We have found a local min
            peak_inds.last << (index - 1)
          end
        end # end if (upslope)
      end # end if !in_peak
    elsif in_peak
      peak_inds.last << (index - 1)
      in_peak = false
    end 
    prev_prev_y = prev_y
    prev_y = curr_y
  end
  # add the last one to the last peak if it is a boundary
  if self[-1].y > gt
    peak_inds.last << (self.size-1)
  end
  peak_inds
end

#split(split_multipeaks_mthd = :zero) ⇒ Object

returns an Array of peaklist objects. Splits run of 1 or more local minima into multiple peaklists. When a point is ‘shared’ between two adjacent hill-ish areas, the choice of how to resolve multi-hills (runs of data above zero) is one of:

:zero = only split on zeros
:share = give each peak its rightful portion of shared peaks, dividing the
          intensity based on the intensity of adjacent peaks
:greedy_y = give the point to the peak with highest point next to
             the point in question. tie goes lower.

Note that the peak surrounding a local_minima may be altered if using :share

assumes that a new peak can be made with an array containing the x value and the y value.



337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/mspire/peaklist.rb', line 337

def split(split_multipeaks_mthd=:zero)
  if split_multipeaks_mthd == :zero
    split_on_zeros
  else
    boundaries = peak_boundaries(0.0)
    no_lm_pklsts = []
    boundaries.each do |indices|
      peak = self[indices.first..indices.last]
      if indices.size == 2
        no_lm_pklsts << peak
      else # have local minima
        multipeak = Peaklist.new(peak)
        local_min_inds = indices[1..-2].map {|i| i-indices.first}
        peaklists = multipeak.split_contiguous(split_multipeaks_mthd, local_min_inds)
        no_lm_pklsts.push *peaklists
      end
    end
    #$stderr.puts "now #{no_lm_pklsts.size} peaks." if $VERBOSE
    no_lm_pklsts 
  end 
end

#split_contiguous(methd = :greedy_y, local_min_indices = nil) ⇒ Object

returns an array of Peaklist objects assumes that this is one connected list of peaks (i.e., no zeros/whitespace on the edges or internally)

  /\       
 /  \/\
/      \

if there are no local minima, just returns self inside the array



285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
# File 'lib/mspire/peaklist.rb', line 285

def split_contiguous(methd=:greedy_y, local_min_indices=nil)
  local_min_indices ||= ((pb=peak_boundaries.first) && pb.shift && pb.pop && pb)

  if local_min_indices.size == 0
    self
  else
    peak_class = first.class
    prev_lm_i = 0  # <- don't worry, will be set to bumped to zero
    peaklists = [ self.class.new([self[0]]) ]
    local_min_indices.each do |lm_i|
      peaklists.last.push( *self[(prev_lm_i+1)..(lm_i-1)] )
      case methd
      when :greedy_y
        if self[lm_i-1].y >= self[lm_i+1].y
          peaklists.last << self[lm_i]
          peaklists << self.class.new
        else
          peaklists << self.class.new( [self[lm_i]] )
        end
      when :share
        # for each local min, two new peaks will be created, with
        # intensity shared between adjacent peaklists
        lm = self[lm_i]
        sum = self[lm_i-1].y + self[lm_i+1].y
        # push onto the last peaklist its portion of the local min
        peaklists.last << peak_class.new( [lm.x, lm.y * (self[lm_i-1].y.to_f/sum)] )
        # create a new peaklist that contains its portion of the local min
        peaklists << self.class.new( [peak_class.new([lm.x, lm.y * (self[lm_i+1].y.to_f/sum)])] )
      end
      prev_lm_i = lm_i
    end
    peaklists.last.push(*self[(prev_lm_i+1)...(self.size)] )
    peaklists
  end
end

#split_on_zeros(given_peak_boundaries = nil) ⇒ Object

returns an array of Peaklist objects



269
270
271
272
273
274
# File 'lib/mspire/peaklist.rb', line 269

def split_on_zeros(given_peak_boundaries=nil)
  pk_bounds = given_peak_boundaries || peak_boundaries(0.0)
  pk_bounds.map do |indices|
    self.class.new self[indices.first..indices.last]
  end
end

#weighted_xObject

for spectral peaks, this is the weighted m/z



27
28
29
30
31
32
33
34
35
36
# File 'lib/mspire/peaklist.rb', line 27

def weighted_x
  tot_intensity = self.inject(0.0) {|sum,peak| sum + peak.y }
  _weighted_x = 0.0
  self.each do |peak|
    int = peak.y
    signal_by_sample_index[peak.sample_id] += int
    _weighted_x += (peak.first * (int/tot_intensity))
  end
  _weighted_x
end