Class: Ms::Msrun

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Search
Defined in:
lib/ms/msrun.rb,
lib/ms/msrun/sha1.rb,
lib/ms/msrun/index.rb,
lib/ms/msrun/plms1.rb,
lib/ms/msrun/search.rb,
lib/ms/msrun/nokogiri.rb,
lib/ms/msrun/axml/mzxml.rb,
lib/ms/msrun/regexp/mzxml.rb

Defined Under Namespace

Modules: Axml, Nokogiri, Regexp, Search, Sha1 Classes: Index, Plms1

Constant Summary collapse

DEFAULT_PARSER =

DEFAULT_PARSER = 'axml'; DEFAULT_PARSER = 'regexp'

'nokogiri'
# Matches the mzXML schema URL. Capture 2 holds the schema name that follows
# "schema/" or "schema_revision/" (e.g. "mzXML_3.0"). Dots in the host name
# are escaped so they only match a literal '.'.
Mzxml_regexp = %r{http://sashimi\.sourceforge\.net/schema(_revision)?/([\w.]+)}
# Matches an opening mzData tag; capture 1 is the value of its version attribute.
Mzdata_regexp = /<mzData.*version="([\d.]+)"/m
# String#unpack template: jump to byte offset 2, then read every other byte
# (eight one-byte strings). filetype_and_version joins the result and compares
# it with 'Finnigan' to recognise RAW files.
Raw_header_unpack_code = '@2axaxaxaxaxaxaxa'.freeze
# Matches the indexed mzML schema URL; capture 1 is the version found between
# "mzML" and "_idx.xsd".
Mzml_regexp = %r{http://psidev\.info/files/ms/mzML/xsd/mzML([\w.]+)_idx\.xsd}

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Search

convert, #get_vals, #mgf_header, #ms2_header, #set_opts, #to_mgf, #to_ms2, #to_search

Constructor Details

#initialize(io, filename = nil) ⇒ Msrun

takes an io object. The preferred way to access Msrun objects is through the open method since it ensures that the io object will be available for the lazy evaluation of spectra.



63
64
65
66
67
68
69
70
71
72
73
# File 'lib/ms/msrun.rb', line 63

# Builds an Msrun around an already-open io object.
#
# io::       the io to read from; it is handed to the filetype detection,
#            the parser and the index below
# filename:: optional name, stored and returned by #filename
def initialize(io, filename=nil)
  # filled in lazily by #scan_counts
  @scan_counts = nil
  @filename = filename
  # detect the format and version from the io contents
  # (filetype_and_version rewinds the io before and after scanning it)
  @filetype, @version = Ms::Msrun.filetype_and_version(io)
  # look up the parser class matching the detected filetype
  parser_klass = Ms::Msrun.get_parser(@filetype, @version)

  # the parser receives this Msrun object, the io and the version string
  @parser = parser_klass.new(self, io, @version)
  @index = Ms::Msrun::Index.new(io)
  @scan_nums = @index.scan_nums
  # NOTE(review): parse_header presumably fills in header-derived attributes
  # on this object (e.g. @scan_count, @start_time) -- confirm in the parser
  @parser.parse_header(@index.header_length)
end

Instance Attribute Details

#end_timeObject

the retention time in seconds of the last scan (regardless of any meta-data written in the header)



21
22
23
# File 'lib/ms/msrun.rb', line 21

# Reader for the retention time, in seconds, of the last scan
# (regardless of any meta-data written in the header).
def end_time
  @end_time
end

#filenameObject

the string passed in to open the file for reading



30
31
32
# File 'lib/ms/msrun.rb', line 30

# Reader for the filename given to the constructor (the string passed in to
# open the file for reading); nil when none was supplied.
def filename
  @filename
end

#filetypeObject

The filetype. Valid types (for parsing) are:

:mzxml
:mzdata
:mzml


27
28
29
# File 'lib/ms/msrun.rb', line 27

# Reader for the detected filetype. Valid types (for parsing) are
# :mzxml, :mzdata and :mzml.
def filetype
  @filetype
end

#indexObject

an array of doublets, [start_byte, length] for each scan element



47
48
49
# File 'lib/ms/msrun.rb', line 47

# Reader for the scan index (the Ms::Msrun::Index built in #initialize).
# It is documented as holding a doublet [start_byte, length] for each scan
# element.
def index
  @index
end

#parent_basenameObject

The basename of the parent file listed (e.g., a .RAW file). Note that in v1 mzXML this will be *.mzXML while in later versions it’s *.RAW. See parent_basename_noext for more robust value



40
41
42
# File 'lib/ms/msrun.rb', line 40

# Reader for the basename of the parent file listed (e.g., a .RAW file).
# In v1 mzXML this will be *.mzXML while in later versions it is *.RAW;
# see #parent_basename_noext for a more robust value.
def parent_basename
  @parent_basename
end

#parent_locationObject

The location of the parent file (e.g., a .RAW file). In version mzXML v1 this will be nil.



44
45
46
# File 'lib/ms/msrun.rb', line 44

# Reader for the location of the parent file (e.g., a .RAW file).
# For mzXML v1 this will be nil.
def parent_location
  @parent_location
end

#parserObject

holds the class that parses the file



50
51
52
# File 'lib/ms/msrun.rb', line 50

# Reader for the parser object: an instance of the class returned by
# Ms::Msrun.get_parser, created in #initialize.
def parser
  @parser
end

#scan_count(mslevel = 0) ⇒ Object



143
144
145
146
147
148
149
150
151
152
153
# File 'lib/ms/msrun.rb', line 143

# Number of scans at the given ms level (0 means all levels).
#
# Uses the cached per-level counts when they exist. Otherwise level 0 is
# answered from @scan_count, and any other level triggers the full count
# performed by #scan_counts.
def scan_count(mslevel=0)
  return @scan_counts[mslevel] if @scan_counts
  return @scan_count if mslevel == 0

  scan_counts[mslevel]
end

#scan_numsObject

an array holding the scan numbers found in the run



53
54
55
# File 'lib/ms/msrun.rb', line 53

# Reader for the array of scan numbers found in the run
# (taken from the index in #initialize).
def scan_nums
  @scan_nums
end

#start_timeObject

the retention time in seconds of the first scan (regardless of any meta-data written in the header)



18
19
20
# File 'lib/ms/msrun.rb', line 18

# Reader for the retention time, in seconds, of the first scan
# (regardless of any meta-data written in the header).
def start_time
  @start_time
end

#versionObject

The version string of this type of file



33
34
35
# File 'lib/ms/msrun.rb', line 33

# Reader for the version string of this type of file, as detected by
# Ms::Msrun.filetype_and_version.
def version
  @version
end

Class Method Details

.add_parent_scan(scans, add_intensities = false) ⇒ Object

only adds the parent if one is not already present!



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
# File 'lib/ms/msrun.rb', line 209

# Links every scan with ms_level > 1 to its parent scan.
#
# Walks the scans in order while keeping a stack of candidate parents: when
# the ms level rises, the previous scan becomes the current parent; when it
# falls, one parent is dropped per level. The parent is stored in
# precursor[2], but only if one is not already present.
#
# scans::           ordered scans responding to ms_level and precursor
# add_intensities:: when true, precursor[1] is set to the parent spectrum's
#                   intensity at precursor[0]; skipped for scans that end up
#                   without a parent
#
# Returns scans. An empty collection is returned untouched.
def self.add_parent_scan(scans, add_intensities=false)
  first_scan = scans.first
  # nothing to link (and no ms_level to read) in an empty collection
  return scans unless first_scan

  prev_scan = nil
  parent_stack = [nil]
  ## we want to set the level to be the first mslevel we come to
  prev_level = first_scan.ms_level
  scans.each do |scan|
    level = scan.ms_level
    if prev_level < level
      parent_stack.unshift prev_scan
    elsif prev_level > level
      (prev_level - level).times { parent_stack.shift }
    end
    if level > 1
      precursor = scan.precursor
      precursor[2] ||= parent_stack.first
      parent = precursor[2]
      # a scan with no parent (e.g. a run starting at ms level 2) has no
      # spectrum to read the intensity from
      if add_intensities && parent
        precursor[1] = parent.spectrum.intensity_at_mz(precursor[0])
      end
    end
    prev_level = level
    prev_scan = scan
  end
end

.filetype_and_version(file_or_io) ⇒ Object



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
# File 'lib/ms/msrun.rb', line 245

# Detects the file format and version of a mass spec run.
#
# file_or_io:: either an io-like object (anything responding to gets and
#              rewind, e.g. File or StringIO) or a path that File.open accepts
#
# Returns [filetype, version]:
#   [:raw, nil]        when the first bytes carry the 'Finnigan' RAW signature
#   [:mzml, version]   first line matching Mzml_regexp
#   [:mzxml, version]  first line matching Mzxml_regexp ('1.0' for MsXML)
#   [:mzdata, version] first line matching Mzdata_regexp
# or nil when nothing matches (including empty input).
#
# The io is rewound before scanning and again after the line scan.
# NOTE(review): an unrecognised mzXML schema name still calls abort, which
# terminates the process; raising an error may suit a library better.
def self.filetype_and_version(file_or_io)
  # duck-type instead of is_a?(IO) so StringIO and friends are accepted
  unless file_or_io.respond_to?(:gets) && file_or_io.respond_to?(:rewind)
    return File.open(file_or_io) { |fh| filetype_and_version(fh) }
  end

  io = file_or_io
  io.rewind
  # Test for RAW file. read returns nil on empty input and the unpack
  # template raises on very short strings; the signature's last character
  # sits at byte 16, so anything under 17 bytes cannot be a RAW header.
  raw_header = io.read(18)
  if raw_header && raw_header.bytesize >= 17 &&
     raw_header.unpack(Raw_header_unpack_code).join == 'Finnigan'
    return [:raw, nil]
  end

  io.rewind
  found = nil
  while (line = io.gets)
    found =
      case line
      when Mzml_regexp
        [:mzml, $1.dup]
      when Mzxml_regexp
        mtch = $2.dup
        case mtch
        when /mzXML_([\d\.]+)/
          [:mzxml, $1.dup]
        when /MsXML/
          [:mzxml, '1.0']
        else
          abort "Cannot determine mzXML version!"
        end
      when Mzdata_regexp
        [:mzdata, $1.dup]
      end
    break if found
  end
  io.rewind
  found
end

.foreach(filename, parse_opts = {}, &block) ⇒ Object

Opens the file and yields each scan to the block. See each_scan for parsing options.



105
106
107
108
109
# File 'lib/ms/msrun.rb', line 105

# Opens filename and passes every scan of the run to the given block.
# parse_opts is forwarded unchanged to each_scan (see there for the options).
def self.foreach(filename, parse_opts={}, &block)
  self.open(filename) { |msrun| msrun.each_scan(parse_opts, &block) }
end

.get_parser(filetype, version) ⇒ Object



197
198
199
200
201
202
203
204
205
206
# File 'lib/ms/msrun.rb', line 197

# Loads and returns the parser class for the given filetype.
#
# Requires "ms/msrun/<DEFAULT_PARSER>/<filetype>" and then looks up the
# capitalized filetype inside the Ms::Msrun module named after the
# capitalized DEFAULT_PARSER. The version argument is accepted but not used.
#
# Raises RuntimeError when that module does not define the class.
def self.get_parser(filetype, version)
  require "ms/msrun/#{DEFAULT_PARSER}/#{filetype}"
  klass_name = filetype.to_s.capitalize
  namespace = Ms::Msrun.const_get(DEFAULT_PARSER.capitalize)
  unless namespace.const_defined?(klass_name)
    raise RuntimeError, "no class #{namespace}::#{klass_name}"
  end

  namespace.const_get(klass_name)
end

.open(filename, &block) ⇒ Object

Opens the filename



56
57
58
# File 'lib/ms/msrun.rb', line 56

# Opens filename for reading, wraps the io in a new Msrun and hands that
# object to the block. The file is closed when the block returns, and the
# block's value is returned.
def self.open(filename, &block)
  File.open(filename) do |io|
    msrun = self.new(io, filename)
    block.call(msrun)
  end
end

Instance Method Details

#each_scan(parse_opts = {}, &block) ⇒ Object Also known as: each

Yields each scan to the block. Options:

:spectrum => true | false (default is true)
:precursor => true | false (default is true)
:ms_level => Integer or Array return only scans of that level
:reverse => true | false (default is false) goes backwards


85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/ms/msrun.rb', line 85

# Yields every scan of the run to the block, in index order.
#
# parse_opts (also passed on to #scan for each scan):
#   :ms_level => Integer, or a collection responding to include?; only
#                scans of those levels are yielded
#   :reverse  => true to walk the scan numbers backwards
#
# Without a block an Enumerator over the same scans is returned, so the
# method can be chained like any other iterator. With a block the return
# value is the list of scan numbers that was walked.
def each_scan(parse_opts={}, &block)
  return enum_for(:each_scan, parse_opts) unless block

  msl = parse_opts[:ms_level]
  # normalise a single level to a one-element list; nil means "no filter"
  ms_levels = msl.is_a?(Integer) ? [msl] : msl
  snums = @index.scan_nums
  snums = snums.reverse if parse_opts[:reverse]
  snums.each do |scan_num|
    # ms_level is the cheap lookup, so filter before parsing the scan
    next if ms_levels && !ms_levels.include?(ms_level(scan_num))
    block.call(scan(scan_num, parse_opts))
  end
end

#first(opts = {}) ⇒ Object



183
184
185
186
187
188
189
190
# File 'lib/ms/msrun.rb', line 183

# Returns the first scan that each_scan yields for the given options,
# or nil when it yields none. Iteration stops at the first scan.
def first(opts={})
  each_scan(opts) { |scan| return scan }
  nil
end

#last(opts = {}) ⇒ Object



192
193
194
195
# File 'lib/ms/msrun.rb', line 192

# Returns the last scan matching opts by asking #first to walk the run in
# reverse. opts takes the same options as each_scan; the caller's hash is
# left untouched (a copy carrying :reverse => true is passed on).
def last(opts={})
  first(opts.merge(:reverse => true))
end

#ms_level(num) ⇒ Object

a very fast method to only query the ms_level of a scan



112
113
114
# File 'lib/ms/msrun.rb', line 112

# Fast lookup of the ms level of scan number num: only the parser's
# parse_ms_level is run, on the first and last values of the scan's index
# entry, rather than parsing the whole scan.
def ms_level(num)
  entry = @index[num]
  @parser.parse_ms_level(entry.first, entry.last)
end

#parent_basename_noextObject



75
76
77
# File 'lib/ms/msrun.rb', line 75

# The parent basename with its file extension (as reported by File.extname)
# removed from the end.
def parent_basename_noext
  extension = File.extname(@parent_basename)
  @parent_basename.chomp(extension)
end

#scan(num, parse_opts = {}) ⇒ Object

returns a Ms::Scan object for the scan at that number



118
119
120
121
# File 'lib/ms/msrun.rb', line 118

# Returns the scan object for scan number num, as built by the parser's
# parse_scan from the first and last values of the scan's index entry.
# parse_opts is handed to the parser unchanged.
def scan(num, parse_opts={})
  entry = @index[num]
  @parser.parse_scan(entry.first, entry.last, parse_opts)
end

#scan_countsObject

Returns an array whose indices are the ms_levels and whose values are the number of scans at each level: [0] = all the scans, [1] = mslevel 1, [2] = mslevel 2, …



128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/ms/msrun.rb', line 128

# Counts the scans per ms level and caches the result.
#
# Returns an array indexed by ms level: [0] is the total number of scans,
# [1] the number of ms level 1 scans, and so on. Levels that never occur
# below the highest seen level are left as nil.
def scan_counts
  @scan_counts ||= begin
    tallies = [0]
    each_scan do |scan|
      lvl = scan.ms_level
      tallies[lvl] = (tallies[lvl] || 0) + 1
      tallies[0] += 1
    end
    tallies
  end
end

#start_and_end_mzObject

returns [start_mz, end_mz] for ms level 1 scans or [nil,nil] if unknown



157
158
159
160
# File 'lib/ms/msrun.rb', line 157

# Returns [start_mz, end_mz] as reported by the first ms level 1 scan
# (fetched without its spectrum or precursor), or [nil, nil] when the run
# has no ms level 1 scan.
def start_and_end_mz
  scan = first(:ms_level => 1, :spectrum => false, :precursor => false)
  return [nil, nil] unless scan

  [scan.start_mz, scan.end_mz]
end

#start_and_end_mz_brute_forceObject

Goes through every ms level 1 scan, takes the first and last m/z of each spectrum, and returns [lowest.floor, highest.ceil].



164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
# File 'lib/ms/msrun.rb', line 164

# Scans every ms level 1 spectrum and returns [lowest_mz.floor,
# highest_mz.ceil], where each spectrum contributes its first m/z to the
# lower bound and its last m/z to the upper bound.
#
# Returns [nil, nil] when there is no ms level 1 scan with at least one
# m/z value. Spectra without m/z values are skipped.
# NOTE(review): assumes each spectrum's mzs are sorted ascending, so that
# the first value is its smallest and the last its largest -- confirm.
def start_and_end_mz_brute_force
  lo_mz = nil
  hi_mz = nil

  each_scan(:ms_level => 1, :precursor => false) do |sc|
    mz_ar = sc.spectrum.mzs
    first_mz = mz_ar[0]
    last_mz = mz_ar[-1]
    next unless first_mz && last_mz

    # the lower bound must track the first m/z of each spectrum
    lo_mz = first_mz if lo_mz.nil? || first_mz < lo_mz
    hi_mz = last_mz if hi_mz.nil? || last_mz > hi_mz
  end

  return [nil, nil] unless lo_mz

  [lo_mz.floor, hi_mz.ceil]
end

#to_plms1(scans = nil) ⇒ Object

If given scans, uses those; otherwise collects every ms level 1 scan of the run. Optionally takes a block: the array of scans is yielded to it, and it must return the Enumerable of scans to use.



10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
# File 'lib/ms/msrun/plms1.rb', line 10

# Builds a Plms1 object from scans.
#
# scans:: the scans to export; when omitted, every ms level 1 scan of this
#         run is collected (read without precursor information)
#
# If a block is given, the scans are yielded to it and whatever it returns
# is exported instead. For each exported scan the time, the scan number and
# the spectrum's [mzs, intensities] (both converted with to_a) are recorded.
def to_plms1(scans=nil)
  scans ||= begin
    ms1_scans = []
    self.each(:ms_level => 1, :precursor => false) { |ms1| ms1_scans << ms1 }
    ms1_scans
  end
  scans = yield(scans) if block_given?

  rt_list = []
  num_list = []
  peak_lists = []
  scans.each do |sc|
    rt_list << sc.time
    num_list << sc.num
    spectrum = sc.spectrum
    peak_lists << [spectrum.mzs.to_a, spectrum.intensities.to_a]
  end

  Plms1.new.tap do |out|
    out.times = rt_list
    out.scan_numbers = num_list
    out.spectra = peak_lists
  end
end