Class: Mspire::Mzml

Inherits:
Object
  • Object
show all
Includes:
Enumerable, Convenience, Reader
Defined in:
lib/mspire/mzml.rb,
lib/mspire/mzml/cv.rb,
lib/mspire/mzml/run.rb,
lib/mspire/mzml/list.rb,
lib/mspire/mzml/scan.rb,
lib/mspire/mzml/index.rb,
lib/mspire/mzml/plms1.rb,
lib/mspire/mzml/parser.rb,
lib/mspire/mzml/reader.rb,
lib/mspire/mzml/reader.rb,
lib/mspire/mzml/sample.rb,
lib/mspire/mzml/contact.rb,
lib/mspire/mzml/product.rb,
lib/mspire/mzml/io_index.rb,
lib/mspire/mzml/software.rb,
lib/mspire/mzml/spectrum.rb,
lib/mspire/mzml/component.rb,
lib/mspire/mzml/precursor.rb,
lib/mspire/mzml/scan_list.rb,
lib/mspire/mzml/activation.rb,
lib/mspire/mzml/data_array.rb,
lib/mspire/mzml/index_list.rb,
lib/mspire/mzml/scan_window.rb,
lib/mspire/mzml/source_file.rb,
lib/mspire/mzml/chromatogram.rb,
lib/mspire/mzml/file_content.rb,
lib/mspire/mzml/selected_ion.rb,
lib/mspire/mzml/scan_settings.rb,
lib/mspire/mzml/spectrum_list.rb,
lib/mspire/mzml/data_processing.rb,
lib/mspire/mzml/file_description.rb,
lib/mspire/mzml/isolation_window.rb,
lib/mspire/mzml/chromatogram_list.rb,
lib/mspire/mzml/io_indexable_list.rb,
lib/mspire/mzml/processing_method.rb,
lib/mspire/mzml/instrument_configuration.rb,
lib/mspire/mzml/data_array_container_like.rb,
lib/mspire/mzml/referenceable_param_group.rb

Overview

Reading an mzml file:

# Open the file; the block receives the Mspire::Mzml object built from it.
Mspire::Mzml.open("somefile.mzML") do |mzml|
  # Iterating the mzml object yields one spectrum at a time.
  mzml.each do |spectrum|
    scan = spectrum.scan
    spectrum.mzs                  # array of m/zs
    spectrum.intensities          # array of intensities
    # peaks yields each m/z together with its intensity
    spectrum.peaks do |mz,intensity|
      puts "mz: #{mz} intensity: #{intensity}" 
    end
  end
end

Note that the mzml object supports random spectrum access (even if the mzml was not indexed):

mzml[22]  # retrieve spectrum at index 22

Writing an mzml file from scratch:

# Build a spectrum; the block receives the new object so it can be configured.
# NOTE(review): 'scan=1' is presumably the spectrum id -- confirm against Spectrum#initialize.
spec1 = Mspire::Mzml::Spectrum.new('scan=1') do |spec|
  # attach several params at once; a nested array supplies an accession plus a value
  spec.describe_many! ['MS:1000127', ['MS:1000511', 1]]
  # NOTE(review): presumably the m/z array followed by the intensity array -- confirm
  spec.data_arrays = [[1,2,3], [4,5,6]]
  spec.scan_list = Mspire::Mzml::ScanList.new do |sl|
    scan = Mspire::Mzml::Scan.new do |scan|
      # retention time of 40 seconds
      scan.describe! ['MS:1000016', 40.0, 'UO:0000010']
    end
    # add the configured scan to the scan list
    sl << scan
  end
end

# Assemble a complete mzml object; the block receives the new object for setup.
mzml = Mspire::Mzml.new do |mzml|
  mzml.id = 'the_little_example'
  # cvs must be non-empty or to_xml raises
  mzml.cvs = Mspire::Mzml::CV::DEFAULT_CVS
  mzml.file_description = Mspire::Mzml::FileDescription.new  do |fd|
    fd.file_content = Mspire::Mzml::FileContent.new
    fd.source_files << Mspire::Mzml::SourceFile.new
  end
  # the instrument configuration is registered on the mzml and reused by the run below
  default_instrument_config = Mspire::Mzml::InstrumentConfiguration.new("IC",[])
  default_instrument_config.describe! 'MS:1000031'
  mzml.instrument_configurations << default_instrument_config
  software = Mspire::Mzml::Software.new
  mzml.software_list << software
  # the data processing object is registered on the mzml and reused by the spectrum list below
  default_data_processing = Mspire::Mzml::DataProcessing.new("did_nothing")
  mzml.data_processing_list << default_data_processing
  mzml.run = Mspire::Mzml::Run.new("little_run", default_instrument_config) do |run|
    spectrum_list = Mspire::Mzml::SpectrumList.new(default_data_processing)
    # spec1 is the spectrum built in the previous example
    spectrum_list.push(spec1)
    run.spectrum_list = spectrum_list
  end
end

Defined Under Namespace

Modules: Component, Convenience, DataArrayContainerLike, Default, List, Parser, Reader

Classes: Activation, Analyzer, CV, Chromatogram, ChromatogramList, Contact, DataArray, DataProcessing, Detector, FileContent, FileDescription, IOIndex, IOIndexableList, Index, IndexList, InstrumentConfiguration, IsolationWindow, Precursor, ProcessingMethod, Product, ReferenceableParamGroup, Run, Sample, Scan, ScanList, ScanNumbersNotFound, ScanNumbersNotUnique, ScanSettings, ScanWindow, SelectedIon, Software, Source, SourceFile, Spectrum, SpectrumList

Instance Attribute Summary collapse

Attributes included from Reader

#link

Class Method Summary collapse

Instance Method Summary collapse

Methods included from Reader

#get_default_data_processing_ids, #get_header_string, #read_header!, #set_from_xml_io!

Methods included from Convenience

#chromatogram, #each_chromatogram, #each_spectrum, #length, #num_chromatograms, #spectrum, #spectrum_from_scan_num

Methods included from Enumerable

#index_by

Constructor Details

#initialize(arg = nil, &block) ⇒ Mzml

arg must be an IO object for automatic index and header parsing to occur. If arg is a hash, then attributes are set from its key/value pairs. In addition (or alternatively), a block may be given; it is called with self so the object can be set up.

io must respond_to?(:size), giving the size of the io object in bytes which allows seeking. get_index_list is called to get or create the index list.



155
156
157
158
159
160
161
162
163
164
165
# File 'lib/mspire/mzml.rb', line 155

# Builds an Mzml object, optionally populating it from +arg+ and/or a block.
#
# The cvs, software_list, instrument_configurations, samples and
# data_processing_list attributes are first set to empty arrays (a separate
# array for each).  Then:
#
# * if +arg+ is an IO, it is handed to set_from_xml_io!
# * if +arg+ is a Hash, each key/value pair is applied through the
#   matching "key=" writer
# * any other +arg+ (including nil) is ignored
#
# Finally, if a block was given, it is called with self.
def initialize(arg=nil, &block)
  list_attributes = %w(cvs software_list instrument_configurations samples data_processing_list)
  list_attributes.each do |attribute|
    send("#{attribute}=", [])
  end

  case arg
  when Hash
    arg.each_pair { |key, value| send("#{key}=", value) }
  when IO
    set_from_xml_io!(arg)
  end

  yield(self) if block
end

Instance Attribute Details

#accessionObject

(optional) e.g. a PRIDE accession number



104
105
106
# File 'lib/mspire/mzml.rb', line 104

# Returns the value of @accession (optional; e.g. a PRIDE accession number).
def accession
  @accession
end

#cvsObject

(required) an array of Mspire::Mzml::CV objects



111
112
113
# File 'lib/mspire/mzml.rb', line 111

# Returns the value of @cvs (required; an array of Mspire::Mzml::CV objects).
def cvs
  @cvs
end

#data_processing_listObject

(required) an array of Mspire::Mzml::DataProcessing objects



132
133
134
# File 'lib/mspire/mzml.rb', line 132

# Returns the value of @data_processing_list (required; an array of
# Mspire::Mzml::DataProcessing objects).
def data_processing_list
  @data_processing_list
end

#encodingObject

xml file encoding



145
146
147
# File 'lib/mspire/mzml.rb', line 145

# Returns the value of @encoding (the xml file encoding).
def encoding
  @encoding
end

#file_descriptionObject

(required) an Mspire::Mzml::FileDescription



114
115
116
# File 'lib/mspire/mzml.rb', line 114

# Returns the value of @file_description (required; an
# Mspire::Mzml::FileDescription).
def file_description
  @file_description
end

#idObject

(optional) an id for accessing from external files



98
99
100
# File 'lib/mspire/mzml.rb', line 98

# Returns the value of @id (optional; an id for accessing from external files).
def id
  @id
end

#index_listObject

Mspire::Mzml::IndexList object associated with the file (only expected when reading mzml files at the moment)



142
143
144
# File 'lib/mspire/mzml.rb', line 142

# Returns the value of @index_list (the Mspire::Mzml::IndexList associated
# with the file; only expected when reading mzml files at the moment).
def index_list
  @index_list
end

#instrument_configurationsObject

(required) an array of Mspire::Mzml::InstrumentConfiguration objects



129
130
131
# File 'lib/mspire/mzml.rb', line 129

# Returns the value of @instrument_configurations (required; an array of
# Mspire::Mzml::InstrumentConfiguration objects).
def instrument_configurations
  @instrument_configurations
end

#ioObject

the io object of the mzml file



138
139
140
# File 'lib/mspire/mzml.rb', line 138

# Returns the value of @io (the io object of the mzml file).
def io
  @io
end

#referenceable_param_groupsObject

(optional) an array of Mspire::Mzml::ReferenceableParamGroup objects



117
118
119
# File 'lib/mspire/mzml.rb', line 117

# Returns the value of @referenceable_param_groups (optional; an array of
# referenceable param group objects).
def referenceable_param_groups
  @referenceable_param_groups
end

#runObject

(required) an Mspire::Mzml::Run object



135
136
137
# File 'lib/mspire/mzml.rb', line 135

# Returns the value of @run (required; an Mspire::Mzml::Run object).
def run
  @run
end

#samplesObject

(optional) an array of Mspire::Mzml::Sample objects



120
121
122
# File 'lib/mspire/mzml.rb', line 120

# Returns the value of @samples (optional; an array of Mspire::Mzml::Sample
# objects).
def samples
  @samples
end

#scan_settings_listObject

(optional) an array of Mspire::Mzml::ScanSettings objects



126
127
128
# File 'lib/mspire/mzml.rb', line 126

# Returns the value of @scan_settings_list (optional; an array of
# Mspire::Mzml::ScanSettings objects).
def scan_settings_list
  @scan_settings_list
end

#software_listObject

(required) an array of Mspire::Mzml::Software objects



123
124
125
# File 'lib/mspire/mzml.rb', line 123

# Returns the value of @software_list (required; an array of
# Mspire::Mzml::Software objects).
def software_list
  @software_list
end

#versionObject

(required) the Mzml document version



101
102
103
# File 'lib/mspire/mzml.rb', line 101

# Returns the value of @version (required; the Mzml document version).
def version
  @version
end

Class Method Details

.foreach(filename, &block) ⇒ Object



75
76
77
78
79
80
# File 'lib/mspire/mzml.rb', line 75

# Iterates over the contents of the file at +filename+.
#
# With a block, the file is opened through +open+ and the block is handed
# to #each on the object that +open+ yields; the result of +open+ is
# returned.  Without a block, an Enumerator over this same method and
# filename is returned instead.
def foreach(filename, &block)
  return enum_for(__method__, filename) unless block

  open(filename) { |mzml| mzml.each(&block) }
end

.open(filename, &block) ⇒ Object

read-only right now



69
70
71
72
73
# File 'lib/mspire/mzml.rb', line 69

# Opens +filename+ for reading, builds a new object from the open file via
# self.new(io), and calls the block with that object.  The file is closed
# when the block returns (block form of File.open).
#
# Returns the value of the block.
# Raises ArgumentError if no block is given.  This is checked before the
# file is opened, rather than failing with NoMethodError on nil after the
# file has already been opened and parsed.
def open(filename, &block)
  raise ArgumentError, "#{__method__} requires a block" unless block

  File.open(filename) do |io|
    block.call(self.new(io))
  end
end

Instance Method Details

#to_plms1(use_scan_nums = true) ⇒ Object

Uses scan numbers if use_scan_nums is true (these typically start at one); otherwise uses index numbers (which start at zero).



8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
# File 'lib/mspire/mzml/plms1.rb', line 8

# Collects every spectrum whose ms_level is 1 into an Mspire::Plms1.
#
# use_scan_nums - when true (the default), each kept spectrum is labelled
#                 with the entry at its position in the keys of the
#                 spectrum index's scan-to-index mapping; when false it is
#                 labelled with its position in the iteration.
#
# Returns Mspire::Plms1.new(labels, retention_times, spectra), the three
# arrays being parallel.
def to_plms1(use_scan_nums=true)
  # the spectrum index is looked up even when scan numbers are not wanted
  spec_index = index_list[:spectrum]
  scan_nums = use_scan_nums ? spec_index.create_scan_to_index.keys : nil

  labels, times, ms1_spectra = [], [], []

  each_with_index do |spectrum, position|
    if spectrum.ms_level == 1
      labels.push(use_scan_nums ? scan_nums[position] : position)
      ms1_spectra.push(spectrum)
      times.push(spectrum.retention_time)
    end
  end

  Mspire::Plms1.new(labels, times, ms1_spectra)
end

#to_xml(filename = nil) ⇒ Object

Because mzml files are often very large, we try to avoid storing the entire object tree in memory before writing.

Takes a filename and uses builder to write the XML to that file, returning self. If no filename is given, returns the XML as a string.



220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
# File 'lib/mspire/mzml.rb', line 220

# Serializes this object as an mzML document using Builder::XmlMarkup.
#
# filename - path of the file to write, or nil (the default) to build the
#            document in an in-memory StringIO.
#
# Returns self when a filename was given, otherwise the XML as a String.
# Raises RuntimeError if @cvs is empty.
#
# Child elements are written in this order: cvs, file description,
# referenceable param groups (only if set), samples (only if non-empty),
# software list, scan settings (only if non-empty), instrument
# configurations, data processing list, run.
def to_xml(filename=nil)
  # TODO: support indexed mzml files

  # Validate before opening: mode 'w' truncates, so checking afterwards would
  # wipe an existing file even though nothing valid can be written.
  raise "#{self.class}#cvs must have > 0 Mspire::Mzml::CV objects" unless @cvs.size > 0

  io = filename ? File.open(filename, 'w') : StringIO.new
  begin
    xml = Builder::XmlMarkup.new(:target => io, :indent => 2)
    xml.instruct!

    mzml_atts = Default::NAMESPACE.dup
    mzml_atts[:version] = @version || Default::VERSION
    mzml_atts[:accession] = @accession if @accession
    mzml_atts[:id] = @id if @id

    xml.mzML(mzml_atts) do |mzml_n|
      # the 'if' statements capture whether or not the list is required or not
      Mspire::Mzml::CV.list_xml(@cvs, mzml_n)
      @file_description.to_xml(mzml_n)
      if @referenceable_param_groups
        Mspire::Mzml::ReferenceableParamGroup.list_xml(@referenceable_param_groups, mzml_n)
      end
      if @samples && @samples.size > 0
        Mspire::Mzml::Sample.list_xml(@samples, mzml_n)
      end
      Mspire::Mzml::Software.list_xml(@software_list, mzml_n)
      if @scan_settings_list && @scan_settings_list.size > 0
        Mspire::Mzml::ScanSettings.list_xml(@scan_settings_list, mzml_n)
      end
      Mspire::Mzml::InstrumentConfiguration.list_xml(@instrument_configurations, mzml_n)
      Mspire::Mzml::DataProcessing.list_xml(@data_processing_list, mzml_n)
      @run.to_xml(mzml_n)
    end
  ensure
    # Close the file even if serialization raised.  The StringIO is left
    # open so its contents can be returned below.
    io.close if filename
  end

  filename ? self : io.string
end