ms-ident
Working with mass spectrometry based peptide/protein identifications. Includes support for building pepxml files.
Planned support for mzIdentML and reading pepxml.
Synopsis
Generating a pepxml file
This example shows a very block-oriented way of constructing a pepxml. Objects or empty data structures are passed into blocks for subcategories to use. Because there are a lot of attributes to manage, most objects accept a hash of attributes, either upon initialization or later with ‘merge!’.
# Build a pepxml document from the outside in: each block is handed the
# objects (or empty collections) for the next level down to fill in.
pepxml = Pepxml.new do |msms_pipeline_analysis|
msms_pipeline_analysis.merge!(:summary_xml => "020.xml") do |msms_run_summary|
# prep the sample enzyme and search_summary
msms_run_summary.merge!(
:base_name => '/home/jtprince/dev/mspire/020',
:ms_manufacturer => 'Thermo',
:ms_model => 'LTQ Orbitrap',
:ms_ionization => 'ESI',
:ms_mass_analyzer => 'Ion Trap',
:ms_detector => 'UNKNOWN'
) do |sample_enzyme, search_summary, spectrum_queries|
sample_enzyme.merge!(:name=>'Trypsin',:cut=>'KR',:no_cut=>'P',:sense=>'C')
search_summary.merge!(
:base_name=>'/path/to/file/020',
:search_engine => 'SEQUEST',
:precursor_mass_type =>'monoisotopic',
:fragment_mass_type => 'average'
) do |search_database, enzymatic_search_constraint, modifications, parameters|
search_database.merge!(:local_path => '/path/to/db.fasta', :seq_type => 'AA') # note seq_type == type
enzymatic_search_constraint.merge!(
:enzyme => 'Trypsin',
:max_num_internal_cleavages => 2,
:min_number_termini => 2
)
# modifications is a collection: append one object per modification.
# Here, a variable (:variable => 'Y') modification on the amino acid 'M'.
modifications << Pepxml::AminoacidModification.new(
:aminoacid => 'M', :massdiff => 15.9994, :mass => Ms::Mass::AA::MONO['M']+15.9994,
:variable => 'Y', :symbol => '*')
# invented, for example, a protein terminating mod
modifications << Pepxml::TerminalModification.new(
:terminus => 'c', :massdiff => 23.3333, :mass => Ms::Mass::MONO['oh'] + 23.3333,
:variable => 'Y', :symbol => '[', :protein_terminus => 'c',
:description => 'leave protein_terminus off if not protein mod'
)
# a terminal modification given without :protein_terminus
modifications << Pepxml::TerminalModification.new(
:terminus => 'c', :massdiff => 25.42322, :mass => Ms::Mass::MONO['h+'] + 25.42322,
:variable => 'N', :symbol => ']', :description => 'example: c term mod'
)
parameters.merge!(
:fragment_ion_tolerance => 1.0000,
:digest_mass_range => '600.0 3500.0',
:enzyme_info => 'Trypsin(KR/P) 1 1 KR P', # etc....
)
end
# one spectrum query, holding one search result, holding one search hit
spectrum_query1 = Pepxml::SpectrumQuery.new(
:spectrum => '020.3.3.1', :start_scan => 3, :end_scan => 3,
:precursor_neutral_mass => 1120.93743421875, :assumed_charge => 1
) do |search_results|
search_result1 = Pepxml::SearchResult.new do |search_hits|
# each pair is splatted into ModAminoacidMass.new as two positional arguments
# (presumably [position, mass] -- confirm against the class definition)
modpositions = [[1, 243.1559], [6, 167.0581], [7,181.085]].map do |pair|
Pepxml::SearchHit::ModificationInfo::ModAminoacidMass.new(*pair)
end
# positional argument order: (modified_peptide, mod_aminoacid_masses, mod_nterm_mass, mod_cterm_mass)
# or can be set by hash
mod_info = Pepxml::SearchHit::ModificationInfo.new('Y#RLGGS#T#K', modpositions)
search_hit1 = Pepxml::SearchHit.new(
:hit_rank=>1, :peptide=>'YRLGGSTK', :peptide_prev_aa => "R", :peptide_next_aa => "K",
:protein => "gi|16130113|ref|NP_416680.1|", :num_tot_proteins => 1, :num_matched_ions => 5,
:tot_num_ions => 35, :calc_neutral_pep_mass => 1120.93163442, :massdiff => 0.00579979875010395,
:num_tol_term => 2, :num_missed_cleavages => 1, :is_rejected => 0,
:modification_info => mod_info) do |search_scores|
search_scores.merge!(:xcorr => 0.12346, :deltacn => 0.7959, :deltacnstar => 0,
:spscore => 29.85, :sprank => 1)
end
# append the finished hit to the collection yielded by SearchResult.new
search_hits << search_hit1
end
# append the finished result to the collection yielded by SpectrumQuery.new
search_results << search_result1
end
# append the finished query to the run summary's spectrum_queries collection
spectrum_queries << spectrum_query1
end
end
end
# print whatever to_xml returns for the assembled object
puts pepxml.to_xml
The block is optional, both at initialization and with merge!; you can just as easily set the needed attributes directly.
# child objects can be passed in as attributes instead of being filled in through a block
# NOTE(review): `.new` is called on `msms_run_summary`, which the larger example
# uses as an instance (a block parameter); presumably a class constant is intended
# here and the result should be assigned -- confirm against the library.
msms_run_summary.new(:search_summary => my_search_summary, :spectrum_queries => spec_queries)
# attributes can also be assigned afterwards through a setter
msms_run_summary.sample_enzyme = sample_enzyme_object
Copyright
see LICENSE