Class: MzID::BatchParser
- Inherits:
-
BaseParser
- Object
- BaseParser
- MzID::BatchParser
- Defined in:
- lib/mzid/batch_parser.rb
Overview
class to parse an mzIdentML file
Direct Known Subclasses
Instance Method Summary
-
#cache_db_seq_entries(root) ⇒ Object
store database sequence entries (ids).
-
#cache_pep_ev(root) ⇒ Object
store peptide evidence sequences in hash for lookup.
-
#each_psm(use_pbar = nil) ⇒ Object
iterate through each psm.
-
#each_spectrum(use_pbar = nil) {|spec_lst| ... } ⇒ Object
for each spectrum, return a list of PSM objects for that spectrum.
-
#initialize(file) ⇒ BatchParser
constructor
A new instance of BatchParser.
Constructor Details
#initialize(file) ⇒ BatchParser
Returns a new instance of BatchParser.
12 13 14 15 16 17 |
# File 'lib/mzid/batch_parser.rb', line 12
#
# Constructor. Hands +file+ to the parent class constructor, creates the two
# empty lookup tables used by this class (@db_seq_h and @pep_ev_h), then
# calls cache_ids.
#
# NOTE(review): cache_ids is defined outside this listing; presumably it is
# what fills the two tables -- confirm against BaseParser.
#
# @param file forwarded unchanged to the parent constructor
def initialize(file)
  super(file)
  # lookup tables start empty
  @db_seq_h = {}
  @pep_ev_h = {}
  cache_ids
end
Instance Method Details
#cache_db_seq_entries(root) ⇒ Object
store database sequence entries (ids)
66 67 68 69 70 71 72 73 |
# File 'lib/mzid/batch_parser.rb', line 66
#
# Store database sequence entries (ids): for every node matched by the
# '//DBSequence' XPath query, map its "id" attribute to its "accession"
# attribute in @db_seq_h.
#
# @param root object answering #xpath (presumably the parsed document root --
#   confirm against the caller)
# @return whatever #each returns on the query result
def cache_db_seq_entries(root)
  root.xpath('//DBSequence').each do |seq_node|
    @db_seq_h[seq_node["id"]] = seq_node["accession"]
  end
end
#cache_pep_ev(root) ⇒ Object
store peptide evidence sequences in hash for lookup
47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/mzid/batch_parser.rb', line 47
#
# Store peptide evidence entries in @pep_ev_h for lookup: one
# PeptideEvidence object per node matched by '//PeptideEvidence', keyed by
# the node's "id" attribute.
#
# The protein id is looked up in @db_seq_h via the node's "dBSequence_ref"
# attribute and converted with to_sym, so the referenced entry has to be in
# @db_seq_h already; a missing entry means to_sym is called on the lookup
# miss and raises.
#
# @param root object answering #xpath (presumably the parsed document root --
#   confirm against the caller)
# @return whatever #each returns on the query result
def cache_pep_ev(root)
  root.xpath('//PeptideEvidence').each do |ev_node|
    ev_id   = ev_node["id"]
    seq_ref = ev_node["dBSequence_ref"]
    @pep_ev_h[ev_id] = PeptideEvidence.new(
      :id         => ev_id,
      :db_seq_ref => seq_ref,
      :pep_id     => ev_node["peptide_ref"],
      :start_pos  => ev_node["start"].to_i,
      :end_pos    => ev_node["end"].to_i,
      :pre        => ev_node["pre"],
      :post       => ev_node["post"],
      :prot_id    => @db_seq_h[seq_ref].to_sym
    )
  end
end
#each_psm(use_pbar = nil) ⇒ Object
iterate through each psm
77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/mzid/batch_parser.rb', line 77
#
# Iterate through each PSM: parse @mzid_file, walk every
# SpectrumIdentificationItem under every SpectrumIdentificationResult, build
# a PSM object for it and yield that object to the caller's block.
#
# Reads @pep_ev_h, @pep_h and @mod_h. Only @pep_ev_h is created in the code
# visible here; NOTE(review): @pep_h and @mod_h are presumably filled by the
# parent class / cache_ids -- confirm.
#
# @param use_pbar when truthy, a ProgressBar titled "PSMs" is created and
#   advanced once per SpectrumIdentificationResult
# @yield [psm] one PSM object per SpectrumIdentificationItem
def each_psm(use_pbar = nil)
  File.open(@mzid_file) do |io|
    parse_opts = Nokogiri::XML::ParseOptions::DEFAULT_XML |
                 Nokogiri::XML::ParseOptions::NOBLANKS |
                 Nokogiri::XML::ParseOptions::STRICT
    doc = Nokogiri::XML.parse(io, nil, nil, parse_opts)
    # drop namespaces so the plain element names below match
    doc.remove_namespaces!
    root = doc.root
    # get list of identifications
    spec_results = root.xpath('//SpectrumIdentificationResult')
    pbar = ProgressBar.new("PSMs", spec_results.size) if use_pbar
    spec_results.each do |sres|
      psms_of_spec = sres.xpath('.//SpectrumIdentificationItem')
      # go over each PSM from the spectra
      psms_of_spec.each do |psm_node|
        pep_ref = psm_node['peptide_ref']
        # get peptide evidence list (resolved through the cached table)
        pep_ev_lst = psm_node.xpath('.//PeptideEvidenceRef').map do |penode|
          @pep_ev_h[penode["peptideEvidence_ref"]]
        end
        # find spectral prob among the cvParams; a PSM without this cvParam
        # raises here, exactly as before
        cvlst = psm_node.xpath('.//cvParam')
        spec_prob = cvlst.find { |v| v['name'] == "MS-GF:SpecEValue" }['value']
        # get peptide
        pep_seq = @pep_h[pep_ref]
        # spectrum number is the 2nd "_"-separated field of the item id,
        # spectrum ref is the last field
        id_parts = psm_node['id'].split("_")
        spec_num = id_parts[1].to_i
        spec_ref = id_parts[-1].to_i
        # store in object
        psm = PSM.new(:spec_num => spec_num,
                      :spec_ref => spec_ref,
                      :pep => pep_seq,
                      :spec_prob => spec_prob.to_f,
                      :mods => (@mod_h.key?(pep_ref) ? @mod_h[pep_ref] : nil),
                      :pep_ev => pep_ev_lst)
        # yield psm object
        yield psm
      end
      pbar.inc if use_pbar
    end
    pbar.finish if use_pbar
  end
end
#each_spectrum(use_pbar = nil) {|spec_lst| ... } ⇒ Object
for each spectrum, return a list of PSM objects for that spectrum
126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 |
# File 'lib/mzid/batch_parser.rb', line 126
#
# For each spectrum, yield a list of PSM objects for that spectrum.
#
# PSMs are taken in the order each_psm produces them; consecutive PSMs whose
# get_spec_num values are equal are collected into one list. Nothing is
# yielded when each_psm produces no PSMs.
#
# @param use_pbar forwarded to each_psm
# @yield [spec_lst] array of consecutive PSMs sharing one spectrum number
def each_spectrum(use_pbar = nil)
  spec_lst = []
  each_psm(use_pbar) do |psm|
    # a change of spectrum number closes the group collected so far
    if !spec_lst.empty? && spec_lst[-1].get_spec_num != psm.get_spec_num
      yield spec_lst
      spec_lst = [] # fresh array, so the list already yielded is not touched
    end
    spec_lst.push(psm)
  end
  # flush the last group; skipped when there were no PSMs at all
  yield spec_lst unless spec_lst.empty?
end