Class: MzID::BatchParser

Inherits:
BaseParser show all
Defined in:
lib/mzid/batch_parser.rb

Overview

Parser for mzIdentML files that reads the whole XML document with Nokogiri and iterates over the PSMs it contains.

Direct Known Subclasses

StreamingParser

Instance Method Summary collapse

Constructor Details

#initialize(file) ⇒ BatchParser

Returns a new instance of BatchParser.



12
13
14
15
16
17
# File 'lib/mzid/batch_parser.rb', line 12

# Build a parser for +file+.
#
# The argument is handed to the superclass constructor, the two id-keyed
# lookup tables are created empty, and cache_ids is then invoked.
#
# @param file passed through unchanged to the superclass constructor
def initialize(file)
  super(file)
  # lookup tables written by cache_pep_ev and cache_db_seq_entries
  @pep_ev_h, @db_seq_h = {}, {}
  cache_ids
end

Instance Method Details

#cache_db_seq_entries(root) ⇒ Object

store database sequence entries (ids)



66
67
68
69
70
71
72
73
# File 'lib/mzid/batch_parser.rb', line 66

# Remember the accession of every DBSequence element, keyed by its id.
#
# @param root node that is queried with the XPath '//DBSequence'; each
#   match is read through ["id"] and ["accession"]
def cache_db_seq_entries(root)
  root.xpath('//DBSequence').each do |seq_node|
    @db_seq_h[seq_node["id"]] = seq_node["accession"]
  end
end

#cache_pep_ev(root) ⇒ Object

store peptide evidence sequences in hash for lookup



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/mzid/batch_parser.rb', line 47

# Build a PeptideEvidence object for every PeptideEvidence element and
# store it in @pep_ev_h under the element's id.
#
# The protein id is looked up in @db_seq_h through the element's
# dBSequence_ref and converted to a symbol, so @db_seq_h has to contain
# that reference already when this method runs.
#
# @param root node that is queried with the XPath '//PeptideEvidence'
def cache_pep_ev(root)
  root.xpath('//PeptideEvidence').each do |ev_node|
    ev_id = ev_node["id"]
    seq_ref = ev_node["dBSequence_ref"]
    attrs = {
      :id => ev_id,
      :db_seq_ref => seq_ref,
      :pep_id => ev_node["peptide_ref"],
      :start_pos => ev_node["start"].to_i,
      :end_pos => ev_node["end"].to_i,
      :pre => ev_node["pre"],
      :post => ev_node["post"],
      :prot_id => @db_seq_h[seq_ref].to_sym
    }
    @pep_ev_h[ev_id] = PeptideEvidence.new(attrs)
  end
end

#each_psm(use_pbar = nil) ⇒ Object

iterate through each psm



77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
# File 'lib/mzid/batch_parser.rb', line 77

# Iterate over every PSM (SpectrumIdentificationItem) in the mzIdentML file.
#
# The file at @mzid_file is parsed in full with Nokogiri, namespaces are
# removed, and one PSM object is built and yielded for each
# SpectrumIdentificationItem found under each SpectrumIdentificationResult.
#
# @param use_pbar when truthy, a ProgressBar titled "PSMs" is shown and
#   advanced once per SpectrumIdentificationResult
# @yield [psm] the PSM object built for each SpectrumIdentificationItem
# @return [Enumerator] when called without a block
# @raise [NoMethodError] when an item carries no "MS-GF:SpecEValue" cvParam
def each_psm(use_pbar=nil)
  # without a block there is nothing to yield to; hand back a lazy
  # enumerator instead of parsing the file and then failing on yield
  return enum_for(:each_psm, use_pbar) unless block_given?

  parse_opts = Nokogiri::XML::ParseOptions::DEFAULT_XML |
               Nokogiri::XML::ParseOptions::NOBLANKS |
               Nokogiri::XML::ParseOptions::STRICT

  File.open(@mzid_file) do |io|
    doc = Nokogiri::XML.parse(io, nil, nil, parse_opts)
    doc.remove_namespaces!
    root = doc.root
    # get list of identifications
    spec_results = root.xpath('//SpectrumIdentificationResult')
    pbar = ProgressBar.new("PSMs", spec_results.size) if use_pbar
    spec_results.each do |sres|
      # go over each PSM from the spectrum
      sres.xpath('.//SpectrumIdentificationItem').each do |psm_node|
        # resolve peptide evidence references through the cached table
        pep_ev_lst = psm_node.xpath('.//PeptideEvidenceRef').map do |penode|
          @pep_ev_h[penode["peptideEvidence_ref"]]
        end
        # find spectral probability among the cvParams
        spec_ev = psm_node.xpath('.//cvParam').find { |v| v['name'] == "MS-GF:SpecEValue" }
        spec_prob = spec_ev['value']
        # peptide reference is used for both the sequence and the mods
        pep_ref = psm_node['peptide_ref']
        # the item id is split on "_": field 1 is taken as the spectrum
        # number and the last field as the spectrum reference
        id_parts = psm_node['id'].split("_")
        # store in object
        psm = PSM.new(:spec_num => id_parts[1].to_i,
                      :spec_ref => id_parts[-1].to_i,
                      :pep => @pep_h[pep_ref],
                      :spec_prob => spec_prob.to_f,
                      # fetch keeps the "nil when the key is absent" contract
                      :mods => @mod_h.fetch(pep_ref, nil),
                      :pep_ev => pep_ev_lst)
        # yield psm object
        yield psm
      end
      pbar.inc if use_pbar
    end
    pbar.finish if use_pbar
  end
end

#each_spectrum(use_pbar = nil) {|spec_lst| ... } ⇒ Object

For each spectrum, yield the list of PSM objects identified for that spectrum.

Yields:

  • (spec_lst)


126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/mzid/batch_parser.rb', line 126

# Group consecutive PSMs that share a spectrum number and yield each group.
#
# PSMs are taken in the order each_psm produces them; a new group starts
# whenever get_spec_num differs from that of the previous PSM. Nothing is
# yielded when each_psm produces no PSMs.
#
# @param use_pbar forwarded unchanged to each_psm
# @yield [spec_lst] array of PSM objects belonging to one spectrum
# @return [Enumerator] when called without a block
def each_spectrum(use_pbar=nil)
  return enum_for(:each_spectrum, use_pbar) unless block_given?

  spec_lst = []
  each_psm(use_pbar) do |psm|
    # spectrum number changed: emit the finished group and start a new one
    if !spec_lst.empty? && spec_lst.last.get_spec_num != psm.get_spec_num
      yield spec_lst
      spec_lst = []
    end
    spec_lst << psm
  end
  # flush the trailing group; skipped when there were no PSMs at all so
  # callers never receive an empty list
  yield spec_lst unless spec_lst.empty?
end