Class: MzID::ParserSax

Inherits:
Object
  • Object
show all
Defined in:
lib/mzid/parser_sax.rb

Overview

Class to parse an mzIdentML file (.mzid) in a memory-efficient manner. It can parse large files that a DOM parser (which is what most mzid parsers use) will fail on. The caveat is that the results must be written to a CSV file.

Defined Under Namespace

Classes: CounterHandler, DBSequenceHandler, PeptideEventHandler, PeptideHandler, SpectraIDHandler

Instance Method Summary collapse

Constructor Details

#initialize(file, use_pbar = nil, tda_flag = true) ⇒ ParserSax

Returns a new instance of ParserSax.



203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
# File 'lib/mzid/parser_sax.rb', line 203

# Builds a parser for +file+ and caches its DBSequence, Peptide and
# PeptideEvidence elements, running one SAX pass over the file per element kind.
#
# @param file [String] path to the mzIdentML (.mzid) file; it is re-opened for every pass
# @param use_pbar [Object, nil] truthy to run an extra counting pass up front and
#   hand the element totals to the handlers (presumably to size progress bars);
#   nil or false skips the counting pass and hands the handlers nil
# @param tda_flag [Boolean] stored as-is; #write_to_csv drops the "decoy" column when it is falsy
def initialize(file, use_pbar = nil, tda_flag = true)
  @use_pbar = use_pbar
  @mzid_file = file
  @tda_flag = tda_flag

  # Counting pass. count_handler stays nil when it is skipped, and the totals
  # below are keyed on that rather than on `@use_pbar.nil?`, so that
  # `use_pbar = false` no longer ends up calling methods on nil.
  count_handler = nil
  if @use_pbar
    count_handler = CounterHandler.new
    File.open(@mzid_file) { |f| Ox.sax_parse(count_handler, f) }
    @num_spec = count_handler.spec_count
  end

  # Runs one SAX pass with the given handler, closes its progress bar if it
  # has one, and returns the handler so the cached hashes can be read off it.
  run_pass = lambda do |handler|
    File.open(@mzid_file) { |f| Ox.sax_parse(handler, f) }
    handler.pbar.finish unless handler.pbar.nil?
    handler
  end

  # cache DBSequence elements
  dbseq_total = count_handler ? count_handler.dbseq_count : nil
  @dbseq_h = run_pass.call(DBSequenceHandler.new(dbseq_total)).dbseq_h

  # cache Peptide elements (sequences and their modifications)
  pep_total = count_handler ? count_handler.pep_count : nil
  pep_handler = run_pass.call(PeptideHandler.new(pep_total))
  @pep_h = pep_handler.pep_h
  @mod_h = pep_handler.mod_h

  # create/cache PeptideEvidence elements; this handler needs the DBSequence
  # cache, so it has to run after the DBSequence pass
  pepev_total = count_handler ? count_handler.pepev_count : nil
  @pep_ev_h = run_pass.call(PeptideEventHandler.new(@dbseq_h, pepev_total)).pep_ev_h
end

Instance Method Details

#write_to_csv(outfile = "result.csv", show_mods = true) ⇒ Object

Writes the output to the specified CSV file (columns are tab-separated).



243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
# File 'lib/mzid/parser_sax.rb', line 243

# Streams the spectrum identifications of the mzid file into a tab-separated
# file, writing one row per (spectrum, PeptideEvidence) pair.
#
# Columns: #spec_num, peptide, spec_prob, decoy, prot_ids, start, end,
# num_prot, mods. The "decoy" column is left out when @tda_flag is falsy and
# the "mods" column is left out when +show_mods+ is falsy.
#
# @param outfile [String] path of the file to create or overwrite
# @param show_mods [Boolean] whether to append the modification column, formatted
#   as "loc;delta" entries joined by "|" (e.g. "1;+16|3;-18"); the field is left
#   empty for peptides without modifications
def write_to_csv(outfile="result.csv", show_mods=true)
  # Options are passed as keywords: a positional Hash is rejected by
  # CSV.open on Ruby 3.x.
  CSV.open(outfile, "w", :col_sep => "\t") do |csv|
    header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
    header.push("mods") if show_mods
    header.delete("decoy") if !@tda_flag
    csv << header

    # Callback handed to the SAX handler, which invokes it with a spectrum hash.
    # Kept as a Proc (not a lambda) so arity handling is the same as before.
    row_writer = Proc.new do |spec_h|
      # peptide reference/seq
      pep_ref = spec_h[:peptide_ref].to_sym
      pep_seq = @pep_h[pep_ref]
      mods = @mod_h[pep_ref]
      # peptide evidence list; its size is the number of proteins with a matching peptide
      pep_ev_ref_lst = spec_h[:peptideEvidence_ref]
      num_prot = pep_ev_ref_lst.size

      # The modification string depends only on the peptide, so build it once
      # per spectrum instead of once per evidence row.
      modstr = nil
      if show_mods && !mods.nil?
        modstr = mods.keys.map do |loc|
          val = mods[loc].to_i
          # Integer#to_s already carries the "-" for negative values; only
          # positive values need an explicit sign.
          [loc, val > 0 ? "+#{val}" : val.to_s].join(";")
        end.join("|")
      end

      # for each PeptideEvidence entry ...
      pep_ev_ref_lst.each do |pep_ev_ref|
        pep_ev = @pep_ev_h[pep_ev_ref]
        row = [spec_h[:id], pep_seq, spec_h[:spec_prob], pep_ev.get_is_decoy,
               pep_ev.get_prot_id, pep_ev.get_start_pos, pep_ev.get_end_pos, num_prot]
        row.delete_at(3) if !@tda_flag # index 3 is the decoy flag
        row.push(modstr) if show_mods
        csv << row
      end
    end

    spec_handler = SpectraIDHandler.new(@dbseq_h, @pep_h, @pep_ev_h, row_writer, @use_pbar.nil? ? nil : @num_spec)
    File.open(@mzid_file) { |f| Ox.sax_parse(spec_handler, f) }
    spec_handler.pbar.finish if !spec_handler.pbar.nil?
  end
end