Class: MzID::ParserSax
- Inherits:
-
Object
- Object
- MzID::ParserSax
- Defined in:
- lib/mzid/parser_sax.rb
Overview
Class to parse an mzIdentML file (.mzid) in a memory-efficient manner. It can parse large files on which a DOM-based parser (which is what most mzid parsers use) would fail. The caveat is that the output must be written to a CSV file.
Defined Under Namespace
Classes: CounterHandler, DBSequenceHandler, PeptideEventHandler, PeptideHandler, SpectraIDHandler
Instance Method Summary collapse
-
#initialize(file, use_pbar = nil, tda_flag = true) ⇒ ParserSax
constructor
A new instance of ParserSax.
-
#write_to_csv(outfile = "result.csv", show_mods = true) ⇒ Object
Writes the output to the specified CSV file.
Constructor Details
#initialize(file, use_pbar = nil, tda_flag = true) ⇒ ParserSax
Returns a new instance of ParserSax.
203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 |
# File 'lib/mzid/parser_sax.rb', line 203

# Builds the parser by running several SAX passes over the mzIdentML file and
# caching the DBSequence, Peptide and PeptideEvidence lookups that
# #write_to_csv reads later.
#
# @param file [String] path of the .mzid file; it is re-opened for every pass
# @param use_pbar [Object, nil] when truthy, a counting pass runs first and the
#   element totals are handed to each handler; when nil or false the handlers
#   receive nil instead
# @param tda_flag [Boolean] stored for #write_to_csv, which drops the "decoy"
#   column when it is false
def initialize(file, use_pbar = nil, tda_flag = true)
  @use_pbar = use_pbar
  @mzid_file = file
  @tda_flag = tda_flag
  #
  # get counts (only needed when totals are requested)
  count_handler = nil
  if @use_pbar
    count_handler = CounterHandler.new
    File.open(@mzid_file) { |f| Ox.sax_parse(count_handler, f) }
    @num_spec = count_handler.spec_count
  end
  #puts "DBSeq:\t#{count_handler.dbseq_count}"
  #puts "Peptides:\t#{count_handler.pep_count}"
  #puts "PepEv:\t#{count_handler.pepev_count}"
  #puts "Spectra:\t#{count_handler.spec_count}"
  #
  # Totals are looked up through count_handler itself rather than through
  # @use_pbar.nil?, so use_pbar = false no longer dereferences a nil handler.
  #
  # NOTE(review): the accessor name `pbar` below is reconstructed from a
  # garbled listing (`handler..finish`); confirm against the handler classes.
  #
  # cache DBSequence elements
  dbseq_handler = DBSequenceHandler.new(count_handler ? count_handler.dbseq_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(dbseq_handler, f) }
  dbseq_handler.pbar.finish unless dbseq_handler.pbar.nil?
  @dbseq_h = dbseq_handler.dbseq_h
  #
  # cache Peptide elements
  pep_handler = PeptideHandler.new(count_handler ? count_handler.pep_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_handler, f) }
  pep_handler.pbar.finish unless pep_handler.pbar.nil?
  @pep_h = pep_handler.pep_h
  @mod_h = pep_handler.mod_h
  #
  # create/cache PeptideEvent elements
  pep_ev_handler = PeptideEventHandler.new(@dbseq_h, count_handler ? count_handler.pepev_count : nil)
  File.open(@mzid_file) { |f| Ox.sax_parse(pep_ev_handler, f) }
  pep_ev_handler.pbar.finish unless pep_ev_handler.pbar.nil?
  @pep_ev_h = pep_ev_handler.pep_ev_h
end
Instance Method Details
#write_to_csv(outfile = "result.csv", show_mods = true) ⇒ Object
Writes the output to the specified CSV file.
243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 |
# File 'lib/mzid/parser_sax.rb', line 243

# Streams the spectrum identifications of the mzIdentML file into a
# tab-separated file, one row per (identification, peptide evidence) pair.
#
# Columns: #spec_num, peptide, spec_prob, decoy, prot_ids, start, end,
# num_prot and, when show_mods is true, mods. The "decoy" column is omitted
# when the parser was built with tda_flag = false.
#
# @param outfile [String] path of the file to (over)write
# @param show_mods [Boolean] append a "mods" column formatted as
#   "loc;+delta|loc;-delta" (deltas are truncated to integers); the cell is
#   empty when the peptide has no modifications
def write_to_csv(outfile="result.csv", show_mods=true)
  # Options are passed as keywords: a positional Hash is rejected by CSV.open
  # on Ruby 3, while the keyword form works on older versions as well.
  CSV.open(outfile, "w", col_sep: "\t") do |csv|
    header = ["#spec_num", "peptide", "spec_prob", "decoy", "prot_ids", "start", "end", "num_prot"]
    header.push("mods") if show_mods
    header.delete("decoy") unless @tda_flag
    csv << header

    # Callback handed to SpectraIDHandler together with the cached lookups.
    row_writer = Proc.new do |spec_h|
      # peptide reference/seq
      pep_ref = spec_h[:peptide_ref].to_sym
      pep_seq = @pep_h[pep_ref]
      mods = @mod_h[pep_ref]
      # peptide evidence list
      pep_ev_refs = spec_h[:peptideEvidence_ref]
      # number of proteins with matching peptide
      num_prot = pep_ev_refs.size

      # The modification string depends only on the peptide, so build it once.
      mod_str = nil
      if show_mods && !mods.nil?
        mod_str = mods.keys.map do |loc|
          delta = mods[loc].to_i
          # Positive deltas get an explicit "+"; other values already carry
          # their own sign (previously a negative delta was printed as "--18").
          [loc, delta > 0 ? "+#{delta}" : delta.to_s].join(";")
        end.join("|")
      end

      # for each PeptideEvidence entry ...
      pep_ev_refs.each do |pep_ev_ref|
        pep_ev = @pep_ev_h[pep_ev_ref]
        row = [spec_h[:id], pep_seq, spec_h[:spec_prob], pep_ev.get_is_decoy,
               pep_ev.get_prot_id, pep_ev.get_start_pos, pep_ev.get_end_pos, num_prot]
        row.delete_at(3) unless @tda_flag # index 3 is the decoy flag
        row.push(mod_str) if show_mods
        csv << row
      end
    end

    spec_handler = SpectraIDHandler.new(@dbseq_h, @pep_h, @pep_ev_h, row_writer, @use_pbar.nil? ? nil : @num_spec)
    File.open(@mzid_file) { |f| Ox.sax_parse(spec_handler, f) }
    # NOTE(review): the accessor name `pbar` is reconstructed from a garbled
    # listing (`spec_handler..finish`); confirm against SpectraIDHandler.
    spec_handler.pbar.finish unless spec_handler.pbar.nil?
  end
end