Module: Ms::ErrorRate::Qvalue::Mascot::Percolator
- Defined in:
- lib/ms/error_rate/qvalue/mascot/percolator.rb,
lib/ms/error_rate/qvalue/mascot/percolator.rb
Class Method Summary collapse
-
.qvalues(datp_files, tab_txt_files, opts = {}) ⇒ Object
Returns an array of PeptideHit Structs with members (:filename, :query_title, :charge, :sequence, :mowse, :qvalue). Supported opts: :min_peptide_length => Integer.
-
.tab_txt(file) ⇒ Object
Returns an array of Structs whose keys are taken from the first line of the file; every value is cast to an appropriate type. Three additional keys are available: query_num, rank and sequence. The sequence is the amino acid sequence without the surrounding X’s and dots.
Class Method Details
.qvalues(datp_files, tab_txt_files, opts = {}) ⇒ Object
Returns an array of PeptideHit Structs with members (:filename, :query_title, :charge, :sequence, :mowse, :qvalue). Supported opts:
:min_peptide_length => Integer
49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 |
# File 'lib/ms/error_rate/qvalue/mascot/percolator.rb', line 49

# Pairs each Mascot dat file with its Percolator tab-text file and returns
# the best-scoring peptide hit for every query title.
#
# datp_files    - dat file paths; paired positionally with tab_txt_files.
# tab_txt_files - Percolator tab-text file paths, read with tab_txt.
# opts          - :min_peptide_length => Integer; when given, a best hit is
#                 kept only if its sequence is at least that long.
#
# Returns an array of MascotPeptideHit objects built from
# (file basename, query title, query charge, sequence, score, q-value).
def qvalues(datp_files, tab_txt_files, opts={})
  min_length = opts[:min_peptide_length]

  # Candidates are grouped by query title so that a single winner per title
  # (which should mean per scan) can be chosen afterwards.
  candidates_by_title = Hash.new { |hash, title| hash[title] = [] }

  datp_files.zip(tab_txt_files) do |dat_path, percolator_path|
    rows = Ms::ErrorRate::Qvalue::Mascot::Percolator.tab_txt(percolator_path)
    # Look up q-values by [query number, rank]; a later row overwrites an
    # earlier one with the same key.
    qvalue_for = Hash[rows.map { |row| [[row.query_num, row.rank], row.q_value] }]
    run_name = File.basename(dat_path, '.*')

    Ms::Mascot::Dat.open(dat_path) do |dat|
      dat.each_peptide_hit(:by => :groups, :yield_nil => false, :with_query => true) do |group, query|
        group.each do |hit|
          qval = qvalue_for[[hit.query_num, hit.hit_num]]
          # Hits without a Percolator q-value are dropped.
          next unless qval

          record = Ms::ErrorRate::Qvalue::Mascot::MascotPeptideHit.new(
            run_name, query.title, query.charge, hit.sequence, hit.score, qval
          )
          candidates_by_title[record.query_title] << record
        end
      end
    end
  end

  # Highest mowse score wins within each query title.
  winners = candidates_by_title.values.map do |candidates|
    candidates.size == 1 ? candidates.first : candidates.sort_by(&:mowse).last
  end

  # The length filter is applied to the winner only: a title whose best hit
  # is too short contributes nothing (no fallback to a runner-up).
  return winners unless min_length

  winners.select { |hit| hit.sequence.size >= min_length }
end
.tab_txt(file) ⇒ Object
Returns an array of Structs whose keys are taken from the first line of the file (with ‘_’ substituted for ‘-’); every value is cast to an appropriate type. Three additional keys are available: query_num, rank and sequence. The sequence is the amino acid sequence without the surrounding X’s and dots.
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/ms/error_rate/qvalue/mascot/percolator.rb', line 19

# Parses a tab-delimited Percolator results file into an array of Structs.
#
# The header line supplies the member names ('-' is replaced by '_', so
# "q-value" becomes :q_value); :query_num, :rank and :sequence are appended.
# Data lines are read positionally as:
#   PSMId  score  q-value  posterior_error_prob  peptide  proteinIds...
# score, q-value and posterior_error_prob are converted with to_f, and every
# column after peptide is collected into one array of protein ids.
#
# file - path of the tab-text file to read.
#
# Returns an array of Struct::Hit instances; [] when the file is empty.
# Blank lines (e.g. a trailing newline at end of file) are skipped.
def tab_txt(file)
  hits = []
  File.open(file) do |io|
    header = io.gets
    # An empty file has no header line, so there is nothing to parse.
    return hits if header.nil?

    # PSMId score q-value posterior_error_prob peptide proteinIds
    atts = header.chomp.split("\t").map { |v| v.gsub('-', '_').to_sym }
    atts.push(:query_num, :rank, :sequence)
    # NOTE: a String first argument makes Struct.new define the constant
    # Struct::Hit, so each call redefines it. Kept as-is so the class of the
    # returned objects does not change for existing callers.
    struct_class = Struct.new("Hit", *atts)

    io.each do |line|
      # Blank lines carry no PSM and would otherwise fail on nil fields.
      next if line.strip.empty?

      (query_rank, score, qvalue, perrp, peptide, *prots) = line.chomp.split("\t")
      # Each ';'-separated part of the PSMId is "label:number"; keep the numbers.
      (query, rank) = query_rank.split(';').map { |v| v.split(':').last.to_i }
      # The bare sequence is the middle piece of the dot-delimited peptide.
      hits << struct_class.new(query_rank, score.to_f, qvalue.to_f, perrp.to_f,
                               peptide, prots, query, rank, peptide.split('.')[1])
    end
  end
  hits
end