Module: Ms::Ident
- Defined in:
- lib/ms/ident.rb
Constant Summary collapse
- IPI_RE =
/IPI:([\w\d\.]+)\|/
- GI_RE =
/gi|([\w\d\.]+)\|/
- STANDARD_AA =
the twenty standard amino acids
%w(A C D E F G H I K L M N P Q R S T V W Y)
- DEFAULT_PEPTIDE_CENTRIC_DB =
{:missed_cleavages => 1, :min_length => 8, :enzyme => Ms::InSilico::Digester::TRYPSIN, :id_regexp => nil, :remove_digestion_file => true, :cleave_initiator_methionine => true, :expand_aa => {'X' => STANDARD_AA}}
Class Method Summary collapse
-
.expand_peptides(peptide, expand_aa) ⇒ Object
does combinatorial expansion of all letters requesting it.
-
.peptide_centric_db(fasta_file, opts = {}) ⇒ Object
writes a new file with the added ‘min_aaseq<Integer>’ creates a temporary digestion file that contains all peptides digesting with certain missed_cleavages (i.e., min_seq_length is not applied to this file but on the final peptide centric db).
Class Method Details
.expand_peptides(peptide, expand_aa) ⇒ Object
does combinatorial expansion of all letters requesting it. expand_aa is hash like: ‘X’=>STANDARD_AA
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 |
# File 'lib/ms/ident.rb', line 105 def self.(peptide, ) letters_in_order = .keys.sort index_and_key = [] peptide.split('').each_with_index do |char,i| if let_index = letters_in_order.index(char) index_and_key << [i, letters_in_order[let_index]] end end = [peptide] index_and_key.each do |i,letter| new_peps = [] while current_pep = .shift do new_peps << [letter].map {|v| dp = current_pep.dup ; dp[i] = v ; dp } end = new_peps.flatten end end |
.peptide_centric_db(fasta_file, opts = {}) ⇒ Object
writes a new file with the added ‘min_aaseq<Integer>’ creates a temporary digestion file that contains all peptides digesting with certain missed_cleavages (i.e., min_seq_length is not applied to this file but on the final peptide centric db)
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/ms/ident.rb', line 19 def self.peptide_centric_db(fasta_file, opts={}) opts = DEFAULT_PEPTIDE_CENTRIC_DB.merge(opts) (missed_cleavages, min_length, enzyme, id_regexp, remove_digestion_file, cleave_initiator_methionine, ) = opts.values_at(:missed_cleavages, :min_length, :enzyme, :id_regexp, :remove_digestion_file, :cleave_initiator_methionine, :expand_aa) unless id_regexp id_regexp = Ms::Fasta.id_regexp(Ms::Fasta.filetype(fasta_file)) raise RuntimeError, "fasta file type not recognized, supply id_regexp" unless id_regexp end start_time = Time.now print "Digesting #{fasta_file} ..." if $VERBOSE if = Regexp.new("[" << Regexp.escape(.keys.join) << "]") end base = fasta_file.chomp(File.extname(fasta_file)) digestion_file = base + ".msd_clvg#{missed_cleavages}.peptides" File.open(digestion_file, "w") do |fh| Ms::Fasta.open(fasta_file) do |fasta| fasta.each do |prot| peptides = enzyme.digest(prot.sequence, missed_cleavages) if (cleave_initiator_methionine && (prot.sequence[0,1] == "M")) m_peps = [] init_methionine_peps = [] peptides.each do |pep| # if the peptide is at the beginning of the protein sequence if prot.sequence[0,pep.size] == pep m_peps << pep[1..-1] end end peptides.push(*m_peps) end if peptides = peptides.map do |pep| if pep =~ (pep, ) else pep end end.flatten end fh.puts( prot.header.split(/\s+/).first + "\t" + peptides.join(" ") ) end end end puts "#{Time.now - start_time} sec" if $VERBOSE start_time = Time.now print "Organizing raw digestion #{digestion_file} ..." if $VERBOSE hash = Hash.new {|h,k| h[k] = [] } IO.foreach(digestion_file) do |line| (prot, *peps) = line.chomp!.split(/\s+/) id = prot.match(id_regexp)[1] peps.each do |pep| if pep.size >= min_length hash[pep] << id end end end puts "#{Time.now - start_time} sec" if $VERBOSE base = digestion_file.chomp(File.extname(digestion_file)) final_outfile = base + ".min_aaseq#{min_length}" + ".yml" start_time = Time.now print "Writing results to #{} ..." if $VERBOSE File.open(final_outfile, 'w') do |out| hash.each do |k,v| out.puts( "#{k}: #{v.join('-')}" ) end end puts "#{Time.now - start_time} sec" if $VERBOSE if remove_digestion_file File.unlink(digestion_file) end end |