Class: ProtXML
- Inherits:
-
Object
- Object
- ProtXML
- Defined in:
- lib/protk/protxml.rb
Instance Attribute Summary collapse
-
#groups ⇒ Object
Returns the value of attribute groups.
Instance Method Summary collapse
-
#as_rows(threshold_probability) ⇒ Object
Convert the entire prot.xml document to row format. Returns an array of arrays.
- #find_pep_xml ⇒ Object
- #indistinguishable_proteins_from_protein(protein_element) ⇒ Object
- #init_groups ⇒ Object
-
#initialize(file_name) ⇒ ProtXML
constructor
A new instance of ProtXML.
- #peptide_sequences_from_protein(prot) ⇒ Object
- #peptides_from_protein(protein_element) ⇒ Object
- #protein_to_row(prot) ⇒ Object
- #proteins_from_group(group_element) ⇒ Object
Constructor Details
#initialize(file_name) ⇒ ProtXML
Returns a new instance of ProtXML.
58 59 60 61 |
# File 'lib/protk/protxml.rb', line 58
# Parses the file at +file_name+ into a REXML document and builds the
# protein groups from it.
#
# The file is opened in block form so the handle is closed as soon as the
# document has been built, rather than being left open until garbage
# collection.
#
# @param file_name [String] path of the file to parse
# @return [ProtXML] a new instance of ProtXML
def initialize(file_name)
  @doc = File.open(file_name) { |io| REXML::Document.new(io) }
  @groups = init_groups
end
Instance Attribute Details
#groups ⇒ Object
Returns the value of attribute groups.
7 8 9 |
# File 'lib/protk/protxml.rb', line 7
# Reader for the groups attribute.
#
# @return [Object] whatever is currently stored in @groups
def groups
  @groups
end
Instance Method Details
#as_rows(threshold_probability) ⇒ Object
Convert the entire prot.xml document to row format. Returns an array of arrays. Each of the sub-arrays is a row. Each row should contain a simple summary of the protein. A separate row should be provided for every protein (including indistinguishable ones). The first row will be the header.
Proteins with probabilities below a threshold are excluded.
111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/protk/protxml.rb', line 111
# Flattens the parsed groups into a table of rows.
#
# The first row is the header. Every protein whose :probability (converted
# with to_f) is at least +threshold_probability+ contributes one row built by
# protein_to_row, followed by one copy of that row per entry in its
# :indistinguishable_prots list, with the first column replaced by that entry.
#
# @param threshold_probability [Numeric] minimum probability a protein needs
#   to be included
# @return [Array<Array>] header row followed by the protein rows
def as_rows(threshold_probability)
  header = ["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"]

  # Collect all qualifying proteins first, in group order.
  selected = @groups.flat_map do |grp|
    grp[:proteins].select { |prot| prot[:probability].to_f >= threshold_probability }
  end

  selected.each_with_object([header]) do |prot, rows|
    base_row = protein_to_row(prot)
    rows << base_row
    prot[:indistinguishable_prots].each do |other_name|
      # Same summary as the primary protein, listed under the other accession.
      alias_row = base_row.clone
      alias_row[0] = other_name
      rows << alias_row
    end
  end
end
#find_pep_xml ⇒ Object
63 64 65 66 |
# File 'lib/protk/protxml.rb', line 63
# Returns the "source_files" attribute of the first protein_summary_header
# element found in the document.
#
# NOTE(review): the method name suggests this value is the path of the
# pep.xml input; that is not verifiable from this code alone.
#
# @return [String, nil] the attribute value, or nil when the document has no
#   protein_summary_header element or the element lacks the attribute
def find_pep_xml
  header = REXML::XPath.first(@doc, "//protein_summary_header")
  # XPath.first yields nil when nothing matches; report "not found" as nil
  # instead of failing with NoMethodError on nil.
  return nil unless header

  header.attributes["source_files"]
end
#indistinguishable_proteins_from_protein(protein_element) ⇒ Object
10 11 12 13 14 15 16 17 18 |
# File 'lib/protk/protxml.rb', line 10
# Collects the protein_name attribute of every indistinguishable_protein
# child of +protein_element+.
#
# The attribute is read directly instead of first copying every attribute
# into a temporary hash, since only protein_name is ever used.
#
# @param protein_element [REXML::Element] element whose direct
#   indistinguishable_protein children are inspected
# @return [Array<String, nil>] one entry per child, in document order; nil for
#   a child that has no protein_name attribute
def indistinguishable_proteins_from_protein(protein_element)
  REXML::XPath.match(protein_element, "./indistinguishable_protein").map do |ipel|
    ipel.attributes["protein_name"]
  end
end
#init_groups ⇒ Object
46 47 48 49 50 51 52 53 54 55 |
# File 'lib/protk/protxml.rb', line 46
# Rebuilds @groups from every protein_group element in the document.
#
# Each group is stored as a hash with :group_probability (the element's
# "probability" attribute converted with to_f) and :proteins (the result of
# proteins_from_group for that element).
#
# @return [Array<Hash>] the freshly populated @groups array
def init_groups
  @groups = []
  REXML::XPath.each(@doc.root, "//protein_group") do |group_el|
    entry = {}
    entry[:group_probability] = group_el.attributes["probability"].to_f
    entry[:proteins] = proteins_from_group(group_el)
    @groups << entry
  end
  @groups
end
#peptide_sequences_from_protein(prot) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 |
# File 'lib/protk/protxml.rb', line 68
# Lists the sequences to report for a protein hash.
#
# For each entry in prot[:peptides]: when its :modifications list is
# non-empty, every modification entry is reported; otherwise the entry's
# :peptide_sequence is reported.
#
# @param prot [Hash] protein hash with a :peptides array of peptide hashes
# @return [Array] the sequences, in peptide order
def peptide_sequences_from_protein(prot)
  prot[:peptides].flat_map do |pep|
    mods = pep[:modifications]
    mods.length > 0 ? mods : [pep[:peptide_sequence]]
  end
end
#peptides_from_protein(protein_element) ⇒ Object
20 21 22 23 24 25 26 27 28 29 30 31 32 |
# File 'lib/protk/protxml.rb', line 20
# Builds one hash per peptide child of +protein_element+.
#
# Every XML attribute of the peptide becomes a symbol-keyed entry holding the
# attribute value, and :modifications holds the "modified_peptide" attribute
# of each modification_info child.
#
# @param protein_element [REXML::Element] element whose direct peptide
#   children are converted
# @return [Array<Hash>] peptide hashes in document order
# @raise [NoMethodError] if a modification_info child has no
#   modified_peptide attribute
def peptides_from_protein(protein_element)
  REXML::XPath.match(protein_element, "./peptide").map do |pel|
    peptide = {}
    # NOTE(review): the extracted listing read `att..to_sym` (the accessor
    # name was lost); expanded_name is assumed here - confirm against the
    # original file.
    pel.attributes.each_attribute { |att| peptide[att.expanded_name.to_sym] = att.value }
    peptide[:modifications] = pel.get_elements("./modification_info").map do |mod|
      mod.attribute("modified_peptide").value
    end
    peptide
  end
end
#protein_to_row(prot) ⇒ Object
82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
# File 'lib/protk/protxml.rb', line 82
# Summarises a protein hash as a five-element row: protein name, probability,
# a ";"-separated list made of the protein name followed by its
# :indistinguishable_prots entries, the number of entries in :peptides, and a
# ";"-separated list of the values from peptide_sequences_from_protein.
#
# @param prot [Hash] protein hash with :protein_name, :probability,
#   :indistinguishable_prots and :peptides entries
# @return [Array] the row for this protein
def protein_to_row(prot)
  accession = prot[:protein_name]
  probability = prot[:probability]

  # Own name first, then every indistinguishable name.
  name_list = prot[:indistinguishable_prots].map(&:to_s).unshift(accession.to_s).join(";")

  [
    accession,
    probability,
    name_list,
    prot[:peptides].length,
    peptide_sequences_from_protein(prot).map(&:to_s).join(";")
  ]
end
#proteins_from_group(group_element) ⇒ Object
34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/protk/protxml.rb', line 34
# Builds one hash per protein child of +group_element+.
#
# Every XML attribute of the protein becomes a symbol-keyed entry holding the
# attribute value; :peptides comes from peptides_from_protein and
# :indistinguishable_prots from indistinguishable_proteins_from_protein.
#
# @param group_element [REXML::Element] element whose direct protein children
#   are converted
# @return [Array<Hash>] protein hashes in document order
def proteins_from_group(group_element)
  REXML::XPath.match(group_element, "./protein").map do |pel|
    protein = {}
    # NOTE(review): the extracted listing read `att..to_sym` (the accessor
    # name was lost); expanded_name is assumed here - confirm against the
    # original file.
    pel.attributes.each_attribute { |att| protein[att.expanded_name.to_sym] = att.value }
    protein[:peptides] = peptides_from_protein(pel)
    protein[:indistinguishable_prots] = indistinguishable_proteins_from_protein(pel)
    protein
  end
end