Class: ProtXML

Inherits:
Object
  • Object
show all
Defined in:
lib/protk/protxml.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(file_name) ⇒ ProtXML

Returns a new instance of ProtXML.



58
59
60
61
# File 'lib/protk/protxml.rb', line 58

# Parses the XML file at +file_name+ into @doc and builds @groups from it.
#
# file_name - path of the file to read; handed to File.open.
#
# Any error raised by File.open or by the REXML parser propagates to the
# caller.
def initialize(file_name)
  # REXML::Document.new parses its source while constructing the document,
  # so the handle can be closed as soon as the block returns. The block form
  # of File.open guarantees that, even when parsing raises.
  @doc = File.open(file_name) { |io| REXML::Document.new(io) }
  @groups = init_groups
end

Instance Attribute Details

#groups ⇒ Object

Returns the value of attribute groups.



7
8
9
# File 'lib/protk/protxml.rb', line 7

# Reader for @groups, the array that init_groups fills with one hash per
# protein_group element (keys :group_probability and :proteins).
def groups
  @groups
end

Instance Method Details

#as_rows(threshold_probability) ⇒ Object

Converts the entire prot.xml document to row format. Returns an array of arrays, where each sub-array is a row containing a simple summary of one protein. A separate row is provided for every protein (including indistinguishable ones). The first row is the header.

Proteins with probabilities below a threshold are excluded



111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/protk/protxml.rb', line 111

# Renders the parsed groups as a table: an array of rows, header row first.
#
# threshold_probability - proteins whose :probability (converted with to_f)
#                         is below this value are left out.
#
# Each retained protein yields one row from protein_to_row, followed by one
# extra row per entry in its :indistinguishable_prots list. The extra rows
# are copies of the protein's row with the first column replaced by the
# indistinguishable protein's name.
def as_rows(threshold_probability)
  header = ["Accession","Probability","Indistinguishable Proteins","Num Peptides","Peptides"]

  # Collect the qualifying proteins from every group before building any row.
  retained = @groups.flat_map do |group|
    group[:proteins].select { |candidate| candidate[:probability].to_f >= threshold_probability }
  end

  retained.each_with_object([header]) do |protein, table|
    summary = protein_to_row(protein)
    table << summary

    protein[:indistinguishable_prots].each do |other_name|
      twin = summary.clone
      twin[0] = other_name
      table << twin
    end
  end
end

#find_pep_xml ⇒ Object



63
64
65
66
# File 'lib/protk/protxml.rb', line 63

# Returns the value of the "source_files" attribute on the first
# protein_summary_header element found anywhere in @doc.
#
# Returns nil when the document has no protein_summary_header element, or
# when that element has no "source_files" attribute.
def find_pep_xml()
  header = REXML::XPath.first(@doc, "//protein_summary_header")
  # Guard against a missing header so callers get nil, not a NoMethodError.
  header && header.attributes["source_files"]
end

#indistinguishable_proteins_from_protein(protein_element) ⇒ Object



10
11
12
13
14
15
16
17
18
# File 'lib/protk/protxml.rb', line 10

# Lists the protein_name attribute of every indistinguishable_protein element
# that is a direct child of +protein_element+.
#
# protein_element - REXML element to search beneath.
#
# Returns an array with one entry per child element, in the order the XPath
# query yields them. An entry is nil when the child has no attribute whose
# expanded name is "protein_name".
def indistinguishable_proteins_from_protein(protein_element)
  REXML::XPath.match(protein_element, "./indistinguishable_protein").map do |node|
    # Index the attributes by expanded name so the lookup below matches only
    # an attribute literally written as protein_name.
    by_name = {}
    node.attributes.each_attribute do |attribute|
      by_name[attribute.expanded_name.to_sym] = attribute.value
    end
    by_name[:protein_name]
  end
end

#init_groups ⇒ Object



46
47
48
49
50
51
52
53
54
55
# File 'lib/protk/protxml.rb', line 46

# Rebuilds @groups from the protein_group elements matched by the
# "//protein_group" XPath query evaluated from the root of @doc.
#
# Each group becomes a hash with:
#   :group_probability - the element's "probability" attribute, via to_f
#   :proteins          - the result of proteins_from_group for the element
#
# Returns the freshly populated @groups array.
def init_groups
  @groups = []
  REXML::XPath.match(@doc.root, "//protein_group").each do |group_node|
    @groups << {
      :group_probability => group_node.attributes["probability"].to_f,
      :proteins => proteins_from_group(group_node)
    }
  end
  @groups
end

#peptide_sequences_from_protein(prot) ⇒ Object



68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/protk/protxml.rb', line 68

# Collects the display sequences for the peptides of a protein hash.
#
# prot - hash whose :peptides entry is a list of peptide hashes, each with
#        :modifications and :peptide_sequence entries.
#
# A peptide with a non-empty :modifications list contributes every entry of
# that list; otherwise it contributes its :peptide_sequence.
#
# Returns a flat array in peptide order.
def peptide_sequences_from_protein(prot)
  prot[:peptides].each_with_object([]) do |peptide, collected|
    modified_forms = peptide[:modifications]
    if modified_forms.length.zero?
      collected << peptide[:peptide_sequence]
    else
      modified_forms.each { |form| collected << form }
    end
  end
end

#peptides_from_protein(protein_element) ⇒ Object



20
21
22
23
24
25
26
27
28
29
30
31
32
# File 'lib/protk/protxml.rb', line 20

# Builds one hash per peptide element that is a direct child of
# +protein_element+.
#
# Every XML attribute of the peptide is copied into the hash under its
# expanded name as a symbol. A :modifications entry is then added, holding
# the "modified_peptide" attribute value of each modification_info child
# (an empty array when there are none).
#
# Returns the array of peptide hashes in document order.
def peptides_from_protein(protein_element)
  REXML::XPath.match(protein_element, "./peptide").map do |node|
    record = {}
    node.attributes.each_attribute do |attribute|
      record[attribute.expanded_name.to_sym] = attribute.value
    end

    record[:modifications] = node.get_elements("./modification_info").map do |info|
      info.attribute("modified_peptide").value
    end
    record
  end
end

#protein_to_row(prot) ⇒ Object



82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/protk/protxml.rb', line 82

# Produces the five-column summary row for one protein hash.
#
# Columns, in order:
#   1. prot[:protein_name]
#   2. prot[:probability]
#   3. the protein's own name followed by each :indistinguishable_prots
#      entry, separated by ";"
#   4. the number of entries in prot[:peptides]
#   5. the values from peptide_sequences_from_protein, separated by ";"
#      (an empty string when there are none)
def protein_to_row(prot)
  accession = prot[:protein_name]

  # The protein's own name always leads the indistinguishable list.
  group_members = [accession.to_s]
  prot[:indistinguishable_prots].each { |other| group_members << other.to_s }

  peptide_count = prot[:peptides].length
  sequences = peptide_sequences_from_protein(prot).map { |sequence| sequence.to_s }

  [
    accession,
    prot[:probability],
    group_members.join(";"),
    peptide_count,
    sequences.join(";")
  ]
end

#proteins_from_group(group_element) ⇒ Object



34
35
36
37
38
39
40
41
42
43
44
# File 'lib/protk/protxml.rb', line 34

# Builds one hash per protein element that is a direct child of
# +group_element+.
#
# Every XML attribute of the protein is copied into the hash under its
# expanded name as a symbol. Two entries are then added:
#   :peptides                - result of peptides_from_protein
#   :indistinguishable_prots - result of
#                              indistinguishable_proteins_from_protein
#
# Returns the array of protein hashes in document order.
def proteins_from_group(group_element)
  REXML::XPath.match(group_element, "./protein").map do |node|
    record = {}
    node.attributes.each_attribute do |attribute|
      record[attribute.expanded_name.to_sym] = attribute.value
    end
    record[:peptides] = peptides_from_protein(node)
    record[:indistinguishable_prots] = indistinguishable_proteins_from_protein(node)
    record
  end
end