Class: Bio::DTASelect::OutputFile
- Inherits:
-
Object
- Object
- Bio::DTASelect::OutputFile
show all
- Defined in:
- lib/dta_select_output.rb
Defined Under Namespace
Classes: Peptide, Result, SelectedProtein
Class Method Summary
collapse
Class Method Details
.parse(io) ⇒ Object
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
|
# File 'lib/dta_select_output.rb', line 110
# Parses DTASelect output read line by line from +io+ and returns a
# populated Result.
#
# Every line is split on tab characters and handled as follows:
#
# * Until a line whose first field is 'Unique' has been seen, lines are
#   treated as header and skipped. The 'Unique' line supplies the peptide
#   column names, and its second field must be 'FileName'.
# * A line whose first field is not the empty string and which has no 12th
#   field is a protein line. Consecutive protein lines form one group, and
#   each peptide that follows is attached to every protein in the group.
# * A line whose second field is 'Proteins' stops the parse.
# * Any other line is a peptide (spectrum) line keyed by its second field.
#   A peptide seen again under a later protein group is reused, not
#   re-created.
#
# @param io [#each_line] source of the DTASelect output, e.g. an open File
# @return [Result] with +protein_name_to_object+ and
#   +peptide_name_to_object+ hashes filled in
# @raise [RuntimeError] if the second header column is not 'FileName', if
#   a protein identifier occurs twice, or if a peptide identifier is 10
#   characters or shorter
def self.parse(io)
  result = Result.new
  result.protein_name_to_object = {}
  result.peptide_name_to_object = {}

  reading_header = true
  current_proteins = []
  last_line_was_protein_name = false
  peptide_attribute_names = nil

  io.each_line do |line|
    stripped = line.chomp
    splits = stripped.split("\t")
    log.debug "Parsing line `#{stripped}'"

    if reading_header
      log.debug "reading header"
      if splits[0] == 'Unique'
        reading_header = false
        peptide_attribute_names = splits
        raise "Badly parsed file at this line: #{line.inspect}, expected 2nd field to be 'FileName', found #{splits[1]}" unless splits[1] == 'FileName'
      end
      next
    end

    if splits[0] != '' && splits[11].nil?
      # Protein line: fewer than 12 columns and a non-blank first column.
      ident, sequence_count, spectrum_count, sequence_coverage,
        length, molwt, pi, validation_status, descriptive_name = splits

      unless last_line_was_protein_name
        # The previous line was not a protein, so a new group starts here.
        log.debug "New protein now being parsed"
        current_proteins = []
      end

      current_protein = SelectedProtein.new
      last_line_was_protein_name = true
      current_proteins.push current_protein

      current_protein.identifier = ident
      current_protein.sequence_count = sequence_count.to_i
      current_protein.spectrum_count = spectrum_count.to_i
      current_protein.sequence_coverage = sequence_coverage.to_f
      current_protein.length = length.to_i
      current_protein.molwt = molwt.to_f
      current_protein.pi = pi.to_f
      # NOTE(review): coerced with to_f exactly as before; confirm this
      # column is numeric, since non-numeric text becomes 0.0.
      current_protein.validation_status = validation_status.to_f
      current_protein.descriptive_name = descriptive_name

      if result.protein_name_to_object.key?(ident)
        raise "Unexpectedly found the same protein identifier twice: #{ident}, from line #{stripped}"
      end
      result.protein_name_to_object[ident] = current_protein
    elsif splits[1] == 'Proteins'
      # Nothing after this line is read.
      break
    else
      log.debug "New spectra now being parsed"
      last_line_was_protein_name = false
      ident = splits[1]
      raise "Unexpected hits name `#{ident}', from line `#{stripped}'" unless ident.length > 10

      pep = result.peptide_name_to_object[ident]
      if pep.nil?
        pep = Peptide.new
        pep.identifier = ident
        pep.dtaselect_attributes ||= {}
        # Pair each header column name with the value in the same column;
        # columns missing from this line are stored as nil.
        peptide_attribute_names.zip(splits) do |attribute_name, value|
          pep.dtaselect_attributes[attribute_name] = value
        end
        result.peptide_name_to_object[ident] = pep
      end

      # Link the peptide to every protein of the current group, both ways.
      current_proteins.each do |protein|
        pep.parent_proteins.push protein
        protein.peptides.push pep
      end
      log.debug "Parsed this peptide #{pep.inspect}"
    end
  end

  log.debug "Proteins parsed: #{result.protein_name_to_object.inspect}"
  result
end
|