Class: Bio::PSORT::PSORT2::Report

Inherits:
Object
  • Object
show all
Defined in:
lib/bio/appl/psort/report.rb

Overview

Bio::PSORT::PSORT2::Report

Report parser class for PSORT II (PSORT2).

Example

Constant Summary collapse

BOUNDARY =

Report boundary string.

'-' * 75
RS =

Report delimiter.

DELIMITER = "\)\n\n#{BOUNDARY}"

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(raw = '', entry_id = nil, scl = nil, definition = nil, seq = nil, k = nil, features = {}, prob = {}, pred = nil) ⇒ Report

Constructs a Bio::PSORT::PSORT2::Report object.



227
228
229
230
231
232
233
234
235
236
237
238
# File 'lib/bio/appl/psort/report.rb', line 227

# Builds a report object from already-parsed values.
#
# Every argument is optional and is stored as-is in the instance variable
# of the same name; nothing is parsed here.
#
# raw::        raw text of the output report
# entry_id::   entry_id of the query sequence
# scl::        given subcellular localization code
# definition:: definition of the query sequence
# seq::        query sequence
# k::          k parameter of the kNN classifier
# features::   feature vector (Hash)
# prob::       probability vector (Hash)
# pred::       predicted subcellular localization code
def initialize(raw = '', entry_id = nil, scl = nil, definition = nil,
               seq = nil, k = nil, features = {}, prob = {}, pred = nil)
  # Description of the query.
  @entry_id   = entry_id
  @definition = definition
  @seq        = seq
  @scl        = scl

  # kNN classifier input and output.
  @k          = k
  @features   = features
  @prob       = prob
  @pred       = pred

  # Unparsed report text.
  @raw        = raw
end

Instance Attribute Details

#definitionObject

Definition of query sequence.



205
206
207
# File 'lib/bio/appl/psort/report.rb', line 205

# Reader for @definition, the definition of the query sequence.
def definition
  return @definition
end

#entry_idObject

entry_id of query sequence.



199
200
201
# File 'lib/bio/appl/psort/report.rb', line 199

# Reader for @entry_id, the entry_id of the query sequence.
def entry_id
  return @entry_id
end

#featuresObject

Feature vector used for the kNN prediction.



214
215
216
# File 'lib/bio/appl/psort/report.rb', line 214

# Reader for @features, the feature vector used for the kNN prediction.
def features
  return @features
end

#kObject

k parameter of k-nearest neighbors classifier.



211
212
213
# File 'lib/bio/appl/psort/report.rb', line 211

# Reader for @k, the k parameter of the k-nearest neighbors classifier.
def k
  return @k
end

#predObject

Predicted subcellular localization (three-letter code).



220
221
222
# File 'lib/bio/appl/psort/report.rb', line 220

# Reader for @pred, the predicted subcellular localization code.
def pred
  return @pred
end

#probObject

Probability vector of kNN prediction.



217
218
219
# File 'lib/bio/appl/psort/report.rb', line 217

# Reader for @prob, the probability vector of the kNN prediction.
def prob
  return @prob
end

#rawObject

Raw text of output report.



223
224
225
# File 'lib/bio/appl/psort/report.rb', line 223

# Reader for @raw, the raw text of the output report.
def raw
  return @raw
end

#sclObject

Given subcellular localization (three-letter code).



202
203
204
# File 'lib/bio/appl/psort/report.rb', line 202

# Reader for @scl, the given subcellular localization code.
def scl
  return @scl
end

#seqObject

The query sequence.



208
209
210
# File 'lib/bio/appl/psort/report.rb', line 208

# Reader for @seq, the query sequence.
def seq
  return @seq
end

Class Method Details

.default_parser(ent, entry_id = nil) ⇒ Object

Parser for the default report format. “psort report” output.



273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
# File 'lib/bio/appl/psort/report.rb', line 273

# Parser for the default report format ("psort report" output).
#
# The report text is cut at blank lines into four sections, used in order:
# header line, feature matrix, kNN probabilities, prediction line.
#
# ent::      report text
# entry_id:: optional entry_id handed to the constructor
#
# Returns the populated report object.
def self.default_parser(ent, entry_id = nil)
  report = self.new(ent, entry_id)
  sections = ent.split(/\n\n/).map {|section| section.chomp }

  report.set_header_line(sections[0])

  # Feature matrix: "name: value" fields separated by two spaces; line
  # breaks inside the section are flattened to spaces first.
  sections[1].gsub(/\n/,' ').strip.split(/  /).each do |field|
    name, value = field.split(/: /)
    report.features[name.strip] = value.strip.to_f
  end

  report.prob = self.set_kNN_prob(sections[2])
  report.set_prediction(sections[3])

  report
end

.divent(entry) ⇒ Object

Divides entry body



392
393
394
395
# File 'lib/bio/appl/psort/report.rb', line 392

# Divides an entry body at the BOUNDARY element.
#
# entry:: indexable collection of report chunks (v_parser passes an Array)
#
# Returns a two-element Array:
# * everything before the boundary,
# * everything from two positions past the boundary onward (the element
#   directly after the boundary is skipped).
#
# Raises ArgumentError when +entry+ contains no BOUNDARY.
def self.divent(entry)
  boundary = entry.index(BOUNDARY)
  unless boundary
    raise ArgumentError, "report boundary not found\n[#{entry.inspect}]"
  end
  # Exclusive range: with the boundary at index 0 the head must be empty.
  # The former `0..(boundary - 1)` became `0..-1`, i.e. the whole entry.
  return entry[0...boundary], entry[(boundary + 2)..-1]
end

.parser(str, entry_id) ⇒ Object

Parses output report with output format detection automatically.



242
243
244
245
246
247
248
249
250
251
252
253
254
255
# File 'lib/bio/appl/psort/report.rb', line 242

# Parses an output report, detecting the output format automatically.
#
# The format is chosen from the first matching marker in +str+:
# * a line starting with " psg:"  -> default_parser
# * a line starting with "PSG:"   -> v_parser
# * ": too short length "         -> too_short_parser
# * "PSORT II server"             -> a report holding only the raw text
#
# str::      report text
# entry_id:: entry_id passed on to the selected parser
#
# Returns the report object built by the selected parser.
# Raises ArgumentError when no marker matches.
def self.parser(str, entry_id)
  case str
  when /^ psg:/   # default report
    self.default_parser(str, entry_id)
  when /^PSG:/    # -v report
    self.v_parser(str, entry_id)
  when /: too short length /
    self.too_short_parser(str, entry_id)
  when /PSORT II server/
    # This branch used to pass `ent`, which is not defined in this method
    # and raised NameError; the report text is `str`.
    self.new(str, entry_id)
  else
    raise ArgumentError, "invalid format\n[#{str}]"
  end
end

.set_kNN_prob(str) ⇒ Object

Builds and returns the probability hash that the parsers assign to @prob.



309
310
311
312
313
314
315
316
317
318
319
320
# File 'lib/bio/appl/psort/report.rb', line 309

# Builds the kNN probability table from the probability section of a report.
#
# Every key of Bio::PSORT::PSORT2::SclNames starts at 0.0. Each
# "<value> %: <name>" line of +str+ then stores <value> under the key whose
# SclNames value equals <name>.
#
# str:: probability section text; tabs are dropped, one entry per line
#
# Returns a Hash mapping each SclNames key to a Float. A <name> that is not
# a value of SclNames is stored under the nil key, as it was before.
def self.set_kNN_prob(str)
  scl_names = Bio::PSORT::PSORT2::SclNames
  prob = {}
  scl_names.keys.each {|code| prob[code] = 0.0 }
  str.gsub(/\t/,'').split(/\n/).each {|line|
    val, scl = line.strip.split(/ %: /)
    # Reverse lookup (value -> key). Hash#index, used here previously, was
    # removed in Ruby 3.0; Hash#key is its replacement.
    prob[scl_names.key(scl)] = val.to_f
  }
  return prob
end

.too_short_parser(ent, entry_id = nil) ⇒ Object

Parser for “too short length” report.

$id: too short length ($leng), skipped\n";


260
261
262
263
264
265
266
267
268
# File 'lib/bio/appl/psort/report.rb', line 260

# Parser for the "too short length" report, a single line of the form
# "<id>: too short length (<length>), skipped".
#
# ent::      report text
# entry_id:: optional entry_id; when given it wins over the id in the text
#
# Returns a report whose scl is '---' when the line matches; otherwise the
# report carries only the raw text and the given entry_id.
def self.too_short_parser(ent, entry_id = nil)
  report = self.new(ent)
  report.entry_id = entry_id
  return report unless ent =~ /^(.+)?: too short length/

  # Fall back to the id found in the text only if none was supplied.
  report.entry_id ||= Regexp.last_match(1)
  report.scl = '---'
  report
end

.v_parser(ent, entry_id = nil) ⇒ Object

Parser for the verbose output report format. “psort -v report” and WWW server output.



338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
# File 'lib/bio/appl/psort/report.rb', line 338

# Parser for the verbose output report format ("psort -v report") and the
# WWW server output.
#
# ent::      report text
# entry_id:: optional entry_id handed to the constructor
#
# Returns the populated report object.
def self.v_parser(ent, entry_id = nil)
  report = Bio::PSORT::PSORT2::Report.new(ent, entry_id)

  # Cut the text into chunks at blank lines.
  ent = ent.split(/\n\n/).map {|e| e.chomp }
  # Re-join chunks that were split off from an earlier chunk. The merge
  # target is j positions back, where j comes from the search_j helper
  # (not visible here); the merged chunk's own slot is set to nil.
  ent.each_with_index {|e, i|
    # A chunk in which no line starts with a word character, '-', '>' or a
    # tab is treated as a continuation.
    unless /^(\w|-|\>|\t)/ =~ e
      j = self.__send__(:search_j, i, ent)
      ent[i - j] += e
      ent[i] = nil
    end
    if /^none/ =~ e    # psort output bug
      j = self.__send__(:search_j, i, ent)
      ent[i - j] += e
      ent[i] = nil
    end
  }
  # Drop the slots emptied by the merges above.
  ent.compact!

  if /^ PSORT II server/ =~ ent[0] # for WWW version
    # Discard the server banner chunk.
    ent.shift 
    # Find the "Results of Subprograms" chunk (the last one wins), then
    # remove it together with the chunk just before it.
    # NOTE(review): if no such chunk exists, delline stays '' and
    # ent.index presumably returns nil, so `i - 1` would raise - confirm.
    delline = ''
    ent.each {|e| delline = e if /^Results of Subprograms/ =~ e }
    i = ent.index(delline)
    ent.delete(delline)
    ent.delete_at(i - 1)
  end

  # The first remaining chunk is the header line, the second the sequence.
  report.set_header_line(ent.shift)  
  report.seq = Bio::Sequence::AA.new(ent.shift)

  # Split what is left into feature chunks and prediction chunks.
  fent, pent = self.divent(ent)
  report.set_features(fent)          
  report.prob = self.set_kNN_prob(pent[0].strip)  
  report.set_prediction(pent[1].strip)

  return report
end

Instance Method Details

#set_features(features_ary) ⇒ Object

Sets @features values.



398
399
400
401
402
403
404
# File 'lib/bio/appl/psort/report.rb', line 398

# Stores each feature chunk in the features hash.
#
# The key of a chunk is the text before its first ": " or ":\n", stripped;
# the value is the whole chunk. Afterwards 'AA' is set to the length of seq.
#
# features_ary:: Array of feature chunk strings
#
# Returns the value stored under 'AA'.
def set_features(features_ary)
  table = self.features
  features_ary.each do |chunk|
    label = chunk.split(/\:( |\n)/).first.strip
    table[label] = chunk
  end
  table['AA'] = self.seq.length
end

#set_header_line(str) ⇒ Object

Parses the report header line and sets @entry_id (if not already set), @definition and @scl.



292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
# File 'lib/bio/appl/psort/report.rb', line 292

# Parses the header line and fills in @entry_id, @definition and @scl.
#
# * A leading line of dashes is removed from +str+ in place.
# * @entry_id is taken from the first token (leading dashes stripped) only
#   when it is not already set; otherwise that token is left in place.
# * @definition is the text after "(<n> aa) " if present, else the rest of
#   the line.
# * @scl is set to the first word of @definition when that word is a key
#   of SclNames.
#
# str:: header line (modified in place)
def set_header_line(str)
  str.sub!(/^-+\n/,'')
  tokens = str.split(/\t| /)
  @entry_id ||= tokens.shift.sub(/^-+/,'').strip

  rest = tokens.join(' ').chomp
  if rest =~ /\(\d+ aa\) (.+)$/
    @definition = Regexp.last_match(1)
  else
    @definition = rest
  end

  first_word = @definition.split(' ').first
  @scl = first_word if SclNames.key?(first_word)
end

#set_prediction(str) ⇒ Object

Sets @pred and @k (and @entry_id when it is not already set) from the prediction line.



323
324
325
326
327
328
329
330
331
332
333
# File 'lib/bio/appl/psort/report.rb', line 323

# Reads a line of the form "prediction for <id> is <code> (k=<n>)".
#
# Sets @pred to the three-character code and @k to the digits of <n>
# (kept as a String). @entry_id is taken from <id> only when it is not
# already set.
#
# str:: prediction line
#
# Raises ArgumentError when +str+ does not match that form.
def set_prediction(str)
  pattern = /prediction for (\S+?) is (\w{3}) \(k=(\d+)\)/
  unless pattern === str
    raise ArgumentError, 
      "Invalid format at(#{self.entry_id}):\n[#{str}]\n"
  end

  id, code, neighbors = Regexp.last_match.captures
  @entry_id ||= id
  @pred = code
  @k    = neighbors
end