Class: Bio::GCG::Msf

Inherits:
Object show all
Defined in:
lib/bio/appl/gcg/msf.rb

Overview

MSF is a multiple sequence alignment format developed for the Wisconsin Package (GCG). Bio::GCG::Msf is a parser for the MSF format.

Constant Summary collapse

DELIMITER =

delimiter used by Bio::FlatFile

RS = nil

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(str) ⇒ Msf

Creates a new Msf object.



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/bio/appl/gcg/msf.rb', line 31

# Creates a new Msf object from the text of one MSF entry.
#
# The text is cut into: an optional heading line, a free-text description,
# the header line (the line ending in ".."), the per-sequence "Name:" lines
# and, after the "//" divider, the alignment rows. The alignment rows are
# only stored here (in @data); they are not parsed by the constructor.
#
# ---
# *Arguments*:
# * (required) _str_: String holding one MSF entry (it is not modified)
def initialize(str)
  # String#sub returns a copy, so the caller's string is never mutated.
  str = str.sub(/\A[\r\n]+/, '')

  # Cut at the "//" divider first. "." is the gap character in the
  # alignment rows, so a row ending in ".." must never be visible to the
  # header-line search below.
  preamble, divider, data = str.partition(/^[ \t]*\/\/$/)

  @heading = nil
  first_line = preamble[/.*/]
  if /^\!\![A-Z]+\_MULTIPLE\_ALIGNMENT/ =~ first_line
    @heading = first_line # '!!NA_MULTIPLE_ALIGNMENT 1.0' or like this
    preamble.sub!(/.*/, '')
  end

  # Everything up to and including the last line ending in ".." is the
  # description plus the header line; remove it from the preamble.
  header_block = preamble.slice!(/.*\.\.$/m).to_s
  header_line  = header_block[/^.*\.\.$/].to_s
  @description = header_block.sub(/^.*\.\.$/, '')
  @description.sub!(/\A(\r\n|\r|\n)/, '')

  @entry_id = @length = @seq_type = @date = @checksum = nil
  m = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(header_line)
  if m
    @entry_id = m[1].strip
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].strip
    @checksum = m[6].to_i
  end

  if divider.empty?
    # No "//" line at all: keep the historical result for such input,
    # i.e. no per-sequence info and the remaining text treated as data.
    names_section = ''
    @data = preamble
  else
    names_section = preamble
    @data = data
  end

  # One Hash per "Name:" line, mapping every "Key: value" pair on that line
  # (e.g. "Name", "Len", "Check", "Weight") to its value as a String.
  @seq_info = []
  names_section.each_line do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @align = nil
end

Instance Attribute Details

#checksumObject (readonly)

checksum



80
81
82
# File 'lib/bio/appl/gcg/msf.rb', line 80

# Checksum taken from the header line (the line ending in ".."): the
# constructor stores the digits following "Check:" / "CompCheck:" there as
# an Integer. nil when no header line was recognized.
def checksum
  @checksum
end

#dateObject (readonly)

date



77
78
79
# File 'lib/bio/appl/gcg/msf.rb', line 77

# Date text from the header line: whatever stands between the "Type:" field
# and the "Check:" field, stripped of surrounding whitespace (a String, not
# a parsed date). nil when no header line was recognized.
def date
  @date
end

#descriptionObject (readonly)

description



65
66
67
# File 'lib/bio/appl/gcg/msf.rb', line 65

# Free text that precedes the header line (the line ending in ".."), with
# the header line itself and one leading line break removed.
# compcheck, gap_weight, gap_length_weight and symbol_comparison_table
# extract their values from this text.
def description
  @description
end

#entry_idObject (readonly)

ID of the alignment



68
69
70
# File 'lib/bio/appl/gcg/msf.rb', line 68

# ID of the alignment: the text that precedes "MSF:" on the header line,
# stripped of surrounding whitespace. nil when no header line was recognized.
def entry_id
  @entry_id
end

#headingObject (readonly)

heading line of the entry (for example ‘!!NA_MULTIPLE_ALIGNMENT 1.0’)



84
85
86
# File 'lib/bio/appl/gcg/msf.rb', line 84

# First line of the entry, kept verbatim, when the constructor recognized it
# as a "!!..._MULTIPLE_..." heading line; nil otherwise.
def heading
  @heading
end

#lengthObject (readonly)

alignment length



71
72
73
# File 'lib/bio/appl/gcg/msf.rb', line 71

# Alignment length: the number following "MSF:" on the header line, as an
# Integer. nil when no header line was recognized.
def length
  @length
end

#seq_typeObject (readonly)

sequence type (“N” for DNA/RNA or “P” for protein)



74
75
76
# File 'lib/bio/appl/gcg/msf.rb', line 74

# Sequence type: the single character following "Type:" on the header line
# ("N" for DNA/RNA or "P" for protein). nil when no header line was recognized.
def seq_type
  @seq_type
end

Instance Method Details

#alignmentObject

returns Bio::Alignment object.



179
180
181
182
# File 'lib/bio/appl/gcg/msf.rb', line 179

# Returns the alignment (a Bio::Alignment object according to the page text).
# The constructor leaves @align as nil; do_parse (defined outside this
# excerpt) is called first and presumably fills it in -- confirm there.
def alignment
  do_parse
  @align
end

#compcheckObject

CompCheck field



122
123
124
125
126
127
128
129
130
131
# File 'lib/bio/appl/gcg/msf.rb', line 122

# CompCheck field of the description, as an Integer.
# Returns nil when the description has no "CompCheck:" entry. The result
# (including nil) is computed on first call and cached.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end

#gap_length_weightObject

gap length weight



113
114
115
116
117
118
119
# File 'lib/bio/appl/gcg/msf.rb', line 113

# Gap length weight: the token following "GapLengthWeight:" in the
# description, returned as a String (nil when absent).
# Computed on first call and cached, nil included.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end

#gap_weightObject

gap weight



104
105
106
107
108
109
110
# File 'lib/bio/appl/gcg/msf.rb', line 104

# Gap weight: the token following "GapWeight:" in the description, returned
# as a String (nil when absent).
# Computed on first call and cached, nil included.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end

#seq_dataObject

gets seq data (used internally) (will be obsoleted)



185
186
187
188
# File 'lib/bio/appl/gcg/msf.rb', line 185

# Gets seq data (used internally; marked to become obsolete).
# Calls do_parse (defined outside this excerpt) and then returns @seq_data,
# which is presumably filled in by do_parse -- confirm there.
# validate_checksum iterates over the same @seq_data, one element per
# entry of @seq_info.
def seq_data
  do_parse
  @seq_data
end

#symbol_comparison_tableObject

symbol comparison table



95
96
97
98
99
100
101
# File 'lib/bio/appl/gcg/msf.rb', line 95

# Symbol comparison table: the token following "Symbol comparison table:"
# in the description, returned as a String (nil when absent).
# Computed on first call and cached, nil included.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end

#validate_checksumObject

validates checksum



191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
# File 'lib/bio/appl/gcg/msf.rb', line 191

# Validates the checksums of the entry.
#
# Every sequence's checksum, as computed by Bio::GCG::Seq.calc_checksum,
# must equal the "Check" value recorded for it in @seq_info. In addition,
# unless the entry's own checksum is 0, the sum of all sequence checksums
# modulo 10000 must equal it.
#
# Returns true when everything agrees, false otherwise. Checking stops at
# the first sequence whose checksum differs.
def validate_checksum
  do_parse

  running_total = 0
  @seq_data.each_with_index do |seq, idx|
    computed = Bio::GCG::Seq.calc_checksum(seq)
    return false unless computed == @seq_info[idx]['Check'].to_i

    running_total += computed
  end

  # "Check:" field of BioPerl is always 0
  return true if @checksum == 0

  (running_total % 10000) == @checksum
end