Class: Bio::GCG::Msf
Overview
MSF is a multiple sequence alignment format that originated with the Wisconsin Package (GCG). Bio::GCG::Msf is a parser for the MSF format.
Constant Summary collapse
- DELIMITER =
delimiter used by Bio::FlatFile
RS = nil
Instance Attribute Summary collapse
-
#checksum ⇒ Object
readonly
checksum.
-
#date ⇒ Object
readonly
date.
-
#description ⇒ Object
readonly
description.
-
#entry_id ⇒ Object
readonly
ID of the alignment.
-
#heading ⇒ Object
readonly
heading line (e.g. ‘!!NA_MULTIPLE_ALIGNMENT 1.0’ or similar).
-
#length ⇒ Object
readonly
alignment length.
-
#seq_type ⇒ Object
readonly
sequence type (“N” for DNA/RNA or “P” for protein).
Instance Method Summary collapse
-
#alignment ⇒ Object
returns Bio::Alignment object.
-
#compcheck ⇒ Object
CompCheck field.
-
#gap_length_weight ⇒ Object
gap length weight.
-
#gap_weight ⇒ Object
gap weight.
-
#initialize(str) ⇒ Msf
constructor
Creates a new Msf object.
-
#seq_data ⇒ Object
gets the sequence data (used internally; this method will become obsolete).
-
#symbol_comparison_table ⇒ Object
symbol comparison table.
-
#validate_checksum ⇒ Object
validates checksum.
Constructor Details
#initialize(str) ⇒ Msf
Creates a new Msf object.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 |
# File 'lib/bio/appl/gcg/msf.rb', line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text is consumed front to back on a private copy of +str+:
# 1. an optional first line such as '!!NA_MULTIPLE_ALIGNMENT 1.0' is stored
#    in @heading;
# 2. everything up to and including the first line that ends in '..' is the
#    preamble. Its last line (the header line) supplies @entry_id, @length,
#    @seq_type, @date and @checksum; the rest becomes @description;
# 3. everything up to the line ending in '//' is scanned for "Name:" lines,
#    each of which becomes one "Key: value" hash in @seq_info;
# 4. whatever remains is kept unparsed in @data.
#
# @param str [String] text of one MSF entry; it is not modified
def initialize(str)
  # String#sub always returns a new string, so the destructive calls below
  # never touch the caller's object.
  str = str.sub(/\A[\r\n]+/, '')

  first_line = str[/.*/]
  # The pattern previously read "ALIGNMNENT" and therefore never matched a
  # real heading, leaving @heading unset and the heading line inside
  # @description.
  if /^\!\![A-Z]+\_MULTIPLE\_ALIGNMENT/ =~ first_line
    @heading = first_line # '!!NA_MULTIPLE_ALIGNMENT 1.0' or similar
    str.sub!(/.*/, '')
  end

  # Non-greedy on purpose: alignment rows may themselves end in '..' (gap
  # characters), and a greedy multi-line match would run on to the last
  # such row, swallowing the Name lines, the '//' divider and sequence data.
  preamble = str.slice!(/.*?\.\.$/m).to_s
  header_pattern = /^.*\.\.$/
  header_line = preamble[header_pattern].to_s
  @description = preamble.sub(header_pattern, '')

  header = /(.+)\s+MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/.match(header_line)
  if header
    @entry_id = header[1].to_s.strip
    @length   = (header[2] ? header[2].to_i : nil)
    @seq_type = header[3]
    @date     = header[4].to_s.strip
    @checksum = (header[6] ? header[6].to_i : nil)
  end

  # Section between the header line and the '//' divider: one "Name:" line
  # per sequence.
  name_section = str.slice!(/.*\/\/$/m).to_s
  @seq_info = []
  name_section.split(/^/).each do |line|
    next unless /Name\: / =~ line
    info = {}
    line.scan(/(\S+)\: +(\S*)/) { |key, value| info[key] = value }
    @seq_info << info
  end

  @data = str
  # Removing the heading line leaves its line terminator at the front.
  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end
Instance Attribute Details
#checksum ⇒ Object (readonly)
checksum
80 81 82 |
# File 'lib/bio/appl/gcg/msf.rb', line 80
#
# Checksum of the whole alignment.
#
# @return [Integer, nil] value of the header line's "Check:" field as stored
#   by the constructor, or nil when the header line was not recognised
def checksum
  @checksum
end
#date ⇒ Object (readonly)
date
77 78 79 |
# File 'lib/bio/appl/gcg/msf.rb', line 77
#
# Date text taken from the header line.
#
# @return [String, nil] the stripped text found between the "Type:" and
#   "Check:" fields, or nil when the header line was not recognised
def date
  @date
end
#description ⇒ Object (readonly)
description
65 66 67 |
# File 'lib/bio/appl/gcg/msf.rb', line 65
#
# Description of the alignment.
#
# @return [String] the text that preceded the header line in the entry
def description
  @description
end
#entry_id ⇒ Object (readonly)
ID of the alignment
68 69 70 |
# File 'lib/bio/appl/gcg/msf.rb', line 68
#
# ID of the alignment.
#
# @return [String, nil] the stripped text that precedes "MSF:" on the header
#   line, or nil when the header line was not recognised
def entry_id
  @entry_id
end
#heading ⇒ Object (readonly)
heading line (e.g. ‘!!NA_MULTIPLE_ALIGNMENT 1.0’ or similar)
84 85 86 |
# File 'lib/bio/appl/gcg/msf.rb', line 84
#
# Heading line of the entry, e.g. '!!NA_MULTIPLE_ALIGNMENT 1.0'.
#
# @return [String, nil] the first line of the entry when the constructor
#   recognised it as a heading, otherwise nil
def heading
  @heading
end
#length ⇒ Object (readonly)
alignment length
71 72 73 |
# File 'lib/bio/appl/gcg/msf.rb', line 71
#
# Alignment length.
#
# @return [Integer, nil] value of the header line's "MSF:" field, or nil
#   when the header line was not recognised
def length
  @length
end
#seq_type ⇒ Object (readonly)
sequence type (“N” for DNA/RNA or “P” for protein)
74 75 76 |
# File 'lib/bio/appl/gcg/msf.rb', line 74
#
# Sequence type ("N" for DNA/RNA or "P" for protein).
#
# @return [String, nil] the single character following "Type:" on the header
#   line, or nil when the header line was not recognised
def seq_type
  @seq_type
end
Instance Method Details
#alignment ⇒ Object
returns Bio::Alignment object.
179 180 181 182 |
# File 'lib/bio/appl/gcg/msf.rb', line 179
#
# Returns the alignment as a Bio::Alignment object.
#
# do_parse is invoked first so that @align is populated before it is read.
#
# @return [Object] the value held in @align after do_parse has run
def alignment
  do_parse
  @align
end
#compcheck ⇒ Object
CompCheck field
122 123 124 125 126 127 128 129 130 131 |
# File 'lib/bio/appl/gcg/msf.rb', line 122
#
# CompCheck field of the description.
#
# The description is searched once; the result (including nil) is cached in
# @compcheck and returned on every later call.
#
# @return [Integer, nil] the number following "CompCheck:" in the
#   description, or nil when the field is absent
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
#gap_length_weight ⇒ Object
gap length weight
113 114 115 116 117 118 119 |
# File 'lib/bio/appl/gcg/msf.rb', line 113
#
# Gap length weight.
#
# The description is searched once; the result (including nil) is cached in
# @gap_length_weight.
#
# @return [String, nil] the token following "GapLengthWeight:" in the
#   description, unconverted, or nil when the field is absent
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found ? found[1] : nil
end
#gap_weight ⇒ Object
gap weight
104 105 106 107 108 109 110 |
# File 'lib/bio/appl/gcg/msf.rb', line 104
#
# Gap weight.
#
# The description is searched once; the result (including nil) is cached in
# @gap_weight.
#
# @return [String, nil] the token following "GapWeight:" in the description,
#   unconverted, or nil when the field is absent
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found ? found[1] : nil
end
#seq_data ⇒ Object
gets the sequence data (used internally; this method will become obsolete)
185 186 187 188 |
# File 'lib/bio/appl/gcg/msf.rb', line 185
#
# Gets the sequence data (used internally; will become obsolete).
#
# do_parse is invoked first so that @seq_data is populated before it is read.
#
# @return [Object] the value held in @seq_data after do_parse has run
def seq_data
  do_parse
  @seq_data
end
#symbol_comparison_table ⇒ Object
symbol comparison table
95 96 97 98 99 100 101 |
# File 'lib/bio/appl/gcg/msf.rb', line 95
#
# Symbol comparison table.
#
# The description is searched once; the result (including nil) is cached in
# @symbol_comparison_table.
#
# @return [String, nil] the token following "Symbol comparison table:" in
#   the description, or nil when the field is absent
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found ? found[1] : nil
end
#validate_checksum ⇒ Object
validates checksum
191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 |
# File 'lib/bio/appl/gcg/msf.rb', line 191
#
# Validates the checksums recorded in the entry.
#
# Each parsed sequence is checksummed with Bio::GCG::Seq.calc_checksum and
# compared with the "Check" value of the Name line at the same position.
# If every sequence agrees, the sum of the per-sequence checksums modulo
# 10000 is compared with the alignment checksum from the header line.
#
# @return [Boolean] false as soon as one sequence checksum differs or when
#   the total does not match; true otherwise
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    actual = Bio::GCG::Seq.calc_checksum(seq)
    return false unless actual == @seq_info[idx]['Check'].to_i
    total += actual
  end

  # "Check:" field of BioPerl is always 0, so a zero alignment checksum is
  # not compared against the total.
  return true if @checksum == 0

  (total % 10000) == @checksum
end