Class: Bio::GCG::Msf
Overview
MSF is a multiple sequence alignment format from the GCG (Wisconsin) package. Bio::GCG::Msf is a parser for the MSF format.
Constant Summary collapse
- DELIMITER =
delimiter used by Bio::FlatFile
RS = nil
Instance Attribute Summary collapse
-
#checksum ⇒ Object
readonly
checksum.
-
#date ⇒ Object
readonly
date.
-
#description ⇒ Object
readonly
description.
-
#entry_id ⇒ Object
readonly
ID of the alignment.
-
#heading ⇒ Object
readonly
heading line (for example ‘!!NA_MULTIPLE_ALIGNMENT 1.0’).
-
#length ⇒ Object
readonly
alignment length.
-
#seq_type ⇒ Object
readonly
sequence type (“N” for DNA/RNA or “P” for protein).
Instance Method Summary collapse
-
#alignment ⇒ Object
returns Bio::Alignment object.
-
#compcheck ⇒ Object
CompCheck field.
-
#gap_length_weight ⇒ Object
gap length weight.
-
#gap_weight ⇒ Object
gap weight.
-
#initialize(str) ⇒ Msf
constructor
Creates a new Msf object.
-
#seq_data ⇒ Object
gets seq data (used internally) (will be obsoleted).
-
#symbol_comparison_table ⇒ Object
symbol comparison table.
-
#validate_checksum ⇒ Object
validates checksum.
Constructor Details
#initialize(str) ⇒ Msf
Creates a new Msf object.
31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 |
# File 'lib/bio/appl/gcg/msf.rb', line 31
#
# Creates a new Msf object from the text of a single MSF entry.
#
# The text before the "//" separator line is treated as the preamble
# (heading, description, the "MSF: ... Check: ... .." header line and the
# "Name:" lines); the text after it is kept unparsed in @data.
#
# ---
# *Arguments*:
# * _str_: (String) text of the MSF entry
# *Returns*:: a new Msf object
def initialize(str)
  str = str.sub(/\A[\r\n]+/, '')
  preamble, @data = str.split(/^\/\/$/, 2)
  # String#split returns [] for an empty string, which leaves preamble nil;
  # fall back to an empty, mutable string instead of failing on nil.
  preamble = preamble.to_s.dup

  # '!!NA_MULTIPLE_ALIGNMENT 1.0' or similar; nil when no heading is present.
  @heading = preamble.slice!(/\A\!\![A-Z]+\_MULTIPLE\_ALIGNMENT.*/)

  # Cut everything from the start through the last ".." that ends a line.
  header_block = preamble.slice!(/.*\.\.\s*$/m).to_s

  # Inside that block the first line ending with ".." is the header line;
  # whatever remains of the block is the description.
  header_line_re = /^.*\.\.\s*$/
  header_line = header_block[header_line_re].to_s
  @description = header_block.sub(header_line_re, '')

  header_re = /^(?:(.+)\s+)?MSF\:\s+(\d+)\s+Type\:\s+(\w)\s+(.+)\s+(Comp)?Check\:\s+(\d+)/
  if (m = header_re.match(header_line))
    @entry_id = m[1].to_s.strip # group 1 is optional, hence to_s
    @length   = m[2].to_i
    @seq_type = m[3]
    @date     = m[4].to_s.strip
    @checksum = m[6].to_i
  end

  # One hash of "Key: value" pairs for every remaining line with "Name: ".
  @seq_info = preamble.lines.grep(/Name\: /).map do |line|
    Hash[line.scan(/(\S+)\: +(\S*)/)]
  end

  @description.sub!(/\A(\r\n|\r|\n)/, '')
  @align = nil
end
Instance Attribute Details
#checksum ⇒ Object (readonly)
checksum
76 77 78 |
# File 'lib/bio/appl/gcg/msf.rb', line 76
#
# Returns the checksum held in @checksum (nil when it has not been set).
def checksum
  @checksum
end
#date ⇒ Object (readonly)
date
73 74 75 |
# File 'lib/bio/appl/gcg/msf.rb', line 73
#
# Returns the date held in @date (nil when it has not been set).
def date
  @date
end
#description ⇒ Object (readonly)
description
61 62 63 |
# File 'lib/bio/appl/gcg/msf.rb', line 61
#
# Returns the description held in @description (nil when it has not been set).
def description
  @description
end
#entry_id ⇒ Object (readonly)
ID of the alignment
64 65 66 |
# File 'lib/bio/appl/gcg/msf.rb', line 64
#
# Returns the ID of the alignment held in @entry_id
# (nil when it has not been set).
def entry_id
  @entry_id
end
#heading ⇒ Object (readonly)
heading line (for example ‘!!NA_MULTIPLE_ALIGNMENT 1.0’)
80 81 82 |
# File 'lib/bio/appl/gcg/msf.rb', line 80
#
# Returns the heading line held in @heading, for example
# '!!NA_MULTIPLE_ALIGNMENT 1.0' (nil when it has not been set).
def heading
  @heading
end
#length ⇒ Object (readonly)
alignment length
67 68 69 |
# File 'lib/bio/appl/gcg/msf.rb', line 67
#
# Returns the alignment length held in @length
# (nil when it has not been set).
def length
  @length
end
#seq_type ⇒ Object (readonly)
sequence type (“N” for DNA/RNA or “P” for protein)
70 71 72 |
# File 'lib/bio/appl/gcg/msf.rb', line 70
#
# Returns the sequence type held in @seq_type
# ("N" for DNA/RNA or "P" for protein; nil when it has not been set).
def seq_type
  @seq_type
end
Instance Method Details
#alignment ⇒ Object
returns Bio::Alignment object.
176 177 178 179 |
# File 'lib/bio/appl/gcg/msf.rb', line 176
#
# Returns the Bio::Alignment object.
#
# Calls do_parse first and then returns @align.
# NOTE(review): do_parse is defined outside this listing; presumably it
# fills @align on first use -- confirm against its definition.
def alignment
  do_parse
  @align
end
#compcheck ⇒ Object
CompCheck field
118 119 120 121 122 123 124 125 126 127 |
# File 'lib/bio/appl/gcg/msf.rb', line 118
#
# Returns the CompCheck field: the digits following "CompCheck:" in the
# description as an Integer, or nil when the field is absent.
# The result is cached in @compcheck after the first call.
def compcheck
  return @compcheck if defined?(@compcheck)

  found = /CompCheck\: +(\d+)/.match(@description)
  @compcheck = found ? found[1].to_i : nil
end
#gap_length_weight ⇒ Object
gap length weight
109 110 111 112 113 114 115 |
# File 'lib/bio/appl/gcg/msf.rb', line 109
#
# Returns the gap length weight: the token following "GapLengthWeight:" in
# the description as a String, or nil when the field is absent.
# The result is cached in @gap_length_weight after the first call.
def gap_length_weight
  return @gap_length_weight if defined?(@gap_length_weight)

  found = /GapLengthWeight\: +(\S+)/.match(@description)
  @gap_length_weight = found && found[1]
end
#gap_weight ⇒ Object
gap weight
100 101 102 103 104 105 106 |
# File 'lib/bio/appl/gcg/msf.rb', line 100
#
# Returns the gap weight: the token following "GapWeight:" in the
# description as a String, or nil when the field is absent.
# The result is cached in @gap_weight after the first call.
def gap_weight
  return @gap_weight if defined?(@gap_weight)

  found = /GapWeight\: +(\S+)/.match(@description)
  @gap_weight = found && found[1]
end
#seq_data ⇒ Object
gets seq data (used internally) (will be obsoleted)
182 183 184 185 |
# File 'lib/bio/appl/gcg/msf.rb', line 182
#
# Returns the sequence data (used internally; will be obsoleted).
#
# Calls do_parse first and then returns @seq_data.
# NOTE(review): do_parse is defined outside this listing; presumably it
# fills @seq_data on first use -- confirm against its definition.
def seq_data
  do_parse
  @seq_data
end
#symbol_comparison_table ⇒ Object
symbol comparison table
91 92 93 94 95 96 97 |
# File 'lib/bio/appl/gcg/msf.rb', line 91
#
# Returns the symbol comparison table: the token following
# "Symbol comparison table:" in the description as a String, or nil when
# the field is absent.
# The result is cached in @symbol_comparison_table after the first call.
def symbol_comparison_table
  return @symbol_comparison_table if defined?(@symbol_comparison_table)

  found = /Symbol comparison table\: +(\S+)/.match(@description)
  @symbol_comparison_table = found && found[1]
end
#validate_checksum ⇒ Object
validates checksum
188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 |
# File 'lib/bio/appl/gcg/msf.rb', line 188
#
# Validates the checksums.
#
# After do_parse, every entry of @seq_data is checksummed with
# Bio::GCG::Seq.calc_checksum and compared with the "Check" value of the
# @seq_info entry at the same index; the first mismatch yields false.
# When @checksum is non-zero, the sum of the per-sequence checksums
# modulo 10000 must also equal @checksum.
#
# *Returns*:: true or false
def validate_checksum
  do_parse
  total = 0
  @seq_data.each_with_index do |seq, idx|
    sum = Bio::GCG::Seq.calc_checksum(seq)
    return false if sum != @seq_info[idx]['Check'].to_i

    total += sum
  end

  # "Check:" field of BioPerl is always 0, so 0 skips the overall comparison.
  return true if @checksum == 0

  (total % 10000) == @checksum
end