Class: String
- Inherits:
-
Object
- Object
- String
- Defined in:
- lib/viral_seq/string.rb
Overview
functions added to Class::String for direct operation on sequence as a String object
Instance Method Summary collapse
-
#compare_with(seq2) ⇒ Integer
compare two sequences as String objects; the two sequence strings need to be aligned first.
-
#mutation(error_rate = 0.01) ⇒ String
mutate a nt sequence (String class) randomly.
-
#nt_diff(ref) ⇒ Integer
compare the given nt sequence string with the ref sequence string.
-
#nt_parser ⇒ Regexp
parse the nucleotide sequences as a String object and return a Regexp object for possible matches.
-
#nt_to_array ⇒ Array
parse the nucleotide sequences as an Array of Array.
-
#rc ⇒ String
reverse complement.
-
#to_list ⇒ Array
expand a single-character IUPAC nucleotide code (A T C G, or the ambiguity codes W S M K R Y B D H V N) into an Array of the bases it can represent; intended for Strings with String.size == 1.
Instance Method Details
#compare_with(seq2) ⇒ Integer
compare two sequences as String objects; the two sequence strings need to be aligned first
145 146 147 148 149 150 151 152 153 154 155 |
# File 'lib/viral_seq/string.rb', line 145
# Count the positions at which this sequence differs from +seq2+.
#
# The comparison is purely positional, so both sequences should already be
# aligned. Only the positions of the receiver are inspected: a position
# that lies past the end of +seq2+ counts as a difference, while extra
# trailing characters in +seq2+ are ignored.
#
# @param seq2 [String] the sequence to compare against
# @return [Integer] number of positions whose characters differ
def compare_with(seq2)
  # seq2[position] is nil past the end of seq2, which never equals a character.
  each_char.with_index.count { |nt, position| nt != seq2[position] }
end
#mutation(error_rate = 0.01) ⇒ String
mutate a nt sequence (String class) randomly
23 24 25 26 27 28 29 30 31 32 33 34 35 36 |
# File 'lib/viral_seq/string.rb', line 23
# Return a randomly mutated copy of a nucleotide sequence.
#
# Every character is independently replaced, with probability +error_rate+,
# by a base drawn uniformly from A, C, T and G excluding the character
# itself. A character that is not one of those four (e.g. "N") is therefore
# replaced by any of the four. The receiver is not modified.
#
# @param error_rate [Numeric] per-position mutation probability, 0..1
# @return [String] a new String of the same length
def mutation(error_rate = 0.01)
  bases = %w[A C T G]
  each_char.each_with_object(+"") do |nt, mutated|
    # Kernel#rand without arguments is a float in [0, 1), so comparing it
    # with error_rate directly honours rates of any precision. Scaling to
    # an integer range first would round small rates up to its step size.
    mutated << (rand < error_rate ? (bases - [nt]).sample : nt)
  end
end
#nt_diff(ref) ⇒ Integer
compare the given nt sequence string with the ref sequence string
82 83 84 85 86 87 88 89 90 91 92 93 |
# File 'lib/viral_seq/string.rb', line 82
# Count the positions at which this sequence is not compatible with +ref+.
#
# +ref+ is expanded with +nt_to_array+ into one list of permitted bases per
# position; a position of the receiver counts as a difference when its
# character is not in the corresponding list. Positions past the end of
# +ref+ also count as differences rather than raising on nil.
#
# @param ref [String] reference sequence; it is expanded with +nt_to_array+
# @return [Integer] number of positions not matched by +ref+
def nt_diff(ref)
  permitted = ref.nt_to_array
  each_char.with_index.count do |nt, position|
    # permitted[position] is nil once ref is exhausted; &. turns that into
    # a mismatch instead of a NoMethodError.
    !permitted[position]&.include?(nt)
  end
end
#nt_parser ⇒ Regexp
parse the nucleotide sequences as a String object
and return a Regexp object for possible matches
45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/viral_seq/string.rb', line 45
# Build a Regexp that matches every sequence this (possibly ambiguous)
# nucleotide string can stand for.
#
# Each character is expanded with +to_list+. A character that expands to a
# single entry is inserted as-is; any other expansion becomes a character
# class holding the expanded entries.
#
# @return [Regexp] pattern built from the per-position expansions
def nt_parser
  pattern = each_char.map do |base|
    candidates = base.to_list
    # Inside [...] a "|" is a literal character, not alternation, so the
    # candidates are joined with no separator.
    candidates.size == 1 ? candidates.first : "[#{candidates.join}]"
  end.join
  Regexp.new(pattern)
end
#nt_to_array ⇒ Array
parse the nucleotide sequences as an Array of Array
65 66 67 68 69 70 71 72 |
# File 'lib/viral_seq/string.rb', line 65
# Expand the sequence into an Array of Arrays, one inner Array per
# character, each being the result of calling +to_list+ on that character.
#
# @return [Array<Array>] per-position expansions, in sequence order
def nt_to_array
  each_char.map(&:to_list)
end
#rc ⇒ String
reverse complement
11 12 13 |
# File 'lib/viral_seq/string.rb', line 11
# Reverse complement of the sequence.
#
# Swaps A<->T and C<->G (upper-case only) and reverses the character
# order; every other character is carried over unchanged.
#
# @return [String] a new String; the receiver is not modified
def rc
  complemented = tr("ACTG", "TGAC")
  complemented.reverse
end
#to_list ⇒ Array
expand a single-character IUPAC nucleotide code (A T C G, or the ambiguity codes W S M K R Y B D H V N) into an Array of the bases it can represent; intended for Strings with String.size == 1
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 |
# File 'lib/viral_seq/string.rb', line 102
# Expand a nucleotide code into the list of bases it can represent.
#
# Matching is done on the upper-cased receiver. A String containing A, T, C
# or G is returned as-is (original case) in a one-element Array; an IUPAC
# ambiguity code (W S M K R Y B D H V N) is expanded to its upper-case
# bases; anything else yields an empty Array. Intended for Strings of
# size 1.
#
# @return [Array<String>] the bases represented, or [] if unrecognised
def to_list
  case upcase
  # No "|" in the class: inside [...] it would be a literal pipe character.
  when /[ATCG]/ then [self]
  when "W" then %w[A T]
  when "S" then %w[C G]
  when "M" then %w[A C]
  when "K" then %w[G T] # IUPAC K (keto) is G or T; G/C is S
  when "R" then %w[A G]
  when "Y" then %w[C T]
  when "B" then %w[C G T]
  when "D" then %w[A G T]
  when "H" then %w[A C T]
  when "V" then %w[A C G]
  when "N" then %w[A T C G]
  else []
  end
end