Class: ViralSeq::Sequence
- Inherits:
-
Object
- Object
- ViralSeq::Sequence
- Defined in:
- lib/viral_seq/sequence.rb
Overview
ViralSeq::Sequence class for sequence operation
Instance Attribute Summary collapse
-
#aa_array ⇒ Array
ambiguity dna sequence will be translated in all possible amino acid sequence at the position.
-
#aa_string ⇒ String
Amino acid sequence.
-
#dna ⇒ String
DNA sequence.
-
#name ⇒ String
Sequence tag name.
Instance Method Summary collapse
-
#aa_length ⇒ Integer
Length of amino acid sequence.
-
#check_drm(drm_list_single_type) ⇒ Object
Similar to #sdrm but use a DRM list as a param.
-
#dna_length ⇒ Integer
Length of DNA sequence.
-
#initialize(name = ">sequence", dna_sequence = "") ⇒ Sequence
constructor
initialize a ViralSeq::Sequence class with sequence name (default as ‘>sequence’) and DNA sequence as String object.
-
#locator(ref_option = :HXB2, path_to_muscle = false) ⇒ Array
HIV sequence locator function, resembling HIV Sequence Locator from LANL # current version only supports nucleotide sequence, not for amino acid sequence.
-
#rev_complement ⇒ String
(also: #rc)
Reverse compliment sequence of the @dna.
-
#rev_complement! ⇒ Object
(also: #rc!)
replace the @dna with reverse complement DNA sequence.
-
#sdrm(option, start_aa = 1) ⇒ Hash
resistant mutation interpretation for a chosen region from a translated ViralSeq::Sequence object.
-
#sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false) ⇒ ViralSeq::Sequence?
Given start and end positions on the reference genome, return a sub-sequence of the target sequence in that range.
-
#translate(initial_position = 0) ⇒ Object
translate @dna to amino acid sequence.
Constructor Details
#initialize(name = ">sequence", dna_sequence = "") ⇒ Sequence
initialize a ViralSeq::Sequence class with sequence name (default as ‘>sequence’) and DNA sequence as String object
32 33 34 35 36 37 |
# File 'lib/viral_seq/sequence.rb', line 32 def initialize (name = ">sequence",dna_sequence ="") @name = name @dna = dna_sequence.upcase @aa_string = "" @aa_array = [] end |
Instance Attribute Details
#aa_array ⇒ Array
ambiguity dna sequence will be translated in all possible amino acid sequence at the position
50 51 52 |
# File 'lib/viral_seq/sequence.rb', line 50 def aa_array @aa_array end |
#aa_string ⇒ String
Returns amino acid sequence.
46 47 48 |
# File 'lib/viral_seq/sequence.rb', line 46 def aa_string @aa_string end |
#dna ⇒ String
Returns DNA sequence.
43 44 45 |
# File 'lib/viral_seq/sequence.rb', line 43 def dna @dna end |
#name ⇒ String
Returns sequence tag name.
40 41 42 |
# File 'lib/viral_seq/sequence.rb', line 40 def name @name end |
Instance Method Details
#aa_length ⇒ Integer
Returns length of amino acid sequence.
97 98 99 |
# File 'lib/viral_seq/sequence.rb', line 97 def aa_length @aa_string.length end |
#check_drm(drm_list_single_type) ⇒ Object
Similar to #sdrm but use a DRM list as a param
143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 |
# File 'lib/viral_seq/sequence.rb', line 143 def check_drm(drm_list_single_type) aa_array = self.aa_array out_hash = {} drm_list_single_type.each do |position, mut| wt_aa = mut[0] mut_aas = mut[1] test_aa = aa_array[position - 1] if test_aa.size == 1 and mut_aas.include?(test_aa) out_hash[position] = [wt_aa, test_aa] elsif test_aa.size > 1 test_aa_array = test_aa.split("") mut_detected = test_aa_array & mut_aas if !mut_detected.empty? out_hash[position] = [wt_aa, mut_detected.join] end end end return out_hash end |
#dna_length ⇒ Integer
Returns length of DNA sequence.
92 93 94 |
# File 'lib/viral_seq/sequence.rb', line 92 def dna_length @dna.length end |
#locator(ref_option = :HXB2, path_to_muscle = false) ⇒ Array
HIV sequence locator function, resembling HIV Sequence Locator from LANL
# current version only supports nucleotide sequence, not for amino acid sequence.
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 |
# File 'lib/viral_seq/sequence.rb', line 199 def locator(ref_option = :HXB2, path_to_muscle = false) seq = self.dna ori_ref = ViralSeq::RefSeq.get(ref_option) begin ori_ref_l = ori_ref.size l1 = 0 l2 = 0 aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :Super5, path_to_muscle) aln_test = aln_seq[1] aln_test =~ /^(\-*)(\w.*\w)(\-*)$/ gap_begin = $1.size gap_end = $3.size aln_test2 = $2 ref = aln_seq[0] ref = ref[gap_begin..(-gap_end-1)] ref_size = ref.size if ref_size > 1.3*(seq.size) l1 = l1 + gap_begin l2 = l2 + gap_end max_seq = aln_test2.scan(/[ACGT]+/).max_by(&:length) aln_test2 =~ /#{max_seq}/ before_aln_seq = $` before_aln = $`.size post_aln_seq = $' post_aln = $'.size before_aln_seq_size = before_aln_seq.scan(/[ACGT]+/).join("").size b1 = (1.3 * before_aln_seq_size).to_i post_aln_seq_size = post_aln_seq.scan(/[ACGT]+/).join("").size b2 = (1.3 * post_aln_seq_size).to_i if (before_aln > seq.size) and (post_aln <= seq.size) ref = ref[(before_aln - b1)..(ref_size - post_aln - 1)] l1 = l1 + (before_aln - b1) elsif (post_aln > seq.size) and (before_aln <= seq.size) ref = ref[before_aln..(ref_size - post_aln - 1 + b2)] l2 = l2 + post_aln - b2 elsif (post_aln > seq.size) and (before_aln > seq.size) ref = ref[(before_aln - b1)..(ref_size - post_aln - 1 + b2)] l1 = l1 + (before_aln - b1) l2 = l2 + (post_aln - b2) end aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle) aln_test = aln_seq[1] aln_test =~ /^(\-*)(\w.*\w)(\-*)$/ gap_begin = $1.size gap_end = $3.size ref = aln_seq[0] ref = ref[gap_begin..(-gap_end-1)] end aln_test = aln_seq[1] aln_test =~ /^(\-*)(\w.*\w)(\-*)$/ gap_begin = $1.size gap_end = $3.size aln_test = $2 aln_test =~ /^(\w+)(\-*)\w/ s1 = $1.size g1 = $2.size aln_test =~ /\w(\-*)(\w+)$/ s2 = $2.size g2 = $1.size l1 = l1 + gap_begin l2 = l2 + gap_end repeat = 0 if g1 == g2 and (s1 + g1 + s2) == ref.size if s1 > s2 and g2 >= s2 ref = ref[0..(-g2-1)] repeat = 1 l2 = l2 + g2 elsif s1 < s2 and g1 >= s1 ref = ref[g1..-1] repeat = 1 l1 = l1 + g1 end else if g1 >= s1 ref = ref[g1..-1] repeat = 1 l1 = l1 + g1 end if g2 >= s2 ref = ref[0..(-g2 - 1)] repeat = 1 l2 = l2 + g2 end end while repeat == 1 aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle) aln_test = aln_seq[1] aln_test =~ /^(\-*)(\w.*\w)(\-*)$/ gap_begin = $1.size gap_end = $3.size aln_test = $2 aln_test =~ /^(\w+)(\-*)\w/ s1 = $1.size g1 = $2.size aln_test =~ /\w(\-*)(\w+)$/ s2 = $2.size g2 = $1.size ref = aln_seq[0] ref = ref[gap_begin..(-gap_end-1)] l1 = l1 + gap_begin l2 = l2 + gap_end repeat = 0 if g1 >= s1 ref = ref[g1..-1] repeat = 1 l1 = l1 + g1 end if g2 >= s2 ref = ref[0..(-g2 - 1)] repeat = 1 l2 = l2 + g2 end end ref = ori_ref[l1..(ori_ref_l - l2 - 1)] aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle) aln_test = aln_seq[1] ref = aln_seq[0] #refine alignment if ref =~ /^(\-+)/ l1 = l1 - $1.size elsif ref =~ /(\-+)$/ l2 = l2 - $1.size end if (ori_ref_l - l2 - 1) >= l1 ref = ori_ref[l1..(ori_ref_l - l2 - 1)] aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle) aln_test = aln_seq[1] ref = aln_seq[0] ref_size = ref.size sim_count = 0 (0..(ref_size-1)).each do |n| ref_base = ref[n] test_base = aln_test[n] sim_count += 1 if ref_base == test_base end similarity = (sim_count/ref_size.to_f*100).round(1) loc_p1 = l1 + 1 loc_p2 = ori_ref_l - l2 if seq.size != (loc_p2 - loc_p1 + 1) indel = true elsif aln_test.include?("-") indel = true else indel = false end return [loc_p1,loc_p2,similarity,indel,aln_test,ref] else return [0,0,0,0,0,0,0] end rescue => e puts "Unexpected error occured." puts "Exception Class: #{ e.class.name }" puts "Exception Message: #{ e. }" puts "Exception Backtrace: #{ e.backtrace[0] }" puts "ViralSeq.sequence_locator returns nil" return nil end end |
#rev_complement ⇒ String Also known as: rc
Returns reverse compliment sequence of the @dna.
53 54 55 |
# File 'lib/viral_seq/sequence.rb', line 53 def rev_complement @dna.rc end |
#rev_complement! ⇒ Object Also known as: rc!
replace the @dna with reverse complement DNA sequence.
58 59 60 |
# File 'lib/viral_seq/sequence.rb', line 58 def rev_complement! @dna = @dna.rc end |
#sdrm(option, start_aa = 1) ⇒ Hash
resistant mutation interpretation for a chosen region from a translated ViralSeq::Sequence object
113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/viral_seq/sequence.rb', line 113 def sdrm(option, start_aa = 1) aa_array = self.aa_array out_hash = {} sdrm = ViralSeq::DRMs.sdrm_hash(option) aa_length = aa_array.size end_aa = start_aa + aa_length - 1 (start_aa..end_aa).each do |position| array_position = position - start_aa if sdrm.keys.include?(position) wt_aa = sdrm[position][0] test_aa = aa_array[array_position] if test_aa.size == 1 unless wt_aa == test_aa if sdrm[position][1].include?(test_aa) out_hash[position] = [wt_aa,test_aa] end end else test_aa_array = test_aa.split("") if (test_aa_array & sdrm[position][1]) out_hash[position] = [wt_aa,test_aa] end end end end return out_hash end |
#sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false) ⇒ ViralSeq::Sequence?
Given start and end positions on the reference genome, return a sub-sequence of the target sequence in that range
384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 |
# File 'lib/viral_seq/sequence.rb', line 384 def sequence_clip(p1 = 0, p2 = 0, ref_option = :HXB2, path_to_muscle = false) loc = self.locator(ref_option, path_to_muscle) l1 = loc[0] l2 = loc[1] if (p1 >= l1) & (p2 <= l2) seq = loc[4] ref = loc[5] g1 = 0 ref.each_char do |char| break if l1 == p1 g1 += 1 l1 += 1 unless char == "-" end g2 = 1 ref.reverse.each_char do |char| break if l2 == p2 g2 += 1 l2 -= 1 unless char == "-" end return ViralSeq::Sequence.new(self.name,seq[g1..(-g2)].tr("-","")) else return nil end end |
#translate(initial_position = 0) ⇒ Object
translate @dna to amino acid sequence. generate values for @aa_string and @aa_array
69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 |
# File 'lib/viral_seq/sequence.rb', line 69 def translate(initial_position = 0) @aa_string = "" require_sequence = @dna[initial_position..-1] base_array = [] require_sequence.each_char {|base| base_array << base} while (base_array.length>=3) do base_3= "" 3.times {base_3 += base_array.shift} @aa_string << amino_acid(base_3) end @aa_array = [] require_sequence = @dna[initial_position..-1].tr('-','N') base_array = [] require_sequence.each_char {|base| base_array << base} while (base_array.length>=3) do base_3= "" 3.times{base_3 += base_array.shift} @aa_array<< amino_acid_2(base_3) end end |