Module: FlAnalysis
- Defined in:
- lib/full_lengther_next/classes/fl_analysis.rb
Instance Method Summary collapse
- #analiza_orf_y_fl(seq, hit, options, db_name) ⇒ Object
- #compare_seq_length_with_subject(final_prot, distance, final_hit, type, status) ⇒ Object
- #determine_status(atg_status, end_status) ⇒ Object
- #exonerate_fix_frame_shift(query_fasta, hit) ⇒ Object
- #find_end(final_hit, max_distance, tmp_prot, query_fasta) ⇒ Object
- #find_start(subject_start, amine_seq, distance) ⇒ Object
- #mark_nt_seqs(final_hit, query_fasta) ⇒ Object
-
#modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) ⇒ Object
For visual report.
-
#modify_5p_align(new_q_beg, final_hit, query_fasta) ⇒ Object
For visual report.
- #save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name) ⇒ Object
- #set_start_codon(final_hit, distance, full_prot, query_fasta) ⇒ Object
-
#show_nts ⇒ Object
VERBOSE METHODS.
Instance Method Details
#analiza_orf_y_fl(seq, hit, options, db_name) ⇒ Object
8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 8 def analiza_orf_y_fl(seq, hit, , db_name) query_fasta = seq.seq_fasta.upcase.dup # Upcase for prevents complications with masked sequences, dup for discard changes if hit.count > 1 # if the sequence has more than one hit, the frames are checked and fixed to get a single hit seq_unida = UneLosHit.new(hit, query_fasta) full_prot = seq_unida.full_prot query_fasta = seq_unida.output_seq # repaired fasta final_hit = seq_unida.final_hit # single hit $global_warnings += seq_unida.msgs # warning messages else query_fasta = reverse_seq(query_fasta, hit.first) if hit.first.q_frame < 0 # si la secuencia esta al reves le damos la vuelta final_hit = hit.first # single hit end query_fasta = exonerate_fix_frame_shift(query_fasta, hit) if [:exonerate] full_prot = query_fasta[final_hit.q_frame-1, query_fasta.length+1].translate original_query_coordinates = [final_hit.q_beg, final_hit.q_end] ## VERBOSE seq.show_alignment(final_hit, query_fasta, show_nts) if $verbose > 2 ## VERBOSE atg_status, tmp_prot = set_start_codon(final_hit, [:distance], full_prot, query_fasta) end_status, final_prot = find_end(final_hit, [:distance], tmp_prot, query_fasta) puts "\n------------------- POST EXTENSION---------------------" if $verbose > 1 ## VERBOSE seq.show_alignment(final_hit, query_fasta, show_nts, original_query_coordinates) if $verbose > 1 ## VERBOSE puts "ATG: #{atg_status} STOP: #{end_status}" if $verbose > 2 ## VERBOSE # decide the sequence status (Complete, Putative Complete, Internal, N-terminus, Putative N-terminus, C-terminus) type, status = determine_status(atg_status, end_status) status = compare_seq_length_with_subject(final_prot, [:distance], final_hit, type, status) if final_prot.length >= 25 && final_prot.length.to_f/final_hit.full_subject_length >= [:subject_coverage] # Prot length min of 25 aa and subject coverage by generated prot of 25% save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name) end end |
#compare_seq_length_with_subject(final_prot, distance, final_hit, type, status) ⇒ Object
183 184 185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 183 def compare_seq_length_with_subject(final_prot, distance, final_hit, type, status) if final_prot.length - 2 * distance > final_hit.s_len $global_warnings << ['SeqLonger', final_prot.length, final_hit.s_len] elsif final_prot.length + 2 * distance < final_hit.s_len $global_warnings << ['SeqShorter', final_prot.length, final_hit.s_len] if final_prot.length + 100 < final_hit.s_len || final_prot.length*2 < final_hit.s_len if type == COMPLETE status = FALSE $global_warnings << 'VeryShorter' end end end return status end |
#determine_status(atg_status, end_status) ⇒ Object
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 162 def determine_status(atg_status, end_status) if atg_status != 'incomplete' && end_status != 'incomplete' # proteina completa type = COMPLETE elsif atg_status == 'incomplete' && end_status == 'incomplete' # region intermedia type = INTERNAL elsif atg_status != 'incomplete' && end_status == 'incomplete' # tenemos el principio de la proteina type = N_TERMINAL elsif atg_status == 'incomplete' && end_status != 'incomplete' # tenemos el final de la proteina type = C_TERMINAL end if atg_status == 'putative' || end_status == 'putative' status = FALSE # Putative else status = TRUE # Sure end return type, status end |
#exonerate_fix_frame_shift(query_fasta, hit) ⇒ Object
239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 239 def exonerate_fix_frame_shift(query_fasta, hit) frame_shifts = [] added_nts = 0 hit.each_with_index do |hsp, num| if hsp.class.to_s == 'ExoBlastHit' #Only this type of class of BlastHit has frameshift attributes if !hsp.q_frameshift.empty? #There is frameshift hsp.q_frameshift.each do |position, num_nts| local_add = 3 - num_nts fs_final_position = position + num_nts $global_warnings << ['ExFrameS', fs_final_position] frame_shifts << [fs_final_position, local_add] added_nts += local_add end end end hsp.q_beg += added_nts if num > 0 hsp.q_end += added_nts end add = 0 frame_shifts.each do |position, num_nts| query_fasta = query_fasta.insert(position+add, 'n'*num_nts) add += num_nts end return query_fasta end |
#find_end(final_hit, max_distance, tmp_prot, query_fasta) ⇒ Object
105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 105 def find_end(final_hit, max_distance, tmp_prot, query_fasta) frame_shift = check_frame_shift(final_hit) beg_end_string =(final_hit.q_end-final_hit.q_beg)/3 - max_distance # Begin of terminal region (Coordinate) in tmp_prot atg_substring = tmp_prot[0..beg_end_string] # prot without terminal region end_substring = tmp_prot[beg_end_string + 1 ..tmp_prot.length-1] #Take 3' of unigen #puts "\e[32m\nfinal_hit.q_end-final_hit.q_beg: #{final_hit.q_end-final_hit.q_beg} /3 - max_distance: #{max_distance}\e[0m" #puts "\e[33mbeg_end_string: #{beg_end_string}\e[0m" #puts "\e[35mtmp_prot.length: #{tmp_prot.length}\e[0m" if beg_end_string < 0 || end_substring.nil? #Sequences whose homology is at end of it and dont't exits the 3' part of unigene atg_substring = tmp_prot end_substring = '' end_status = 'incomplete' else end_status = 'putative' putative_end = end_substring.index('*') end_substring = end_substring[0 .. putative_end] if putative_end s_end_resto = final_hit.s_len - (final_hit.s_end + 1) # en el subject, numero de aas que necesito cubrir q_end_resto = (query_fasta.length - final_hit.q_end)/3 # en el query, numero de aas que tengo sq_end_distance = q_end_resto - s_end_resto # La diferencia se hace a partir del final del hit para que el calculo no quede sesgado en caso de que la secuecia este truncada por 5' if (final_hit.align_len == final_hit.s_len && putative_end)||(sq_end_distance.abs <= max_distance && putative_end && putative_end <= max_distance*2) #Stop in a Full-length. max_distance *2 is set by de margin of +-15aa at the end of aligment end_status = 'complete' elsif sq_end_distance < max_distance # si no tenemos suficiente secuencia para tener el stop (nos faltan 15 aas o mas) end_status = 'incomplete' if putative_end $global_warnings << ['UnexpSTOP3pDist', sq_end_distance.abs] else $global_warnings << ['DistSubj', sq_end_distance.abs] end else # tenemos suficiente secuencia if putative_end # tenemos un stop #beg_end_string indica en que punto del unigen se encuentra el area de busqueda del codon stop stop_q_s = beg_end_string + putative_end - final_hit.s_len # Space between query's stop and subject's stop if stop_q_s.abs <= max_distance #Stop codon is in search region end_status = 'complete' elsif stop_q_s < 0 $global_warnings << 'UnexpSTOP3p' elsif stop_q_s > 0 end_status = 'complete' $global_warnings << 'QueryTooLong' end else # no tenemos codon de parada pero tenemos suficiente secuencia end_status = 'incomplete' $global_warnings << 'ProtFusion' end end end final_prot = atg_substring + end_substring end_status = 'complete' if final_prot.length == final_hit.s_len+1 && final_prot[final_prot.length-1] == '*' new_q_end = final_hit.q_beg-1 + final_prot.length * 3 + frame_shift modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) if $verbose > 1 final_hit.q_end = new_q_end return end_status, final_prot end |
#find_start(subject_start, amine_seq, distance) ⇒ Object
68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 68 def find_start(subject_start, amine_seq, distance) atg_status = 'putative' # complete, incomplete or putative stop_codon = amine_seq.rindex('*') if !stop_codon.nil? # tenemos un codon de parada en el amine_seq 5 prima _5prime_UTR = amine_seq.length - 10 - subject_start # marcamos la distancia al s_beg desde el principio del amine_seq amine_seq = amine_seq[stop_codon + 1 .. amine_seq.length - 1] first_m = amine_seq.index('M') if stop_codon <= _5prime_UTR # Ver si stop está en zona 5 prima UTR if first_m # tenemos M amine_seq = amine_seq[first_m .. amine_seq.length - 1] atg_status = 'complete' else # con STOP pero sin M $global_warnings << 'noM1' end else # esta despues, un cambio de fase impide analizar el principio $global_warnings << 'UnexpSTOP5p' amine_seq = amine_seq[first_m .. amine_seq.length - 1] if first_m # tenemos M end else # no hay stop codon first_m = amine_seq.index('M') if first_m # tenemos M amine_seq = amine_seq[first_m .. amine_seq.length - 1] m_distance = (subject_start - amine_seq.length).abs - 10 if m_distance.abs > distance*2 # con atg pero muy lejos del inicio que marca el subject $global_warnings << 'NoStopMfar' atg_status = 'incomplete' else # Tenemos M atg_status = 'complete' end else # sin M $global_warnings << 'noM2' end end return amine_seq, atg_status end |
#mark_nt_seqs(final_hit, query_fasta) ⇒ Object
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 221 def mark_nt_seqs(final_hit, query_fasta) atg = query_fasta[final_hit.q_beg..final_hit.q_beg + 2] mark_atg = nil if atg == 'ATG' mark_atg = '_-_' end stop = query_fasta[final_hit.q_end - 2..final_hit.q_end] mark_stop = nil if stop == 'TAG' || stop == 'TGA' || stop == 'TAA' mark_stop = '___' end seq5p = query_fasta[0..final_hit.q_beg-1] orf = query_fasta[final_hit.q_beg..final_hit.q_end] seq3p = query_fasta[final_hit.q_end..query_fasta.length] nt_seq = "#{seq5p}#{mark_atg}#{orf}#{mark_stop}#{seq3p}" return nt_seq end |
#modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) ⇒ Object
For visual report
274 275 276 277 278 279 280 281 282 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 274 def modify_3p_align(new_q_end, final_hit, query_fasta, final_prot) ## For visual report if new_q_end > final_hit.q_end #There is an align extension extend_align = query_fasta[final_hit.q_end+1 .. new_q_end].translate final_hit.q_seq = final_hit.q_seq + extend_align elsif new_q_end < final_hit.q_end #The align is cutted upper_limit = final_prot.length - 1 + final_hit.q_seq.count('-') final_hit.q_seq = final_hit.q_seq[0 .. upper_limit] end end |
#modify_5p_align(new_q_beg, final_hit, query_fasta) ⇒ Object
For visual report
285 286 287 288 289 290 291 292 293 294 295 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 285 def modify_5p_align(new_q_beg, final_hit, query_fasta) ## For visual report if new_q_beg < final_hit.q_beg #There is an align extension extend_align = query_fasta[new_q_beg .. final_hit.q_beg-1].translate final_hit.q_seq = extend_align + final_hit.q_seq elsif new_q_beg > final_hit.q_beg #The align is cut seq_cut = (new_q_beg - final_hit.q_beg)/3 gaps = final_hit.q_seq[0..seq_cut].count('-') seq_cut += gaps final_hit.q_seq = final_hit.q_seq[seq_cut .. final_hit.q_seq.length-1] end end |
#save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name) ⇒ Object
199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 199 def save_annotations(seq, final_hit, type, status, final_prot, query_fasta, db_name) # if the sequence is Complete or it hasn't previous info will be saved if seq.type == UNKNOWN || (type == COMPLETE && seq.type != COMPLETE) seq.type = type seq.status = status seq.db_name = db_name seq.seq_fasta = query_fasta seq.seq_aa = final_prot seq.hit = final_hit seq.warnings($global_warnings) $global_warnings = [] # Clean all warnings for current sequence seq.seq_nt = mark_nt_seqs(final_hit, query_fasta) if type == COMPLETE seq.ignore = TRUE end end if $verbose > 2 puts "\e[1mStruct annot: #{seq.prot_annot_calification}\e[0m" end end |
#set_start_codon(final_hit, distance, full_prot, query_fasta) ⇒ Object
41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 41 def set_start_codon(final_hit, distance, full_prot, query_fasta) q_index_start = contenidos_en_prot(final_hit.q_seq, full_prot) atg_status = nil _5prima = q_index_start + distance if final_hit.s_beg == 0 && final_hit.q_seq[0] == 'M' && final_hit.s_seq[0] == 'M' #there is M in query and subject at first position of alignment and subject's M is in first position atg_status = 'complete' tmp_prot = full_prot[q_index_start..full_prot.length] elsif _5prima >= final_hit.s_beg amine_seq = full_prot[0, _5prima] #Contiene parte amino de la proteina carboxile_seq = full_prot[_5prima, full_prot.length - _5prima] #Contiene parte carboxilo de la proteina hasta el fin de la secuencia length_before_cut = amine_seq.length amine_seq, atg_status = find_start(final_hit.s_beg, amine_seq, distance) # to look for the beginning of the protein tmp_prot = "#{amine_seq}#{carboxile_seq}" # merge seqs in prot new_q_beg = final_hit.q_frame-1 + (length_before_cut - amine_seq.length) * 3 modify_5p_align(new_q_beg, final_hit, query_fasta) if $verbose > 1 ## VERBOSE, Modify query align final_hit.q_beg = new_q_beg # to get the value of the start_ORF index else $global_warnings << 'UnexpStopBegSeq' if full_prot[0, q_index_start].rindex('*') atg_status = 'incomplete' tmp_prot = full_prot end return atg_status, tmp_prot end |
#show_nts ⇒ Object
VERBOSE METHODS
267 268 269 270 271 |
# File 'lib/full_lengther_next/classes/fl_analysis.rb', line 267 def show_nts show = FALSE show = TRUE if $verbose && $verbose > 3 return show end |