Class: UneLosHit

Inherits:
Object
  • Object
show all
Defined in:
lib/full_lengther_next/classes/une_los_hit.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(q, query_fasta, pident_threshold) ⇒ UneLosHit

Returns a new instance of UneLosHit.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 9

# Joins the hits of a query that share the accession of its first hit into a
# single merged hit (@final_hit) and derives the protein from the resulting
# nucleotide sequence.
#
# q                - query object; its hits are read through hits_misma_id.
# query_fasta      - nucleotide sequence of the query.
# pident_threshold - hits whose ident is below this value are ignored when
#                    several hits have to be joined.
def initialize(q, query_fasta, pident_threshold)
	same_id_hits, query_fasta, @wrong_seq = hits_misma_id(q, query_fasta)

	@mismas_ids_array = same_id_hits
	@output_seq = query_fasta
	@msgs = ''
	@number_x = 0

	several_hits = same_id_hits.count > 1

	# sort the hits by their start position on the query
	same_id_hits.sort! { |left, right| left.q_beg <=> right.q_beg } if several_hits
	@final_hit = same_id_hits[0].dup

	if several_hits
		same_id_hits.each do |hit|
			next unless hit.ident >= pident_threshold
			next if same_hit(hit)

			if @final_hit.q_frame == hit.q_frame
				same_frame_hits(hit)
				next
			end

			# correction applied so that the nucleotide sequence continues in the right frame
			correccion_x =
				case @final_hit.q_frame - hit.q_frame
				when 1, -2 then 1
				when 2, -1 then 2
				else 0
				end

			merged_end = @final_hit.q_end
			extends_query = hit.q_end > merged_end + 15

			if extends_query && merged_end >= hit.q_beg && merged_end < hit.q_end
				# the hits overlap on the query
				overlapped_hits(hit, correccion_x, q)
			elsif extends_query && merged_end < hit.q_beg
				# the hits are separated on the query
				separated_hits(hit, correccion_x, q)
			else
				@msgs = 'warning!, putative chimeric sequence! or repetitive structure'
			end
		end
	end

	@full_prot = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate

	@is_ok, @q_index_start = contenidos_en_prot(@final_hit, @full_prot, q)
end

Instance Attribute Details

#final_hit ⇒ Object (readonly)

Returns the value of attribute final_hit.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Merged hit: the constructor sets it to a copy of the first same-accession
# hit, and the hit-joining methods then widen its coordinates and sequences.
def final_hit
  @final_hit
end

#full_prot ⇒ Object (readonly)

Returns the value of attribute full_prot.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Result of calling translate on the output sequence, starting at the merged
# hit's q_frame - 1; computed at the end of the constructor.
def full_prot
  @full_prot
end

#is_ok ⇒ Object (readonly)

Returns the value of attribute is_ok.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# First of the two values returned by contenidos_en_prot when the constructor
# checks the merged hit against the full protein.
def is_ok
  @is_ok
end

#msgs ⇒ Object (readonly)

Returns the value of attribute msgs.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Warning/error text produced while joining hits; the constructor starts it
# as an empty string.
def msgs
  @msgs
end

#number_x ⇒ Object (readonly)

Returns the value of attribute number_x.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Number of 'x' nucleotides computed by the most recent overlapped_hits or
# separated_hits call; 0 when neither has run.
def number_x
  @number_x
end

#output_seq ⇒ Object (readonly)

Returns the value of attribute output_seq.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Query nucleotide sequence as returned by hits_misma_id, with runs of 'x'
# spliced in by overlapped_hits / separated_hits when hits had to be joined.
def output_seq
  @output_seq
end

#q_index_start ⇒ Object (readonly)

Returns the value of attribute q_index_start.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# Second of the two values returned by contenidos_en_prot when the
# constructor checks the merged hit against the full protein.
def q_index_start
  @q_index_start
end

#wrong_seq ⇒ Object (readonly)

Returns the value of attribute wrong_seq.



7
8
9
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 7

# True when hits_misma_id found a hit with the first hit's accession whose
# frame has the opposite sign (sense and antisense hits on the same query).
def wrong_seq
  @wrong_seq
end

Instance Method Details

#hits_misma_id(q, query_fasta_ori) ⇒ Object

Builds an array containing only the hits that share the same ID.



248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 248

# Collects the hits of +q+ whose accession equals that of the first hit.
#
# Hits whose frame sign is opposite to the first hit's are left out and
# flagged through the returned wrong_seq value. Hits on a negative frame are
# converted through reverse_seq (the hit's frame and query coordinates are
# overwritten in place and its reversed flag is set).
#
# q               - query object exposing #hits.
# query_fasta_ori - nucleotide sequence of the query.
#
# Returns [hits_with_same_id, query_fasta, wrong_seq].
def hits_misma_id(q, query_fasta_ori)
	wrong_seq = false
	misma_id = []
	query_fasta = query_fasta_ori.dup
	frame_ori = q.hits[0].q_frame

	q.hits.each do |h|
		next unless h.acc == q.hits[0].acc

		# does this hit run in the opposite sense to the first hit?
		opposite_sense = (frame_ori < 0 && h.q_frame > 0) || (frame_ori > 0 && h.q_frame < 0)
		if opposite_sense
			wrong_seq = true
			next
		end

		# the sequence is reversed, so turn it around
		if h.q_frame.to_i < 0
			query_fasta, h.q_frame, h.q_beg, h.q_end = reverse_seq(query_fasta_ori, h.q_frame, h.q_beg, h.q_end)
			h.reversed = true
		end

		misma_id << h
	end

	[misma_id, query_fasta, wrong_seq]
end

#overlapped_hits(hit, correccion_x, q) ⇒ Object



117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 117

# Joins +hit+ to @final_hit when both overlap on the query but were reported
# in different frames. The overlapping stretch is replaced by 'x' characters
# in the amino-acid alignment strings and in @output_seq, and @final_hit's
# coordinates are widened to cover +hit+.
#
# hit          - hit to merge; reads q_beg, q_end, s_beg, s_end, q_seq, s_seq.
# correccion_x - frame correction (0, 1 or 2) added when sizing the 'x' run.
# q            - query object, only forwarded to contenidos_en_prot.
#
# Side effects: updates @msgs, @number_x, @number_x_aa, @output_seq and
# @final_hit (q_end, s_beg, s_end, q_seq, s_seq).
def overlapped_hits(hit, correccion_x, q)
	if @msgs.empty?
		@msgs = "Overlapping hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
	else
		@msgs += " and overlapping frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
	end

	# nucleotides (query) and amino acids (subject) shared by both hits
	overlapped_nt = @final_hit.q_end - hit.q_beg + 1
	overlapped_aas = @final_hit.s_end - hit.s_beg + 1

	# round the nucleotide overlap up to a multiple of three
	case overlapped_nt % 3
	when 1 then overlapped_nt += 2
	when 2 then overlapped_nt += 1
	end

	# number of x to insert
	@number_x = (((correccion_x + overlapped_nt) / 3) + 1) * 3
	@number_x_aa = overlapped_aas

	num_x = ''
	if @number_x.to_i > 0
		num_x = 'x' * @number_x.to_i
	elsif @number_x.to_i < 0
		@msgs = "ERROR#2 unexpected negative index in x_number, "
	end

	# a negative amino-acid overlap means the hits do not overlap on the subject
	num_x_aa = 'x' * @number_x_aa.to_i.abs
	@msgs = "Warning!, your query overlaps and the subject is separated, " if @number_x_aa.to_i < 0

	if @number_x_aa.to_i >= 0
		# Keep the part of the merged alignment that precedes the overlap. The
		# length is clamped at 0: a range ending at a negative index would wrap
		# around and keep (almost) the whole string instead of nothing.
		kept_q = [@final_hit.q_seq.length - overlapped_aas, 0].max
		kept_s = [@final_hit.s_seq.length - overlapped_aas, 0].max
		@final_hit.q_seq = "#{@final_hit.q_seq[0, kept_q]}#{num_x_aa}#{hit.q_seq[overlapped_aas..hit.q_seq.length]}"
		@final_hit.s_seq = "#{@final_hit.s_seq[0, kept_s]}#{num_x_aa}#{hit.s_seq[overlapped_aas..hit.s_seq.length]}"
	else
		@final_hit.q_seq = "#{@final_hit.q_seq}#{num_x_aa}#{hit.q_seq}"
		@final_hit.s_seq = "#{@final_hit.s_seq}#{num_x_aa}#{hit.s_seq}"
	end

	# Same wrap-around guard for the nucleotide prefix: a negative end index
	# means nothing precedes the overlap.
	prefix_end = @final_hit.q_end - overlapped_nt
	prefix = prefix_end >= 0 ? @output_seq[0..prefix_end] : ''
	output_seq_tmp = "#{prefix}#{num_x}#{@output_seq[(hit.q_beg + overlapped_nt)..@output_seq.length]}"

	full_prot_tmp = output_seq_tmp[@final_hit.q_frame - 1, output_seq_tmp.length + 1].translate

	# NOTE(review): the result is not used here; the call is kept in case
	# contenidos_en_prot has side effects - confirm.
	contenidos_en_prot(@final_hit, full_prot_tmp, q)

	@output_seq = output_seq_tmp

	@final_hit.q_end = hit.q_end
	@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
	@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
end

#same_frame_hits(hit) ⇒ Object



105
106
107
108
109
110
111
112
113
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 105

# Extends @final_hit so that it also covers +hit+, a hit reported in the same
# frame. Only coordinates are touched; alignment strings are left as they are.
#
# Both the query and the subject span become the union of the two hits. The
# query end is taken as the maximum rather than copied from +hit+, so a hit
# that lies inside the merged hit cannot shrink it.
#
# hit - hit to merge; reads q_beg, q_end, s_beg and s_end.
def same_frame_hits(hit)
	@final_hit.q_beg = [@final_hit.q_beg, hit.q_beg].min
	@final_hit.q_end = [@final_hit.q_end, hit.q_end].max

	@final_hit.s_beg = [@final_hit.s_beg, hit.s_beg].min
	@final_hit.s_end = [@final_hit.s_end, hit.s_end].max
end

#same_hit(hit) ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 88

# Tells whether +hit+ matches @final_hit, i.e. whether score, query range
# (q_beg, q_end) and subject range (s_beg, s_end) are all equal.
#
# Returns true or false.
def same_hit(hit)
	[:score, :q_beg, :q_end, :s_beg, :s_end].all? do |field|
		hit.public_send(field) == @final_hit.public_send(field)
	end
end

#separated_hits(hit, correccion_x, q) ⇒ Object



190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
# File 'lib/full_lengther_next/classes/une_los_hit.rb', line 190

# Joins +hit+ to @final_hit when the two do not touch on the query. The gap
# between them in @output_seq is replaced by a run of 'x' whose length is the
# gap rounded up to a multiple of three, and one third as many 'x' are placed
# between the amino-acid alignment strings.
#
# hit          - hit to merge; reads q_beg, q_end, s_end, q_seq and s_seq.
# correccion_x - accepted for symmetry with overlapped_hits; not used here.
# q            - query object, only forwarded to contenidos_en_prot.
#
# Side effects: updates @msgs, @number_x, @output_seq and @final_hit (score,
# q_end, s_end, q_seq, s_seq). Returns whatever contenidos_en_prot returns.
def separated_hits(hit, correccion_x, q)
	if @msgs.empty?
		@msgs = "Separated hits, possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
	else
		@msgs += " and possible frame ERROR between #{@final_hit.q_end + 1} and #{hit.q_beg + 1}, "
	end

	# size of the gap on the query, rounded up to a multiple of three
	@number_x = hit.q_beg - @final_hit.q_end - 1
	case @number_x % 3
	when 1 then @number_x += 2
	when 2 then @number_x += 1
	end

	# Both fillers default to '' (as overlapped_hits does) so the error branch
	# below no longer leaves them nil and breaks the concatenation that follows.
	num_x = ''
	num_x_aa = ''
	if @number_x.to_i > 0
		num_x = 'x' * @number_x.to_i
		num_x_aa = 'x' * (@number_x.to_i / 3)
	elsif @number_x.to_i < 0
		@msgs = "ERROR#2 unexpected negative index in x_number"
	end

	@output_seq = @output_seq[0..@final_hit.q_end - 1] + num_x + @output_seq[hit.q_beg - 1..@output_seq.length - 1]

	# q_beg and s_beg of the merged hit stay as they are
	@final_hit.score += 1
	@final_hit.q_end = hit.q_end
	@final_hit.s_end = hit.s_end

	@final_hit.q_seq = "#{@final_hit.q_seq}#{num_x_aa}#{hit.q_seq}"
	@final_hit.s_seq = "#{@final_hit.s_seq}#{num_x_aa}#{hit.s_seq}"

	full_prot_tmp = @output_seq[@final_hit.q_frame - 1, @output_seq.length + 1].translate

	# NOTE(review): the result is not used by the constructor; the call is
	# kept in case contenidos_en_prot has side effects - confirm.
	contenidos_en_prot(@final_hit, full_prot_tmp, q)
end