Class: BlastTableResult

Inherits:
BlastResult show all
Defined in:
lib/scbi_blast/blast_table_result.rb

Overview

Extracts results from a BLAST tabular output file (or from its lines) and uses them to create instances of “BlastQuery” and “BlastHit”.

Instance Attribute Summary

Attributes inherited from BlastResult

#querys

Instance Method Summary collapse

Methods inherited from BlastResult

#compare?, #empty?, #find_query, #inspect, #size

Constructor Details

#initialize(input) ⇒ BlastTableResult

Parser initialization



31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/scbi_blast/blast_table_result.rb', line 31

# Parser initialization.
#
# Hands +input+ to the parent constructor and then, unless +input+ is empty,
# obtains the result lines and passes them to #parse.
#
# @param input [Array<String>, String] either the result lines themselves or
#   something File.open accepts (a path) from which the lines are read
def initialize(input)
  super(input)

  return if input.empty?

  lines = if input.is_a?(Array)
            input
          else
            # Block form guarantees the file is closed even if reading raises.
            File.open(input, 'r') { |fich| fich.readlines }
          end

  parse(lines)
end

Instance Method Details

#parse(lines) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
# File 'lib/scbi_blast/blast_table_result.rb', line 50

# Parses BLAST tabular output and fills @querys with BlastQuery objects,
# each holding the BlastHit objects built from its result rows.
#
# Comment lines (first non-blank character is '#') are only used to track
# the current query name ("# Query: <name>") and to register a query that
# reports "# 0 hits found". Blank lines are skipped. Every other line is
# split on tabs and read, in this order, as:
#
#   qseqid sacc pident length mismatch gapopen qstart qend sstart send
#   evalue bitscore score qframe sframe qseq sseq qlen slen stitle
#
# The first twelve columns follow BLAST's default 'std' layout
# ('qseqid sseqid pident length mismatch gapopen qstart qend sstart send
# evalue bitscore'). Format specifier meanings, from the BLAST help text:
#
#     qseqid  Query Seq-id                 sacc      Subject accession
#     pident  Percentage of identical matches
#     length  Alignment length             mismatch  Number of mismatches
#     gapopen Number of gap openings
#     qstart  Start of alignment in query  qend      End of alignment in query
#     sstart  Start of alignment in subject
#     send    End of alignment in subject
#     evalue  Expect value                 bitscore  Bit score
#     score   Raw score
#     qframe  Query frame                  sframe    Subject frame
#     qseq    Aligned part of query sequence
#     sseq    Aligned part of subject sequence
#     qlen / slen / stitle: presumably query length, subject length and
#       subject title -- not described in the original notes; confirm
#       against the BLAST documentation.
#
# Values are handed to BlastHit/BlastQuery exactly as read (strings);
# columns absent from a row are passed as nil.
#
# The caller's strings are not modified.
#
# @param lines [Array<String>] raw output lines, with or without line ends
# @return [Array<String>] the +lines+ argument
# @raise [RuntimeError] when the first line is a comment but the last line
#   is not BLAST's "# BLAST processed N queries" summary
def parse(lines)
  # Nothing to parse (e.g. the input file was empty).
  return lines if lines.empty?

  # Commented output is only accepted when it ends with BLAST's summary line.
  if lines.first.start_with?('#') && lines.last !~ /# BLAST processed (\d+) queries/
    raise "Blast didn't processed your queries"
  end

  query_name = ''

  lines.each do |raw_line|
    # Work on a copy: the input may be frozen or reused by the caller.
    line = raw_line.chomp

    if line =~ /^\s*#/
      if line =~ /^#\sQuery:\s+(.+)$/
        query_name = Regexp.last_match(1)
      elsif line =~ /^#\s0\shits\sfound$/
        # A query without hits still gets an (empty) BlastQuery entry.
        @querys.push BlastQuery.new(query_name)
      end
      next
    end

    # A blank line carries no hit; parsing it would create a nil-named query.
    next if line.strip.empty?

    # NOTE(review): /\t+/ collapses consecutive tabs, so an empty column
    # shifts the following ones to the left -- confirm this is intended.
    params = line.split(/\t+/)

    qseqid, sacc, pident, length, mismatch, gapopen,
      qstart, qend, sstart, s_end, evalue, bitscore,
      score, qframe, sframe, qseq, sseq, qlen, slen, stitle = params

    # creates the hit
    hit = BlastHit.new(qstart, qend, sstart, s_end)

    hit.align_len = length
    hit.ident = pident

    hit.gaps = gapopen
    hit.mismatches = mismatch
    hit.e_val = evalue
    hit.bit_score = bitscore

    hit.score = score
    hit.q_frame = qframe
    hit.s_frame = sframe

    hit.subject_id = sacc
    hit.full_subject_length = slen
    hit.definition = stitle
    hit.acc = sacc
    hit.q_seq = qseq
    hit.s_seq = sseq
    hit.q_len = qlen
    hit.s_len = slen

    # Attach the hit to the query with this id, creating the query first
    # when it has not been seen yet.
    query = find_query(@querys, qseqid)
    unless query
      query = BlastQuery.new(qseqid)
      @querys.push query
    end
    query.add_hit(hit)

    query.full_query_length = qlen
  end
end