Class: BatchBlast

Inherits:
Object
  • Object
show all
Defined in:
lib/scbi_blast/batch_blast.rb

Overview

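Class to execute BLAST from Ruby. It was designed to avoid temporary files by using pipes, but the current do_blast_cmd implementation writes the query sequences to a temporary file.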

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(database, blast_type = 'blastn', extra_params = '') ⇒ BatchBlast

class initialization



28
29
30
31
32
33
34
# File 'lib/scbi_blast/batch_blast.rb', line 28

# Stores the settings later used to build the blast command line.
#
# @param database [String] database argument appended to the command
# @param blast_type [String] name of the blast executable (default 'blastn')
# @param extra_params [String] extra command-line parameters inserted
#   right after the executable name
def initialize(database, blast_type = 'blastn', extra_params = '')
  @blast_type, @database, @extra_params = blast_type, database, extra_params
end

Class Method Details

.do_blast_cmd(seq_fasta, cmd) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
# File 'lib/scbi_blast/batch_blast.rb', line 119

# Runs +cmd+ against the sequences in +seq_fasta+ and returns its output.
#
# The sequences are written to a temporary file (created inside
# ENV['SCBI_BLAST_TMPDIR'] when that variable is set, otherwise in the
# default temporary directory) and the command is executed through the
# shell as "<cmd> -query <tempfile>". The temporary file is always removed
# afterwards.
#
# Earlier pipe-based variants (PTY.open + spawn, IO.popen) are no longer
# used; the query is always handed over through the temporary file.
#
# Beware: we once nearly went crazy trying to find out why this did not
# return all the hits found when running blast by hand. The reason was that
# by hand we were passing the complete sequence, whereas in the MID we were
# passing only the first 20 nt.
#
# @param seq_fasta [String] sequences in FASTA format
# @param cmd [String] blast command line, without the -query option
# @return [Array<String>, String] the lines written by the command to
#   stdout, or '' when +seq_fasta+ is empty (the command is not run then)
# @raise [RuntimeError] if the command does not finish successfully
def self.do_blast_cmd(seq_fasta, cmd)
  return '' if seq_fasta.empty?

  require 'tempfile'
  require 'shellwords'

  # A nil tmpdir makes Tempfile fall back to the default temporary directory.
  file = Tempfile.new('scbi_blast_', ENV['SCBI_BLAST_TMPDIR'])

  begin
    file.puts seq_fasta
    file.close

    # Escape the path so a temporary directory containing spaces or shell
    # metacharacters still reaches blast as a single -query argument.
    output = `#{cmd} -query #{Shellwords.escape(file.path)}`
    status = $?

    # success? is only true for a normal exit with status 0, so a command
    # killed by a signal (exitstatus is nil) is reported as an error too.
    unless status.success?
      raise "Error doing blast #{cmd} to fasta: #{seq_fasta}"
    end

    output.split("\n")
  ensure
    # close! closes the handle and unlinks the file in one step.
    file.close!
  end
end

Instance Method Details

#close ⇒ Object



209
210
211
# File 'lib/scbi_blast/batch_blast.rb', line 209

# No-op: the method body is empty, so calling it releases nothing and
# returns nil.
def close; end

#do_blast(seqs, fmt = :table, parse_output = true, file = nil) ⇒ Object

do a blast to seqs



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# File 'lib/scbi_blast/batch_blast.rb', line 67

# Runs blast on +seqs+ and optionally parses the output.
#
# @param seqs [Array<String>, String] FASTA content, either as an array of
#   lines (joined with newlines) or as a single string
# @param fmt [Symbol] :table or :xml; selects the output format of the
#   command and the result class used for parsing
# @param parse_output [Boolean] when true the output is wrapped in
#   BlastTableResult (:table) or BlastStreamxmlResult (:xml) and the number
#   of parsed queries is checked against the number of '>' in the input
# @param file [String, nil] when given and not empty, blast writes to this
#   file and the file name is used in place of the captured output
# @return [Object] the raw result when +parse_output+ is false ('' for
#   empty input), otherwise the parsed result object
# @raise [RuntimeError] if the parsed result does not contain one query per
#   '>' found in the input
def do_blast(seqs, fmt = :table, parse_output = true, file = nil)
  seq_fasta = seqs.is_a?(Array) ? seqs.join("\n") : seqs

  cmd = get_blast_cmd(fmt, file)

  if seqs.empty?
    res = ''
  else
    res = BatchBlast.do_blast_cmd(seq_fasta, cmd)
    # With an output file the file name stands in for the captured output.
    res = file if !file.nil? && !file.empty?
  end

  return res unless parse_output

  if fmt == :table
    res = BlastTableResult.new(res)
  elsif fmt == :xml
    res = BlastStreamxmlResult.new(res)
  end

  # check if all sequences were processed
  if seq_fasta.count('>') != res.querys.count
    # seqs may be a plain String, which has no #select; split it into lines
    # so the diagnostic below can be built for both accepted input kinds.
    entries = seqs.is_a?(Array) ? seqs : seq_fasta.split("\n")
    not_processed = entries.select { |e| e.index('>') }

    res.querys.each do |query|
      not_processed.delete('>' + query.query_id)
    end

    raise "If using table format, please, use format 7. These queries where empty or not processed: #{seq_fasta.count('>')},#{res.querys.count}  by CMD: #{cmd}:\n #{not_processed} \n Full_data:\n seqs=#{seqs};\n"
  end

  res
end

#do_blast_seqs(seqs, fmt = :table, parse_output = true, file = nil) ⇒ Object

do blast to an array of Sequence objects



193
194
195
196
197
198
199
200
201
202
203
204
205
206
# File 'lib/scbi_blast/batch_blast.rb', line 193

# Runs blast on a collection of sequence objects.
#
# Every object contributes two FASTA lines: '>' followed by its seq_name,
# and its seq_fasta. The resulting array is handed to do_blast together
# with the remaining arguments, and its result is returned unchanged.
#
# @param seqs [#each] objects responding to seq_name and seq_fasta
# @param fmt [Symbol] forwarded to do_blast
# @param parse_output [Boolean] forwarded to do_blast
# @param file [String, nil] forwarded to do_blast
# @return [Object] whatever do_blast returns
def do_blast_seqs(seqs, fmt = :table, parse_output = true, file = nil)
  fasta_lines = []
  seqs.each { |entry| fasta_lines.push('>' + entry.seq_name, entry.seq_fasta) }

  do_blast(fasta_lines, fmt, parse_output, file)
end

#get_blast_cmd(fmt = :table, file = nil) ⇒ Object

returns the blast cmd that will be used to launch blast



37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
# File 'lib/scbi_blast/batch_blast.rb', line 37

# Returns the blast command line that will be used to launch blast.
#
# The command is assembled from the blast type, the extra parameters, the
# output format option, the database and, when +file+ is given and not
# empty, an "-out <file>" option. Without +file+ nothing is appended, so
# the command's output is left on stdout (earlier line-buffering filters
# through sed/grep are disabled).
#
# @param fmt [Symbol] :table (tabular format 7 with an explicit column
#   list) or :xml (format 5)
# @param file [String, nil] optional output file name
# @return [String] the command line, without the -query option
# @raise [ArgumentError] if +fmt+ is neither :table nor :xml
def get_blast_cmd(fmt = :table, file = nil)
  format =
    case fmt
    when :table
      ' -outfmt "7 qseqid sacc pident length mismatch gapopen qstart qend sstart send evalue bitscore score qframe sframe qseq sseq qlen slen stitle" '
    when :xml
      ' -outfmt 5 '
    else
      # Fail with a clear message instead of a TypeError raised while
      # concatenating a nil format into the command string.
      raise ArgumentError, "Unsupported blast output format: #{fmt.inspect} (expected :table or :xml)"
    end

  out = file.nil? || file.empty? ? '' : " -out #{file}"

  @blast_type + ' ' + @extra_params + format + @database + out
end