Class: RemoteNCBI

Inherits:
Object
  • Object
show all
Defined in:
lib/bacterial-annotator/remote-ncbi.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(db, seq_file, outfile, pidentity) ⇒ RemoteNCBI

Sets up a remote NCBI BLAST run: stores the parameters, submits the sequences and validates the returned output.



18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 18

# Prepares a remote NCBI BLAST run and submits it right away.
#
# db        - database name; anything other than "swissprot",
#             "refseq_protein" or "nr" is stored as "bad database".
# seq_file  - path of the sequence file handed to submit_blast.
# outfile   - path stored in @outfile for the BLAST output.
# pidentity - identity threshold stored in @pidentity.
#
# After submission, @valid holds the result of validate_output, or false
# when submit_blast returned an empty string.
def initialize(db, seq_file, outfile, pidentity)

  known_dbs = ["swissprot", "refseq_protein", "nr"]
  @db = known_dbs.include?(db) ? db : "bad database"

  # Landing page of the blastp web form, assembled from its query parts.
  url = [
    'http://blast.ncbi.nlm.nih.gov/Blast.cgi',
    '?PROGRAM=blastp&BLAST_PROGRAMS=blastp',
    '&PAGE_TYPE=BlastSearch&SHOW_DEFAULTS=on',
    '&LINK_LOC=blasthome'
  ].join

  @seq_file = seq_file
  @outfile = outfile
  @resultURI = submit_blast(url)
  @pidentity = pidentity

  if @resultURI == ""
    @valid = false
  else
    @xmloutput = ""
    @valid = validate_output
  end

end

Instance Attribute Details

#aln_hitsObject (readonly)

Returns the value of attribute aln_hits.



15
16
17
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 15

# Reader for @aln_hits.
#
# extract_blast_results assigns this: nil when the run is not valid,
# otherwise a Hash keyed by query protein id whose values hold the
# :pId, :length, :evalue, :score and :hits of the retained hit.
def aln_hits
  @aln_hits
end

#dbObject (readonly)

Returns the value of attribute db.



15
16
17
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 15

# Reader for @db.
#
# The constructor stores the requested database name when it is one of
# "swissprot", "refseq_protein" or "nr", and the string "bad database"
# otherwise.
def db
  @db
end

#xmloutputObject (readonly)

Returns the value of attribute xmloutput.



15
16
17
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 15

# Reader for @xmloutput.
#
# The constructor initialises it to an empty string, just before the BLAST
# output is validated, whenever the result URI is not "".
def xmloutput
  @xmloutput
end

Instance Method Details

#extract_blast_resultsObject

Extracts the BLAST hits from the report stored in the output file into aln_hits.



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 151

# Parses the BLAST report stored in @outfile and fills @aln_hits.
#
# When the run is not valid (@valid is falsy) @aln_hits is set to nil and
# nothing is parsed. Otherwise @aln_hits becomes a Hash keyed by the first
# whitespace-separated token of each query definition. For every query only
# the first hit whose percent identity (identities / target length * 100)
# reaches @pidentity is kept, stored as
#   { pId:, length:, evalue:, score:, hits: [{ gi:, product:, org: }] }
#
# Results are read through the aln_hits reader; the return value of this
# method carries no meaning.
def extract_blast_results

  unless @valid
    @aln_hits = nil
    return
  end

  @aln_hits = {}

  # Block form so the report file is closed as soon as parsing is done.
  Bio::FlatFile.auto(@outfile.to_s) do |flat|

    flat.each_entry do |report|

      report.iterations.each do |query_it|
        prot_id = query_it.query_def.split(" ")[0]
        # A protein keeps the first qualifying hit ever seen for it.
        next if @aln_hits.has_key? prot_id

        query_it.hits.each do |hit|
          p_identity = hit.identity.to_f / hit.target_len.to_f * 100
          # Written as "unless >=" so a NaN identity (0 / 0) is rejected too.
          next unless p_identity >= @pidentity

          # Keep only the first title of a merged (">"-separated) definition.
          # The to_s calls guard against empty strings, for which
          # String#split returns [] and [0] would be nil.
          definition_clean = hit.definition.to_s.split(">")[0].to_s

          # cleaning product definition
          product = definition_clean.
                    gsub("MULTISPECIES: ", "").
                    gsub(/ \[.*\]/, "").
                    gsub("RecName: Full=", "").
                    split("; AltName")[0].to_s.
                    split("; Flags:")[0].to_s.
                    # Accept both "; Short=" and the historical " ; Short=".
                    split(/ ?; Short=/)[0].to_s.strip

          gi = hit.hit_id.to_s.split("|")[1]

          # Organism is whatever sits between the square brackets, if any.
          bracketed = definition_clean[/\[.*\]/]
          organism = bracketed ? bracketed.delete("[]") : ""

          @aln_hits[prot_id] = {
            pId: p_identity.round(2),
            length: hit.target_len.to_i,
            evalue: hit.evalue,
            score: hit.bit_score.to_f,
            hits: [{gi: gi, product: product, org: organism}]
          }
          # Later hits can no longer replace this one; stop scanning.
          break
        end
      end
    end
  end

end

#submit_blast(ncbiURL) ⇒ Object

Submits the sequence file to the NCBI BLAST web form and returns the URI of the results.



47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 47

# Submits the sequences in @seq_file to the NCBI BLAST web form and works
# out where the results can be fetched.
#
# ncbiURL - URL of the BLAST submission page; the page is expected to hold
#           a form named 'searchForm'.
#
# The form is filled with the file content, @db as DATABASE and 40 as
# MAX_NUM_SEQ. An attempt fails when it raises or when the answer page has
# no table cell following a "Request ID" label cell; every failed attempt
# is logged and followed by a 3 second pause, and at most 11 attempts are
# made.
#
# Returns a URI pointing at the XML results for the obtained Request ID, or
# "" when no Request ID could be obtained (the constructor tests for "").
def submit_blast ncbiURL

  f = File.basename(@seq_file)

  # Read once; the same content is reused for every attempt.
  seq_fasta = File.read(@seq_file)

  a = Mechanize.new { |agent|
    agent.user_agent_alias = 'Linux Firefox'
    agent.ignore_bad_chunking = true
  }

  requestID = ""
  try = 1

  while requestID == "" && try < 12

    begin

      a.get(ncbiURL) do |page|

        search = page.form_with(:name => 'searchForm') { |form|
          form.textareas[0].value = seq_fasta
          form.field_with(:name => 'DATABASE').value = @db
          form.field_with(:name => 'MAX_NUM_SEQ').value = 40
        }.submit

        # The Request ID is the text of the cell right after the label cell.
        # The lookup starts from scratch on every attempt, so a label found
        # in a previous answer can never leak into this one.
        tds = search.parser.css('td').to_a
        label_at = tds.index { |td| td.text == "Request ID" }
        value_td = label_at && tds[label_at + 1]
        requestID = value_td.text.gsub(/\s+/, "") if value_td

      end

    rescue StandardError => e
      warn "NCBI Blast submission error for #{f}: #{e.message}"
    end

    # Count every unsuccessful attempt, not only the ones that raised;
    # otherwise an answer without a Request ID would be retried forever.
    if requestID == ""
      try += 1
      puts "#{try} POST try for #{f}"
      sleep 3
    end

  end

  if requestID == ""
    puts "NCBI Blast for #{f}: no Request ID obtained"
    return ""
  end

  # NCBI serves Blast.cgi over HTTPS; plain-HTTP requests are redirected,
  # which Net::HTTP.get_response does not follow.
  blast_cgi = "https://blast.ncbi.nlm.nih.gov/Blast.cgi"
  puts "NCBI Blast for #{f}: #{blast_cgi}?CMD=Get&RID=#{requestID}"

  URI.parse("#{blast_cgi}?RESULTS_FILE=on&RID=#{requestID}&FORMAT_TYPE=XML&FORMAT_OBJECT=Alignment&CMD=Get")

end

#validate_outputObject

Polls the result URI until the XML BLAST report is available, then writes it to the output file.



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/bacterial-annotator/remote-ncbi.rb', line 104

# Polls @resultURI until the BLAST XML report is available.
#
# Each answer is inspected as follows:
# * first line contains an XML declaration ("<?xml version=") -> the body
#   is the report; it is stored in @xmloutput and written to @outfile;
# * some line reads "Status=WAITING" -> the search is still running, so the
#   method pauses (30 s for 'nr' and 'refseq_protein', 10 s otherwise) and
#   asks again;
# * anything else -> the run is considered invalid.
#
# Returns true when the report was retrieved and written, false otherwise.
def validate_output

  xmloutput = ""
  valid = true
  finish = false

  while valid && !finish

    response = Net::HTTP.get_response(@resultURI)
    body = response.body.to_s.split("\n")

    # "?" is escaped: unescaped it would only make the "<" optional and let
    # any first line containing "xml version=" pass as XML.
    if body[0] =~ /<\?xml version=/
      xmloutput = body.join("\n")
      finish = true
    else
      valid = body.any? do |l|
        l =~ /Status=/ && l.strip.gsub("Status=", "") == "WAITING"
      end
    end

    # Pause only when another request will follow; there is no point in
    # waiting once the report has arrived or the run has failed.
    if valid && !finish
      case @db
      when 'nr', 'refseq_protein'
        sleep 30
      else
        # Covers 'swissprot' and keeps any other value from busy-looping.
        sleep 10
      end
    end

  end

  return false unless finish

  # Expose the report through the xmloutput reader as well as on disk.
  @xmloutput = xmloutput
  File.open(@outfile.to_s, "w") do |f|
    f.write(xmloutput)
  end
  true

end