Class: GeneValidator::FetchRawSequences

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/get_raw_sequences.rb

Class Method Summary collapse

Class Method Details

.batch_raw_seq_cmd(index_file) ⇒ Object



176
177
178
179
# File 'lib/genevalidator/get_raw_sequences.rb', line 176

##
# Builds the blastdbcmd command line for a batch lookup.
# Params:
# +index_file+: path passed to blastdbcmd's -entry_batch option
# Output:
# String with the shell command; it reads from the database in opt[:db]
# and writes (using the '%f' output format) to opt[:raw_sequences]
def batch_raw_seq_cmd(index_file)
  [
    'blastdbcmd',
    "-entry_batch '#{index_file}'",
    "-db '#{opt[:db]}'",
    "-outfmt '%f'",
    "-out '#{opt[:raw_sequences]}'"
  ].join(' ')
end

.extract_from_index(identifier) ⇒ Object

Gets the raw sequence for a FASTA identifier from a FASTA index file. Params: identifier: String. Output: String with the nucleotide sequence corresponding to the identifier.



148
149
150
151
152
153
154
155
# File 'lib/genevalidator/get_raw_sequences.rb', line 148

##
# Reads one record out of the raw sequences file using byte offsets that
# were stored per identifier in config[:raw_seq_file_load].
# Params:
# +identifier+: key used to look up the [start, end] byte offsets
# Output:
# String with the sequence letters of the record (newlines removed), or
# the String 'Error' when anything goes wrong, so that the caller can try
# an alternative fetching method.
def extract_from_index(identifier)
  offsets    = config[:raw_seq_file_load][identifier]
  end_byte   = offsets[1]
  start_byte = offsets[0]
  record     = IO.binread(opt[:raw_sequences], end_byte - start_byte,
                          start_byte)
  # capture 1 is the header line, capture 2 the (possibly multi-line) body
  fasta = record.match(/>([^\n]*)\n([A-Za-z\n]*)/)
  fasta[2].delete("\n")
rescue StandardError
  'Error'
end

.extract_from_local_db(batch, accno = nil, idx_file = nil) ⇒ Object

Gets the raw sequence by accession number from a given database. Params: accno: accession number as String; db: database as String. Output: String with the nucleotide sequence corresponding to the accession.



164
165
166
167
168
169
170
171
172
173
174
# File 'lib/genevalidator/get_raw_sequences.rb', line 164

##
# Runs blastdbcmd against the local database and returns what it printed.
# Params:
# +batch+: when truthy, the batch command is built from +idx_file+;
#          otherwise the single-entry command is built from +accno+
# +accno+: accession passed to single_raw_seq_cmd (non-batch mode)
# +idx_file+: file passed to batch_raw_seq_cmd (batch mode)
# Output:
# String with everything the command wrote to stdout and stderr. When
# obtaining a single raw sequence, this contains the sequence. In batch
# mode, output mentioning 'Error' is handed to failed_raw_sequences first.
def extract_from_local_db(batch, accno = nil, idx_file = nil)
  cmd   = batch ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
  efile = Tempfile.new('blast_out')
  begin
    # Backticks run the command through /bin/sh. Use the POSIX redirection
    # form: the bash-only `&>` is read by other shells (e.g. dash) as "run
    # in the background, then truncate the file", which loses all output.
    `#{cmd} > '#{efile.path}' 2>&1`
    raw_seqs = efile.read
    failed_raw_sequences(raw_seqs) if batch && raw_seqs =~ /Error/
    raw_seqs
  ensure
    # Only reached once the Tempfile exists, so cleanup can never fail on a
    # nil handle and hide an earlier error.
    efile.close
    efile.unlink
  end
end

.extract_from_remote_db(accession, db_seq_type = 'protein') ⇒ Object



198
199
200
201
202
203
204
205
206
207
208
209
210
# File 'lib/genevalidator/get_raw_sequences.rb', line 198

##
# Fetches a sequence from NCBI in two HTTP requests: an esearch call that
# yields a QueryKey/WebEnv pair, then an efetch call that uses that pair
# to download the record as FASTA text.
# Params:
# +accession+: search term sent to esearch
# +db_seq_type+: value of the db= parameter (defaults to 'protein')
# Output:
# String with the efetch response body minus its final character
def extract_from_remote_db(accession, db_seq_type = 'protein')
  search_url = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?' +
               ["db=#{db_seq_type}", 'retmax=1', 'usehistory=y',
                "term=#{accession}/"].join('&')
  search_xml = Net::HTTP.get(URI.parse(search_url))
  query_key  = search_xml.match(%r{<\bQueryKey\b>([\w\W\d]+)</\bQueryKey\b>})[1]
  web_env    = search_xml.match(%r{<\bWebEnv\b>([\w\W\d]+)</\bWebEnv\b>})[1]

  fetch_url = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?' +
              ['rettype=fasta', 'retmode=text', 'retstart=0', 'retmax=1',
               "db=#{db_seq_type}", "query_key=#{query_key}",
               "WebEnv=#{web_env}"].join('&')
  fasta      = Net::HTTP.get(URI.parse(fetch_url))
  last_index = fasta.length - 2 # drop the final character of the body
  fasta[0..last_index]
end

.failed_raw_sequences(blast_output) ⇒ Object



185
186
187
188
189
190
191
192
193
194
195
196
# File 'lib/genevalidator/get_raw_sequences.rb', line 185

##
# Recovers sequences that blastdbcmd could not find locally. Each line of
# the form "Error: <accession>: OID not found" is reported on stderr, then
# the accession is fetched with extract_from_remote_db and appended to the
# file at opt[:raw_sequences].
# Params:
# +blast_output+: String with the captured blastdbcmd output
# Output:
# +blast_output+ (the receiver of each_line)
def failed_raw_sequences(blast_output)
  blast_output.each_line do |line|
    found = line.match(/Error: (\w+): OID not found/)
    # The output can contain blank lines or unrelated messages; skip them
    # instead of raising NoMethodError on a nil match and aborting the
    # recovery of the remaining accessions.
    next unless found

    acc = found[1]
    $stderr.puts "\nCould not find sequence '#{acc}' within the" \
                 ' BLAST database.'
    $stderr.puts "Attempting to obtain sequence '#{acc}' from" \
                 ' remote BLAST databases.'
    File.open(opt[:raw_sequences], 'a+') do |f|
      f.puts extract_from_remote_db(acc)
    end
  end
end

.run(identifier, accession) ⇒ Object



127
128
129
130
131
132
133
134
135
136
137
138
139
140
# File 'lib/genevalidator/get_raw_sequences.rb', line 127

##
# Obtains the raw sequence for one hit, trying the cheapest source first.
# Params:
# +identifier+: key for the lookup in the raw sequences index
# +accession+: accession used for the database lookup
# Output:
# the sequence, or nil if the result still matches /Error/
def run(identifier, accession)
  # 1. the previously created raw_sequences index, when one is configured
  sequence = opt[:raw_sequences] ? extract_from_index(identifier) : nil

  # 2. otherwise ask the database: the remote one when opt[:db] mentions
  #    'remote', the local one in every other case
  if sequence.nil? || sequence =~ /Error/
    sequence = if opt[:db] =~ /remote/
                 extract_from_remote_db(accession)
               else
                 extract_from_local_db(false, accession)
               end
  end

  # 3. a result that still reports an error counts as "not found"
  return nil if sequence =~ /Error/

  sequence
end

.single_raw_seq_cmd(accession) ⇒ Object



181
182
183
# File 'lib/genevalidator/get_raw_sequences.rb', line 181

##
# Builds the blastdbcmd command line for a single-entry lookup.
# Params:
# +accession+: value passed to blastdbcmd's -entry option
# Output:
# String with the shell command; it queries the database in opt[:db]
# using the '%s' output format
def single_raw_seq_cmd(accession)
  [
    'blastdbcmd',
    "-entry '#{accession}'",
    "-db '#{opt[:db]}'",
    "-outfmt '%s'"
  ].join(' ')
end