Class: GeneValidator::FetchRawSequences
- Inherits:
-
Object
- Object
- GeneValidator::FetchRawSequences
- Extended by:
- Forwardable
- Defined in:
- lib/genevalidator/get_raw_sequences.rb
Class Method Summary collapse
- .batch_raw_seq_cmd(index_file) ⇒ Object
-
.extract_from_index(identifier) ⇒ Object
Gets raw sequence by fasta identifier from a fasta index file Params:
identifier
: String Output: String with the nucleotide sequence corresponding to the identifier. -
.extract_from_local_db(batch, accno = nil, idx_file = nil) ⇒ Object
Gets raw sequence by accession number from a given database Params:
accno
: accession number as String db
: database as String Output: String with the nucleotide sequence corresponding to the accession. - .extract_from_remote_db(accession, db_seq_type = 'protein') ⇒ Object
- .failed_raw_sequences(blast_output) ⇒ Object
- .run(identifier, accession) ⇒ Object
- .single_raw_seq_cmd(accession) ⇒ Object
Class Method Details
.batch_raw_seq_cmd(index_file) ⇒ Object
176 177 178 179 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 176
# Assembles the blastdbcmd command line for a batch lookup.
# Params:
# +index_file+:: value passed to blastdbcmd's -entry_batch option
# Output:
# String holding the command; it reads the database name from opt[:db] and
# names opt[:raw_sequences] as the -out target. Nothing is executed here.
def batch_raw_seq_cmd(index_file)
  [
    'blastdbcmd',
    "-entry_batch '#{index_file}'",
    "-db '#{opt[:db]}'",
    "-outfmt '%f'",
    "-out '#{opt[:raw_sequences]}'"
  ].join(' ')
end
.extract_from_index(identifier) ⇒ Object
Gets raw sequence by fasta identifier from a fasta index file Params: identifier
: String Output: String with the nucleotide sequence corresponding to the identifier
148 149 150 151 152 153 154 155 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 148
# Gets a raw sequence by FASTA identifier using the offsets stored in
# config[:raw_seq_file_load].
# Params:
# +identifier+:: key into config[:raw_seq_file_load]; the entry is indexed
#                as [start_offset, end_offset] into the opt[:raw_sequences]
#                file
# Output:
# String with the sequence letters of the record (line breaks removed), or
# the String 'Error' when anything goes wrong, so that the caller can fall
# back to another fetching method.
def extract_from_index(identifier)
  idx = config[:raw_seq_file_load][identifier]
  # File.binread rather than IO.binread: the IO class method treats a path
  # that starts with '|' as a command to run.
  record = File.binread(opt[:raw_sequences], idx[1] - idx[0], idx[0])
  # Group 1 is the header line, group 2 the sequence lines that follow it.
  parsed = record.match(/>([^\n]*)\n([A-Za-z\n]*)/)
  parsed[2].delete("\n")
rescue StandardError
  'Error' # return error so it can then try alternative fetching method.
end
.extract_from_local_db(batch, accno = nil, idx_file = nil) ⇒ Object
Gets raw sequence by accession number from a given database Params: accno
: accession number as String db
: database as String Output: String with the nucleotide sequence corresponding to the accession
164 165 166 167 168 169 170 171 172 173 174 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 164
# Runs blastdbcmd against the local database and returns what it printed.
# Params:
# +batch+:: truthy to run the batch command built from +idx_file+, falsy to
#           run the single-entry command built from +accno+
# +accno+:: accession number, used when +batch+ is falsy
# +idx_file+:: batch entry file, used when +batch+ is truthy
# Output:
# String with the combined stdout and stderr of the command. For a single
# lookup this contains the sequence. In batch mode, output containing
# 'Error' is additionally handed to failed_raw_sequences.
def extract_from_local_db(batch, accno = nil, idx_file = nil)
  cmd = batch ? batch_raw_seq_cmd(idx_file) : single_raw_seq_cmd(accno)
  efile = Tempfile.new('blast_out')
  # Backticks go through /bin/sh. '&>' is a bash extension; a POSIX shell
  # reads it as "run in the background, then truncate the file", which
  # captures nothing. Use the portable form instead.
  `#{cmd} >#{efile.path} 2>&1`
  raw_seqs = efile.read
  failed_raw_sequences(raw_seqs) if batch && raw_seqs =~ /Error/
  raw_seqs # when obtaining a single raw_seq, this contains the sequence
ensure
  # efile is still nil if building the command or the tempfile raised;
  # guard so the cleanup does not mask that original error.
  if efile
    efile.close
    efile.unlink
  end
end
.extract_from_remote_db(accession, db_seq_type = 'protein') ⇒ Object
198 199 200 201 202 203 204 205 206 207 208 209 210 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 198
# Fetches a sequence from NCBI through two E-utilities requests: esearch to
# register the accession on the history server, then efetch to download the
# first hit as FASTA text.
# Params:
# +accession+:: accession to search for; URL-encoded before it is sent
# +db_seq_type+:: value of the E-utilities db parameter
#                 (defaults to 'protein')
# Output:
# String with the efetch response body minus its final character.
# Raises NoMethodError if the esearch response carries no QueryKey or WebEnv
# element. Errors raised by Net::HTTP are not rescued.
def extract_from_remote_db(accession, db_seq_type = 'protein')
  # NCBI serves E-utilities over HTTPS from this host; Net::HTTP.get does
  # not follow redirects, so the URL has to be the final one.
  base = 'https://eutils.ncbi.nlm.nih.gov/entrez/eutils/'
  term = URI.encode_www_form_component(accession)
  search_uri = "#{base}esearch.fcgi?" \
               "db=#{db_seq_type}&retmax=1&usehistory=y&term=#{term}/"
  result = Net::HTTP.get(URI.parse(search_uri))
  # Non-greedy captures so each match stops at its own closing tag.
  query = result.match(%r{<QueryKey>(.+?)</QueryKey>}m)[1]
  web_env = result.match(%r{<WebEnv>(.+?)</WebEnv>}m)[1]
  fetch_uri = "#{base}efetch.fcgi?" \
              'rettype=fasta&retmode=text&retstart=0&retmax=1&' \
              "db=#{db_seq_type}&query_key=#{query}&WebEnv=#{web_env}"
  result = Net::HTTP.get(URI.parse(fetch_uri))
  result[0..-2]
end
.failed_raw_sequences(blast_output) ⇒ Object
185 186 187 188 189 190 191 192 193 194 195 196 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 185
# Scans blastdbcmd output for "Error: <accession>: OID not found" lines and,
# for each one, reports the miss on stderr and appends the sequence returned
# by extract_from_remote_db to the opt[:raw_sequences] file.
# Params:
# +blast_output+:: String with the text printed by blastdbcmd
# Output:
# Whatever blast_output.each_line returns. Lines that do not report a
# missing OID are skipped.
def failed_raw_sequences(blast_output)
  blast_output.each_line do |line|
    # The accession may contain dots or pipes (e.g. a version suffix), so
    # accept anything up to the next colon or whitespace.
    missing = line.match(/Error: ([^\s:]+): OID not found/)
    next unless missing
    acc = missing[1]
    $stderr.puts "\nCould not find sequence '#{acc}' within the" \
                 ' BLAST database.'
    $stderr.puts "Attempting to obtain sequence '#{acc}' from" \
                 ' remote BLAST databases.'
    File.open(opt[:raw_sequences], 'a+') do |f|
      f.puts extract_from_remote_db(acc)
    end
  end
end
.run(identifier, accession) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 138 139 140 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 127
# Looks up one raw sequence, trying the available sources in order.
# Params:
# +identifier+:: key passed to extract_from_index
# +accession+:: accession passed to the local or remote database lookup
# Output:
# The fetched sequence String, or nil when the final result still matches
# /Error/ (or nothing was fetched).
def run(identifier, accession)
  # A result is unusable when it is absent or carries an error marker.
  unusable = ->(seq) { seq.nil? || seq =~ /Error/ }

  # Source 1: the previously created raw_sequences index, if configured.
  sequence = opt[:raw_sequences] ? extract_from_index(identifier) : nil

  if opt[:db] =~ /remote/
    # Source 2 for remote databases: query the remote service.
    sequence = extract_from_remote_db(accession) if unusable.call(sequence)
  elsif unusable.call(sequence)
    # Source 2 for local databases: a single blastdbcmd lookup.
    sequence = extract_from_local_db(false, accession)
  end

  # An error marker that survived every attempt is reported as nil.
  sequence =~ /Error/ ? nil : sequence
end
.single_raw_seq_cmd(accession) ⇒ Object
181 182 183 |
# File 'lib/genevalidator/get_raw_sequences.rb', line 181
# Assembles the blastdbcmd command line for a single-entry lookup.
# Params:
# +accession+:: value passed to blastdbcmd's -entry option
# Output:
# String holding the command, with the database name read from opt[:db].
# Nothing is executed here.
def single_raw_seq_cmd(accession)
  [
    'blastdbcmd',
    "-entry '#{accession}'",
    "-db '#{opt[:db]}'",
    "-outfmt '%s'"
  ].join(' ')
end