Class: GeneValidator::RawSequences

Inherits:
Object
  • Object
show all
Extended by:
Forwardable
Defined in:
lib/genevalidator/get_raw_sequences.rb

Overview

Gets the raw sequences for each hit in a BLAST output file

Class Method Summary collapse

Class Method Details

.index_raw_seq_file(raw_seq_file = opt[:raw_sequences]) ⇒ Object

Index the raw sequences file…



46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
# File 'lib/genevalidator/get_raw_sequences.rb', line 46

# Index the raw sequences file.
#
# Every FASTA description line is cut down to its identifier (everything from
# the first space to the end of the line is removed), the file is rewritten in
# place, and an index of record offsets is built from the rewritten content.
#
# The index maps each identifier to a two-element array [start, finish] of
# offsets into that content. It is stored in config[:raw_seq_file_load] and
# dumped as YAML to "<raw_seq_file>.idx", whose path is stored in
# config[:raw_seq_file_index].
#
# @param raw_seq_file [String] path of the FASTA file; it is overwritten
# @return [nil]
def index_raw_seq_file(raw_seq_file = opt[:raw_sequences])
  # Leave only the identifiers in the FASTA description lines. binread opens,
  # reads and closes the file itself, so no handle is left dangling.
  content = File.binread(raw_seq_file).gsub(/ .*/, '')
  File.write(raw_seq_file, content)

  # Identifiers, and the offset at which each record starts.
  ids    = content.scan(/>(.*)\n/).flatten
  starts = content.enum_for(:scan, /(>[^>]+)/).map { Regexp.last_match.begin(0) }

  # A record ends where the next record starts.
  # NOTE(review): the last record ends at content.length - 1, i.e. one short
  # of the end of the content, unlike the other records -- confirm that the
  # code consuming this index relies on that before changing it.
  last_finish = content.length - 1
  index_hash  = ids.each_with_index.each_with_object({}) do |(id, i), index|
    finish    = i == starts.length - 1 ? last_finish : starts[i + 1]
    index[id] = [starts[i], finish]
  end

  # Publish the index in the shared config and persist it next to the file.
  config[:raw_seq_file_index] = "#{raw_seq_file}.idx"
  config[:raw_seq_file_load]  = index_hash

  File.open(config[:raw_seq_file_index], 'w') { |f| YAML.dump(index_hash, f) }
  nil
end

.run ⇒ Object

Obtains raw_sequences from BLAST output file…



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# File 'lib/genevalidator/get_raw_sequences.rb', line 19

# Obtains the raw sequences for the hits of a BLAST output file and indexes
# the resulting raw sequences file.
#
# The BLAST output used is opt[:blast_xml_file] when it is set, otherwise
# opt[:blast_tabular_file]. The raw sequences are written to
# "<blast file>.raw_seq", and that path is stored in opt[:raw_sequences].
#
# @return [Object] whatever index_raw_seq_file returns
def run
  @opt    = opt
  @config = config

  $stderr.puts 'Extracting sequences within the BLAST output file from the BLAST database'

  # Prefer the XML output; fall back to the tabular one.
  @blast_file = @opt[:blast_xml_file] || @opt[:blast_tabular_file]

  @opt[:raw_sequences] = @blast_file + '.raw_seq'
  hit_index_path       = @blast_file + '.index'

  if opt[:db] =~ /remote/
    # Database option mentions "remote": write the raw sequences file directly.
    write_a_raw_seq_file(@opt[:raw_sequences], 'remote')
  else
    # Otherwise write an index file first and extract from the local database.
    write_an_index_file(hit_index_path, 'local')
    obtain_raw_seqs_from_local_db(hit_index_path, @opt[:raw_sequences])
  end

  index_raw_seq_file(@opt[:raw_sequences])
end