Class: GeneValidator::Sequence

Inherits:
Object
  • Object
show all
Defined in:
lib/genevalidator/sequences.rb

Overview

This class stores the data associated with each sequence.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize ⇒ Sequence

Returns a new instance of Sequence.



21
22
23
24
25
26
# File 'lib/genevalidator/sequences.rb', line 21

# Builds an empty Sequence: no HSPs recorded yet and every sequence-related
# field unset.
def initialize
  @hsp_list = []
  # The raw sequence, its protein translation and the nucleotide reading
  # frame all start out as nil.
  @raw_sequence = @protein_translation = @nucleotide_rf = nil
end

Instance Attribute Details

#accession_no ⇒ Object

Returns the value of attribute accession_no.



12
13
14
# File 'lib/genevalidator/sequences.rb', line 12

# Reader for @accession_no.
# init_tabular_attribute assigns it from the 'sacc' column when present.
def accession_no
  @accession_no
end

#definition ⇒ Object

Returns the value of attribute definition.



9
10
11
# File 'lib/genevalidator/sequences.rb', line 9

# Reader for @definition.
# NOTE(review): nothing in the visible code assigns this attribute;
# presumably it is set by callers through a writer - confirm.
def definition
  @definition
end

#hsp_list ⇒ Object

array of Hsp objects



15
16
17
# File 'lib/genevalidator/sequences.rb', line 15

# Reader for @hsp_list, which the constructor initializes to an empty Array.
# Documented as an array of Hsp objects.
def hsp_list
  @hsp_list
end

#identifier ⇒ Object

Returns the value of attribute identifier.



10
11
12
# File 'lib/genevalidator/sequences.rb', line 10

# Reader for @identifier.
# init_tabular_attribute assigns it from the 'sseqid' column when present.
def identifier
  @identifier
end

#length_protein ⇒ Object

Returns the value of attribute length_protein.



13
14
15
# File 'lib/genevalidator/sequences.rb', line 13

# Reader for @length_protein.
# init_tabular_attribute assigns it from the 'slen' column, converted with
# to_i, when that column is present.
def length_protein
  @length_protein
end

#nucleotide_rf ⇒ Object

used only for nucleotides



19
20
21
# File 'lib/genevalidator/sequences.rb', line 19

# Reader for @nucleotide_rf (nil after construction).
# Documented as used only for nucleotide sequences.
def nucleotide_rf
  @nucleotide_rf
end

#protein_translation ⇒ Object

used only for nucleotides



18
19
20
# File 'lib/genevalidator/sequences.rb', line 18

# Reader for @protein_translation (nil after construction).
# Documented as used only for nucleotide sequences.
def protein_translation
  @protein_translation
end

#raw_sequence ⇒ Object

Returns the value of attribute raw_sequence.



17
18
19
# File 'lib/genevalidator/sequences.rb', line 17

# Reader for @raw_sequence (nil after construction).
# Assigned by get_sequence_by_accession_no and get_sequence_from_index_file.
def raw_sequence
  @raw_sequence
end

#reading_frame ⇒ Object

Returns the value of attribute reading_frame.



14
15
16
# File 'lib/genevalidator/sequences.rb', line 14

# Reader for @reading_frame.
# NOTE(review): nothing in the visible code assigns this attribute;
# presumably it is set by callers through a writer - confirm.
def reading_frame
  @reading_frame
end

#species ⇒ Object

Returns the value of attribute species.



11
12
13
# File 'lib/genevalidator/sequences.rb', line 11

# Reader for @species.
# NOTE(review): nothing in the visible code assigns this attribute;
# presumably it is set by callers through a writer - confirm.
def species
  @species
end

#type ⇒ Object

protein | mRNA



8
9
10
# File 'lib/genevalidator/sequences.rb', line 8

# Reader for @type.
# Documented values: protein | mRNA.
# NOTE(review): the representation (String vs Symbol) is not visible here -
# confirm against callers.
def type
  @type
end

Instance Method Details

#get_sequence_by_accession_no(accno, dbtype, db) ⇒ Object

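Gets the raw sequence by accession number from a given database. Params: accno: accession number as String; dbtype: database type, sent as the db query parameter when the sequence is fetched from NCBI; db: database as String (a value matching "remote" triggers the NCBI lookup). Output: String with the nucleotide sequence corresponding to the accno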



60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'lib/genevalidator/sequences.rb', line 60

# Fetches the raw sequence for an accession number and stores it in
# @raw_sequence.
#
# When +db+ does not match /remote/ the sequence is read from the BLAST
# database with `blastdbcmd`; otherwise it is fetched from NCBI eutils
# (an esearch request followed by an efetch request).
#
# Params:
# +accno+:: accession number as String
# +dbtype+:: value sent as the `db` query parameter to eutils (remote lookup
#            only)
# +db+:: database as String
# Output:
# String with the raw sequence; '' when the efetch reply contains ERROR or
# carries no sequence lines.
# Raises:
# IOError when blastdbcmd cannot be run or reports an error, or when the
# esearch reply lacks a QueryKey/WebEnv.
def get_sequence_by_accession_no(accno, dbtype, db)
  failure = 'GeneValidator was unable to obtain the raw sequences' \
            ' for the BLAST hits.'
  if db !~ /remote/
    # Argument-array form: no shell is spawned, so quotes or other shell
    # metacharacters in accno/db cannot alter the command line.
    blast_cmd = ['blastdbcmd', '-entry', accno.to_s, '-db', db.to_s,
                 '-outfmt', '%s']
    seq = begin
      # stderr is merged into stdout so that error text can be detected below.
      IO.popen(blast_cmd, err: [:child, :out], &:read)
    rescue SystemCallError
      # blastdbcmd is missing or not executable.
      nil
    end
    raise IOError, failure if seq.nil? || seq =~ /Error/
    @raw_sequence = seq
  else
    $stderr.puts "Getting sequence for '#{accno}' from NCBI - avoid this with '-r'."
    uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/esearch.fcgi?'\
          "db=#{dbtype}&retmax=1&usehistory=y&term=#{accno}/"
    result = Net::HTTP.get(URI.parse(uri))

    # Both values are needed to address the stored search in the efetch call.
    query   = result[%r{<QueryKey>([^<]+)</QueryKey>}, 1]
    web_env = result[%r{<WebEnv>([^<]+)</WebEnv>}, 1]
    raise IOError, failure if query.nil? || web_env.nil?

    uri = 'http://www.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?'\
          "rettype=fasta&retmode=text&retstart=0&retmax=1&db=#{dbtype}" \
          "&query_key=#{query}&WebEnv=#{web_env}"
    result = Net::HTTP.get(URI.parse(uri))

    # Parse FASTA output: drop the header line and join the remaining lines.
    # A reply without any newline has no sequence body at all.
    header_end = result.index("\n")
    seq = header_end ? result[(header_end + 1)..-1].delete("\n") : ''
    @raw_sequence = seq.include?('ERROR') ? '' : seq
  end
  @raw_sequence
end

#get_sequence_from_index_file(raw_seq_file, index_file_name, identifier, hash = nil) ⇒ Object

Gets the raw sequence by fasta identifier using a fasta index file. Params: raw_seq_file: name of the fasta file with raw sequences; index_file_name: name of the fasta index file; identifier: String; hash: Hash - loaded content of the index file (optional; loaded from index_file_name when nil). Output: String with the nucleotide sequence corresponding to the identifier



41
42
43
44
45
46
47
48
49
50
51
# File 'lib/genevalidator/sequences.rb', line 41

# Reads the raw sequence of one FASTA record, located through an index of
# byte offsets, and stores it in @raw_sequence.
#
# Params:
# +raw_seq_file+:: name of the fasta file with raw sequences
# +index_file_name+:: name of the fasta index file (YAML); only read when
#                     +hash+ is nil
# +identifier+:: key looked up in the index
# +hash+:: already-loaded content of the index file; each entry is indexed
#          as [start offset, end offset]
# Output:
# String with the sequence (newlines removed), or nil when the sequence
# could not be retrieved (@raw_sequence is then left untouched).
def get_sequence_from_index_file(raw_seq_file, index_file_name, identifier,
                                 hash = nil)
  # NOTE(review): YAML.load_file may instantiate arbitrary objects on older
  # Psych versions; presumably the index file is generated locally - confirm.
  hash = YAML.load_file(index_file_name) if hash.nil?
  first_byte, end_byte = hash[identifier]
  # File.binread never interprets a leading '|' in the path as a command.
  record = File.binread(raw_seq_file, end_byte - first_byte, first_byte)
  # Capture 2 is the sequence body following the '>' header line.
  body = record[/>([^\n]*)\n([A-Za-z\n]*)/, 2]
  @raw_sequence = body.delete("\n")
rescue StandardError
  # Best effort: an unknown identifier, unreadable file or malformed record
  # yields nil. Interrupt/SystemExit and other non-StandardError exceptions
  # are deliberately allowed to propagate.
  #   $stderr.print "Unable to retrieve raw sequence for the following" \
  #                 "id: #{identifier}\n"
  nil
end

#init_tabular_attribute(hash) ⇒ Object

Initializes the corresponding attribute of the sequence with respect to the column name of the tabular blast output



95
96
97
98
99
# File 'lib/genevalidator/sequences.rb', line 95

# Copies the subject columns of one tabular BLAST row into this sequence.
# Only columns holding a truthy value are applied; the rest leave the
# current attribute untouched.
#
# Params:
# +hash+:: row values keyed by column name ('sseqid', 'sacc', 'slen')
def init_tabular_attribute(hash)
  subject_id  = hash['sseqid']
  accession   = hash['sacc']
  subject_len = hash['slen']

  @identifier     = subject_id if subject_id
  @accession_no   = accession if accession
  @length_protein = subject_len.to_i if subject_len
end