Class: Bio::KEGG::GENOME

Inherits:

Bio::KEGGDB

Object
DB
NCBIDB
Bio::KEGGDB
Bio::KEGG::GENOME

show all

Defined in:: lib/bio/db/kegg/genome.rb

Overview

Description

Parser for the KEGG GENOME database

References

ftp.genome.jp/pub/kegg/genomes/genome

Constant Summary collapse

DELIMITER =

RS = "\n///\n"

TAGSIZE =

Instance Method Summary collapse

#chromosomes ⇒ Object

CHROMOSOME – Returns contents of the CHROMOSOME records as an Array of Hash.
#comment ⇒ Object

COMMENT – Returns contents of the COMMENT record as a String.
#data_source ⇒ Object

DATA_SOURCE – Returns contents of the DATA_SOURCE record as a String.
#definition ⇒ Object (also: #organism)

DEFINITION – Returns contents of the DEFINITION record as a String.
#disease ⇒ Object

DISEASE – Returns contents of the DISEASE record as a String.
#entry_id ⇒ Object

ENTRY – Returns contents of the ENTRY record as a String.
#initialize(entry) ⇒ GENOME constructor

A new instance of GENOME.
#lineage ⇒ Object

Returns contents of the TAXONOMY/LINEAGE record as a String.
#nalen ⇒ Object (also: #length)

Returns number of nucleotides from the STATISTICS record as a Fixnum.
#name ⇒ Object

NAME – Returns contents of the NAME record as a String.
#num_gene ⇒ Object

Returns number of protein genes from the STATISTICS record as a Fixnum.
#num_rna ⇒ Object

Returns number of RNA genes from the STATISTICS record as a Fixnum.
#original_db ⇒ Object

ORIGINAL_DB – Returns contents of the ORIGINAL_DB record as a String.
#plasmids ⇒ Object

PLASMID – Returns contents of the PLASMID records as an Array of Hash.
#references ⇒ Object

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
#statistics ⇒ Object

STATISTICS – Returns contents of the STATISTICS record as a Hash.
#taxid ⇒ Object

Returns NCBI taxonomy ID from the TAXONOMY record as a String.
#taxonomy ⇒ Object

TAXONOMY – Returns contents of the TAXONOMY record as a Hash.

Methods inherited from DB

#exists?, #fetch, #get, open, #tags

Constructor Details

#initialize(entry) ⇒ `GENOME`

Returns a new instance of GENOME.



28
29
30

# File 'lib/bio/db/kegg/genome.rb', line 28

# Creates a new GENOME instance by handing +entry+ and the class constant
# TAGSIZE to the superclass constructor.
#
# entry:: passed through unchanged to +super+ (presumably the raw text of a
#         single KEGG GENOME record -- confirm against the parent class).
def initialize(entry)
  super(entry, TAGSIZE)
end

Instance Method Details

#chromosomes ⇒ `Object`

CHROMOSOME – Returns contents of the CHROMOSOME records as an Array of Hash.

# File 'lib/bio/db/kegg/genome.rb', line 133

# CHROMOSOME -- Returns the CHROMOSOME records as an Array of Hash.
#
# Each top-level CHROMOSOME entry becomes one Hash that maps a subtag name
# (via tag_get) to its truncated content (via tag_cut). The hashes are built
# with Hash.new(''), so looking up a subtag that was not present yields ''.
# The parsed Array is stored in @data['CHROMOSOME'] and reused on later calls.
def chromosomes
  return @data['CHROMOSOME'] if @data['CHROMOSOME']

  # Register the (still empty) list in the cache before parsing.
  records = @data['CHROMOSOME'] = []
  toptag2array(get('CHROMOSOME')).each do |entry|
    fields = Hash.new('')
    subtag2array(entry).each do |line|
      fields[tag_get(line)] = truncate(tag_cut(line))
    end
    records.push(fields)
  end
  records
end

#comment ⇒ `Object`

COMMENT – Returns contents of the COMMENT record as a String.



90
91
92

# File 'lib/bio/db/kegg/genome.rb', line 90

# COMMENT -- Returns the contents of the COMMENT record, as obtained from
# field_fetch('COMMENT') (documented as a String).
def comment
  field_fetch('COMMENT')
end

#data_source ⇒ `Object`

DATA_SOURCE – Returns contents of the DATA_SOURCE record as a String.



75
76
77

# File 'lib/bio/db/kegg/genome.rb', line 75

# DATA_SOURCE -- Returns the contents of the DATA_SOURCE record, as obtained
# from field_fetch('DATA_SOURCE') (documented as a String).
def data_source
  field_fetch('DATA_SOURCE')
end

#definition ⇒ `Object` Also known as: organism

DEFINITION – Returns contents of the DEFINITION record as a String.



44
45
46

# File 'lib/bio/db/kegg/genome.rb', line 44

# DEFINITION -- Returns the contents of the DEFINITION record, as obtained
# from field_fetch('DEFINITION') (documented as a String).
def definition
  field_fetch('DEFINITION')
end

#disease ⇒ `Object`

DISEASE – Returns contents of the DISEASE record as a String.



85
86
87

# File 'lib/bio/db/kegg/genome.rb', line 85

# DISEASE -- Returns the contents of the DISEASE record, as obtained from
# field_fetch('DISEASE') (documented as a String).
def disease
  field_fetch('DISEASE')
end

#entry_id ⇒ `Object`

ENTRY – Returns contents of the ENTRY record as a String.



34
35
36

# File 'lib/bio/db/kegg/genome.rb', line 34

# ENTRY -- Returns the entry identifier: the first run of non-whitespace
# characters in the ENTRY record (nil when the record contains none).
def entry_id
  entry_record = field_fetch('ENTRY')
  entry_record[/\S+/]
end

#lineage ⇒ `Object`

Returns contents of the TAXONOMY/LINEAGE record as a String.



70
71
72

# File 'lib/bio/db/kegg/genome.rb', line 70

# Returns the 'lineage' value of the Hash returned by #taxonomy, i.e. the
# TAXONOMY/LINEAGE content (documented as a String).
def lineage
  taxonomy['lineage']
end

#nalen ⇒ `Object` Also known as: length

Returns number of nucleotides from the STATISTICS record as a Fixnum.



182
183
184

# File 'lib/bio/db/kegg/genome.rb', line 182

# Returns the 'num_nuc' value of the Hash returned by #statistics, i.e. the
# nucleotide count parsed from the STATISTICS record. When no "nucleotides:"
# line was parsed, the statistics hash default (0.0) is returned instead.
def nalen
  statistics['num_nuc']
end

#name ⇒ `Object`

NAME – Returns contents of the NAME record as a String.



39
40
41

# File 'lib/bio/db/kegg/genome.rb', line 39

# NAME -- Returns the contents of the NAME record, as obtained from
# field_fetch('NAME') (documented as a String).
def name
  field_fetch('NAME')
end

#num_gene ⇒ `Object`

Returns number of protein genes from the STATISTICS record as a Fixnum.



188
189
190

# File 'lib/bio/db/kegg/genome.rb', line 188

# Returns the 'num_gene' value of the Hash returned by #statistics, i.e. the
# protein gene count parsed from the STATISTICS record. When no
# "protein genes:" line was parsed, the statistics hash default (0.0) is
# returned instead.
def num_gene
  statistics['num_gene']
end

#num_rna ⇒ `Object`

Returns number of RNA genes from the STATISTICS record as a Fixnum.



193
194
195

# File 'lib/bio/db/kegg/genome.rb', line 193

# Returns the 'num_rna' value of the Hash returned by #statistics, i.e. the
# RNA gene count parsed from the STATISTICS record. When no "RNA genes:"
# line was parsed, the statistics hash default (0.0) is returned instead.
def num_rna
  statistics['num_rna']
end

#original_db ⇒ `Object`

ORIGINAL_DB – Returns contents of the ORIGINAL_DB record as a String.



80
81
82

# File 'lib/bio/db/kegg/genome.rb', line 80

# ORIGINAL_DB -- Returns the contents of the ORIGINAL_DB record, as obtained
# from field_fetch('ORIGINAL_DB') (documented as a String).
def original_db
  field_fetch('ORIGINAL_DB')
end

#plasmids ⇒ `Object`

PLASMID – Returns contents of the PLASMID records as an Array of Hash.

# File 'lib/bio/db/kegg/genome.rb', line 148

# PLASMID -- Returns the PLASMID records as an Array of Hash.
#
# Every top-level PLASMID entry is turned into a Hash keyed by subtag name
# (tag_get) with the truncated subtag content (tag_cut) as value. Unknown
# keys read as '' because each Hash is created with Hash.new('').
# The result is kept in @data['PLASMID'] so parsing happens only once.
def plasmids
  if !@data['PLASMID']
    parsed = []
    @data['PLASMID'] = parsed
    toptag2array(get('PLASMID')).each do |plasmid_text|
      attributes = Hash.new('')
      subtag2array(plasmid_text).each do |subfield|
        key = tag_get(subfield)
        attributes[key] = truncate(tag_cut(subfield))
      end
      parsed.push(attributes)
    end
  end
  @data['PLASMID']
end

#references ⇒ `Object`

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.

# File 'lib/bio/db/kegg/genome.rb', line 96

# REFERENCE -- Returns the REFERENCE records as a References collection of
# Reference objects.
#
# Every top-level REFERENCE entry is parsed into a Hash (default value '')
# that is handed to Reference.new. Recognised subtags:
#
# AUTHORS:: split on ', ' and on a final ' and ' into an Array of names; a
#           comma inside a name gets a space after it ("Smith,J." becomes
#           "Smith, J."). An empty AUTHORS field yields an empty Array.
# TITLE::   stored as 'title'.
# JOURNAL:: when it has the form "<journal> <vol>:<from>-<to> (<year>) [UI:<id>]"
#           it is split into 'journal', 'volume', 'pages' ("<from>-<to>"),
#           'year' and 'medline'; otherwise the whole text is stored as
#           'journal'.
#
# The collection is cached in @data['REFERENCE'] after the first call.
def references
  unless @data['REFERENCE']
    ary = toptag2array(get('REFERENCE')).map do |ref|
      hash = Hash.new('')
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /AUTHORS/
          authors = truncate(tag_cut(field)).split(', ')
          # An empty AUTHORS field splits to []; authors[-1] would then be
          # nil, so only post-process when at least one name is present.
          unless authors.empty?
            authors[-1] = authors[-1].split(/\s+and\s+/)
            authors = authors.flatten.map { |a| a.sub(',', ', ') }
          end
          hash['authors'] = authors
        when /TITLE/
          hash['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          if journal =~ /(.*) (\d+):(\d+)-(\d+) \((\d+)\) \[UI:(\d+)\]$/
            hash['journal'] = $1
            hash['volume']  = $2
            # Keep the full page range; group 4 (the end page) used to be
            # captured but dropped.
            hash['pages']   = "#{$3}-#{$4}"
            hash['year']    = $5
            hash['medline'] = $6
          else
            hash['journal'] = journal
          end
        end
      end
      Reference.new(hash)
    end
    @data['REFERENCE'] = References.new(ary)
  end
  @data['REFERENCE']
end

#statistics ⇒ `Object`

STATISTICS – Returns contents of the STATISTICS record as a Hash.

# File 'lib/bio/db/kegg/genome.rb', line 163

# STATISTICS -- Returns the contents of the STATISTICS record as a Hash.
#
# Each line of the record is checked, in this order, for "nucleotides:",
# "protein genes:" and "RNA genes:" followed by digits; the first pattern
# that matches stores the number as an Integer under 'num_nuc', 'num_gene'
# or 'num_rna' respectively. The Hash has a default value of 0.0 for keys
# that were never set. The result is cached in @data['STATISTICS'].
def statistics
  return @data['STATISTICS'] if @data['STATISTICS']

  counts = Hash.new(0.0)
  get('STATISTICS').each_line do |row|
    if (found = /nucleotides:\s+(\d+)/.match(row))
      counts['num_nuc'] = found[1].to_i
    elsif (found = /protein genes:\s+(\d+)/.match(row))
      counts['num_gene'] = found[1].to_i
    elsif (found = /RNA genes:\s+(\d+)/.match(row))
      counts['num_rna'] = found[1].to_i
    end
  end
  @data['STATISTICS'] = counts
end

#taxid ⇒ `Object`

Returns NCBI taxonomy ID from the TAXONOMY record as a String.



65
66
67

# File 'lib/bio/db/kegg/genome.rb', line 65

# Returns the 'taxid' value of the Hash returned by #taxonomy, i.e. the NCBI
# taxonomy ID from the TAXONOMY record (documented as a String).
def taxid
  taxonomy['taxid']
end

#taxonomy ⇒ `Object`

TAXONOMY – Returns contents of the TAXONOMY record as a Hash.

# File 'lib/bio/db/kegg/genome.rb', line 50

# TAXONOMY -- Returns the contents of the TAXONOMY record as a Hash.
#
# The first two subfields of the record are taken as the taxonomy ID and the
# lineage; each is stored (tag removed, truncated) under 'taxid' and
# 'lineage'. A subfield that is absent is stored as ''. Any other key of the
# Hash also reads as ''. The Hash is cached in @data['TAXONOMY'].
def taxonomy
  return @data['TAXONOMY'] if @data['TAXONOMY']

  id_field, lineage_field = subtag2array(get('TAXONOMY'))
  record = Hash.new('')
  record['taxid'] = id_field ? truncate(tag_cut(id_field)) : ''
  record['lineage'] = lineage_field ? truncate(tag_cut(lineage_field)) : ''
  @data['TAXONOMY'] = record
end

Class: Bio::KEGG::GENOME

Overview

Description

References

Constant Summary collapse

Instance Method Summary collapse

Methods inherited from DB

Constructor Details

#initialize(entry) ⇒ GENOME

Instance Method Details

#chromosomes ⇒ Object

#comment ⇒ Object

#data_source ⇒ Object

#definition ⇒ Object Also known as: organism

#disease ⇒ Object

#entry_id ⇒ Object

#lineage ⇒ Object

#nalen ⇒ Object Also known as: length

#name ⇒ Object

#num_gene ⇒ Object

#num_rna ⇒ Object

#original_db ⇒ Object

#plasmids ⇒ Object

#references ⇒ Object

#statistics ⇒ Object

#taxid ⇒ Object

#taxonomy ⇒ Object