Class: Bio::KEGG::GENOME

Inherits:
Bio::KEGGDB show all
Defined in:
lib/bio/db/kegg/genome.rb

Overview

Description

Parser for the KEGG GENOME database

References

Constant Summary collapse

# DELIMITER and RS both name the same string object, "\n///\n"
# (presumably the separator between flat-file entries -- confirm against Bio::KEGGDB).
DELIMITER = RS = "\n///\n"
# Value handed to the superclass constructor as its second argument.
TAGSIZE = 12

Instance Method Summary collapse

Methods inherited from DB

#exists?, #fetch, #get, open, #tags

Constructor Details

#initialize(entry) ⇒ GENOME

Returns a new instance of GENOME.



28
29
30
# File 'lib/bio/db/kegg/genome.rb', line 28

# Creates a new GENOME instance by forwarding +entry+ to the superclass
# constructor together with TAGSIZE.
#
# entry:: value passed straight through to super (presumably the raw text
#         of one KEGG GENOME entry -- confirm against Bio::KEGGDB).
def initialize(entry)
  super(entry, TAGSIZE)
end

Instance Method Details

#chromosomes ⇒ Object

CHROMOSOME – Returns contents of the CHROMOSOME records as an Array of Hash.



133
134
135
136
137
138
139
140
141
142
143
144
145
# File 'lib/bio/db/kegg/genome.rb', line 133

# CHROMOSOME -- Returns the CHROMOSOME records as an Array of Hash.
#
# Each top-level chunk produced by toptag2array(get('CHROMOSOME')) becomes one
# Hash (default value '') that maps tag_get(field) to truncate(tag_cut(field))
# for every sub-field. The Array is cached in @data['CHROMOSOME'] and reused
# on later calls.
def chromosomes
  return @data['CHROMOSOME'] if @data['CHROMOSOME']

  # The cache slot is filled with the (still empty) Array before parsing starts.
  records = @data['CHROMOSOME'] = []
  toptag2array(get('CHROMOSOME')).each do |chunk|
    fields = Hash.new('')
    subtag2array(chunk).each { |field| fields[tag_get(field)] = truncate(tag_cut(field)) }
    records << fields
  end
  records
end

#comment ⇒ Object

COMMENT – Returns contents of the COMMENT record as a String.



90
91
92
# File 'lib/bio/db/kegg/genome.rb', line 90

# COMMENT -- Returns the contents of the COMMENT record, i.e. whatever
# field_fetch('COMMENT') yields (documented as a String).
def comment
  field_fetch('COMMENT')
end

#data_source ⇒ Object

DATA_SOURCE – Returns contents of the DATA_SOURCE record as a String.



75
76
77
# File 'lib/bio/db/kegg/genome.rb', line 75

# DATA_SOURCE -- Returns the contents of the DATA_SOURCE record, i.e. whatever
# field_fetch('DATA_SOURCE') yields (documented as a String).
def data_source
  field_fetch('DATA_SOURCE')
end

#definition ⇒ Object Also known as: organism

DEFINITION – Returns contents of the DEFINITION record as a String.



44
45
46
# File 'lib/bio/db/kegg/genome.rb', line 44

# DEFINITION -- Returns the contents of the DEFINITION record, i.e. whatever
# field_fetch('DEFINITION') yields (documented as a String).
def definition
  field_fetch('DEFINITION')
end

#disease ⇒ Object

DISEASE – Returns contents of the DISEASE record as a String.



85
86
87
# File 'lib/bio/db/kegg/genome.rb', line 85

# DISEASE -- Returns the contents of the DISEASE record, i.e. whatever
# field_fetch('DISEASE') yields (documented as a String).
def disease
  field_fetch('DISEASE')
end

#entry_id ⇒ Object

ENTRY – Returns contents of the ENTRY record as a String.



34
35
36
# File 'lib/bio/db/kegg/genome.rb', line 34

# ENTRY -- Returns the entry identifier: the first run of non-whitespace
# characters (/\S+/) in the value of field_fetch('ENTRY'). Anything after the
# first whitespace in the ENTRY record is dropped; the result is nil when the
# record contains no non-whitespace character.
def entry_id
  field_fetch('ENTRY')[/\S+/]
end

#lineage ⇒ Object

Returns contents of the TAXONOMY/LINEAGE record as a String.



70
71
72
# File 'lib/bio/db/kegg/genome.rb', line 70

# Returns the 'lineage' entry of the Hash produced by #taxonomy
# (the TAXONOMY/LINEAGE text; '' when the sub-record is absent).
def lineage
  taxonomy['lineage']
end

#nalen ⇒ Object Also known as: length

Returns number of nucleotides from the STATISTICS record as an Integer.



182
183
184
# File 'lib/bio/db/kegg/genome.rb', line 182

# Returns the 'num_nuc' entry of the Hash produced by #statistics, i.e. the
# nucleotide count parsed from the STATISTICS record. When no nucleotide
# line was parsed, the statistics Hash default (0.0) is returned.
def nalen
  statistics['num_nuc']
end

#name ⇒ Object

NAME – Returns contents of the NAME record as a String.



39
40
41
# File 'lib/bio/db/kegg/genome.rb', line 39

# NAME -- Returns the contents of the NAME record, i.e. whatever
# field_fetch('NAME') yields (documented as a String).
def name
  field_fetch('NAME')
end

#num_gene ⇒ Object

Returns number of protein genes from the STATISTICS record as an Integer.



188
189
190
# File 'lib/bio/db/kegg/genome.rb', line 188

# Returns the 'num_gene' entry of the Hash produced by #statistics, i.e. the
# protein gene count parsed from the STATISTICS record. When no such line
# was parsed, the statistics Hash default (0.0) is returned.
def num_gene
  statistics['num_gene']
end

#num_rna ⇒ Object

Returns number of RNA genes from the STATISTICS record as an Integer.



193
194
195
# File 'lib/bio/db/kegg/genome.rb', line 193

# Returns the 'num_rna' entry of the Hash produced by #statistics, i.e. the
# RNA gene count parsed from the STATISTICS record. When no such line was
# parsed, the statistics Hash default (0.0) is returned.
def num_rna
  statistics['num_rna']
end

#original_db ⇒ Object

ORIGINAL_DB – Returns contents of the ORIGINAL_DB record as a String.



80
81
82
# File 'lib/bio/db/kegg/genome.rb', line 80

# ORIGINAL_DB -- Returns the contents of the ORIGINAL_DB record, i.e. whatever
# field_fetch('ORIGINAL_DB') yields (documented as a String).
def original_db
  field_fetch('ORIGINAL_DB')
end

#plasmids ⇒ Object

PLASMID – Returns contents of the PLASMID records as an Array of Hash.



148
149
150
151
152
153
154
155
156
157
158
159
160
# File 'lib/bio/db/kegg/genome.rb', line 148

# PLASMID -- Returns the PLASMID records as an Array of Hash.
#
# Each top-level chunk produced by toptag2array(get('PLASMID')) becomes one
# Hash (default value '') that maps tag_get(field) to truncate(tag_cut(field))
# for every sub-field. The Array is cached in @data['PLASMID'] and reused on
# later calls.
def plasmids
  return @data['PLASMID'] if @data['PLASMID']

  # The cache slot is filled with the (still empty) Array before parsing starts.
  records = @data['PLASMID'] = []
  toptag2array(get('PLASMID')).each do |chunk|
    fields = Hash.new('')
    subtag2array(chunk).each { |field| fields[tag_get(field)] = truncate(tag_cut(field)) }
    records << fields
  end
  records
end

#references ⇒ Object

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
# File 'lib/bio/db/kegg/genome.rb', line 96

# REFERENCE -- Returns the REFERENCE records as a References collection of
# Reference objects.
#
# Every top-level chunk of get('REFERENCE') is turned into a Hash (default
# value '') that is handed to Reference.new:
#   'authors' :: Array of author names from the AUTHORS sub-field; the list is
#                split on ', ', the last element is further split on " and ",
#                and the first ',' inside each name is expanded to ', '.
#                An empty AUTHORS sub-field yields an empty Array.
#   'title'   :: text of the TITLE sub-field.
#   'journal', 'volume', 'pages', 'year', 'medline' ::
#                extracted from a JOURNAL sub-field of the form
#                "<journal> <vol>:<first>-<last> (<year>) [UI:<id>]";
#                any other JOURNAL text is stored whole under 'journal'.
#
# The result is cached in @data['REFERENCE'] and reused on later calls.
def references
  unless @data['REFERENCE']
    ary = toptag2array(get('REFERENCE')).map do |ref|
      hash = Hash.new('')
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /AUTHORS/
          authors = truncate(tag_cut(field)).split(', ')
          # "".split(', ') is [], so authors[-1] would be nil; only split the
          # trailing "X and Y" element when there is one.
          authors[-1] = authors[-1].split(/\s+and\s+/) unless authors.empty?
          hash['authors'] = authors.flatten.map { |a| a.sub(',', ', ') }
        when /TITLE/
          hash['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          if journal =~ /(.*) (\d+):(\d+)-(\d+) \((\d+)\) \[UI:(\d+)\]$/
            hash['journal'] = $1
            hash['volume']  = $2
            # NOTE(review): only the first page ($3) is stored; the captured
            # last page ($4) is discarded -- confirm whether 'pages' should
            # hold the full range.
            hash['pages']   = $3
            hash['year']    = $5
            hash['medline'] = $6
          else
            hash['journal'] = journal
          end
        end
      end
      Reference.new(hash)
    end
    @data['REFERENCE'] = References.new(ary)
  end
  @data['REFERENCE']
end

#statistics ⇒ Object

STATISTICS – Returns contents of the STATISTICS record as a Hash.



163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
# File 'lib/bio/db/kegg/genome.rb', line 163

# STATISTICS -- Returns the STATISTICS record as a Hash.
#
# Each line of get('STATISTICS') is checked against three patterns in order
# (first match wins) and the captured digits are stored as an Integer:
#   'num_nuc'  :: from "nucleotides: N"
#   'num_gene' :: from "protein genes: N"
#   'num_rna'  :: from "RNA genes: N"
# Keys that were never parsed read back as the Hash default, the Float 0.0.
# The Hash is cached in @data['STATISTICS'] and reused on later calls.
def statistics
  return @data['STATISTICS'] if @data['STATISTICS']

  # Insertion order matters: it reproduces the precedence of the checks.
  patterns = {
    'num_nuc'  => /nucleotides:\s+(\d+)/,
    'num_gene' => /protein genes:\s+(\d+)/,
    'num_rna'  => /RNA genes:\s+(\d+)/
  }
  counts = Hash.new(0.0)
  get('STATISTICS').each_line do |line|
    patterns.each do |key, pattern|
      found = pattern.match(line)
      next unless found

      counts[key] = found[1].to_i
      break
    end
  end
  @data['STATISTICS'] = counts
end

#taxid ⇒ Object

Returns NCBI taxonomy ID from the TAXONOMY record as a String.



65
66
67
# File 'lib/bio/db/kegg/genome.rb', line 65

# Returns the 'taxid' entry of the Hash produced by #taxonomy
# (the NCBI taxonomy ID text; '' when the sub-record is absent).
def taxid
  taxonomy['taxid']
end

#taxonomy ⇒ Object

TAXONOMY – Returns contents of the TAXONOMY record as a Hash.



50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/bio/db/kegg/genome.rb', line 50

# TAXONOMY -- Returns the TAXONOMY record as a Hash.
#
# The first two elements of subtag2array(get('TAXONOMY')) are taken as the
# taxonomy-ID field and the lineage field. Each is reduced with
# truncate(tag_cut(...)) and stored under 'taxid' and 'lineage'; a missing
# field is stored as ''. Unknown keys also read back as ''.
# The Hash is cached in @data['TAXONOMY'] and reused on later calls.
def taxonomy
  return @data['TAXONOMY'] if @data['TAXONOMY']

  id_field, lineage_field = subtag2array(get('TAXONOMY'))
  record = Hash.new('')
  record['taxid']   = id_field ? truncate(tag_cut(id_field)) : ''
  record['lineage'] = lineage_field ? truncate(tag_cut(lineage_field)) : ''
  @data['TAXONOMY'] = record
end