Module: Bio::NCBIDB::Common

Defined in:
lib/bio/db/genbank/common.rb

Overview

Description

This module defines a framework common to GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.

References

Constant Summary

DELIMITER =
RS = "\n//\n"
TAGSIZE =
12

Instance Method Summary (collapse)

Instance Method Details

- (Object) acc_version

Returns the first part of the VERSION record as "ACCESSION.VERSION" String.



57
58
59
# File 'lib/bio/db/genbank/common.rb', line 57

# Returns the leading token of the VERSION record (the
# "ACCESSION.VERSION" part) as a String.
#
# When #versions is empty the result is "" because nil.to_s is "".
def acc_version
  leading = versions[0]
  leading.to_s
end

- (Object) accession

Returns the ACCESSION part of the acc_version.



62
63
64
# File 'lib/bio/db/genbank/common.rb', line 62

# Returns the ACCESSION half of #acc_version, i.e. everything before the
# first "." in that String.
#
# An empty acc_version gives "" (the split is empty and nil.to_s is "").
def accession
  pieces = acc_version.split(/\./)
  pieces[0].to_s
end

- (Object) accessions

ACCESSION -- Returns contents of the ACCESSION record as an Array.



46
47
48
# File 'lib/bio/db/genbank/common.rb', line 46

# ACCESSION -- Returns the contents of the ACCESSION record as an Array of
# Strings, obtained by stripping the fetched field and splitting it on runs
# of whitespace.
def accessions
  field = field_fetch('ACCESSION')
  field.strip.split(/\s+/)
end

- (Object) comment

COMMENT -- Returns contents of the COMMENT record as a String.



199
200
201
202
203
204
# File 'lib/bio/db/genbank/common.rb', line 199

# COMMENT -- Returns the contents of the COMMENT record as a String.
#
# The leading "COMMENT" tag (padded to 12 columns) is removed from the first
# line, the 12-space indent is removed from every following line, and one
# trailing line terminator is dropped.
def comment
  text = get('COMMENT').to_s
  text = text.sub(/\ACOMMENT     /, '')
  text.gsub(/^ {12}/, '').chomp
end

- (Object) common_name Also known as: vernacular_name



120
121
122
# File 'lib/bio/db/genbank/common.rb', line 120

# Returns the 'common_name' entry of the Hash produced by #source.
def common_name
  source['common_name']
end

- (Object) definition

DEFINITION -- Returns contents of the DEFINITION record as a String.



40
41
42
# File 'lib/bio/db/genbank/common.rb', line 40

# DEFINITION -- Returns the contents of the DEFINITION record, as produced
# by field_fetch for the 'DEFINITION' tag.
def definition
  field_fetch('DEFINITION')
end

- (Object) features

FEATURES -- Returns contents of the FEATURES record as an array of Bio::Feature objects.



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/bio/db/genbank/common.rb', line 209

# FEATURES -- Parses the FEATURES record and caches the result in
# @data['FEATURES'].
#
# Every line of get('FEATURES') is cut into a "head" (the first 20
# characters, stripped) and a "body" (up to 60 characters starting at
# offset 20, without the line terminator).  Lines are grouped into one
# sub-array per feature, [ feature type, position, qualifier, ... ], and each
# sub-array is then converted by parse_qualifiers.
#
# With a block, each parsed feature is yielded in turn; without a block, the
# cached array (extended with Bio::Features::BackwardCompatibility) is
# returned.
def features
  unless @data['FEATURES']
    ary = []
    # true while inside a quoted qualifier value that spans several lines,
    # so that a continuation line starting with " /" is not taken for a new
    # qualifier
    in_quote = false
    get('FEATURES').each_line do |line|
      # skip the "FEATURES ..." header line itself
      next if line =~ /^FEATURES/

      # feature type  (source, CDS, ...)
      head = line[0,20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20,60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      # (a new feature starts on a line with exactly five leading spaces)
      if line =~ /^ {5}\S/
        ary.push([ head, body ])

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      # NOTE(review): ary.last is nil if no feature line has been seen yet
      elsif body =~ /^ \// and not in_quote		# gb:IRO125195
        ary.last.push(body)
        
        # flag for open quote (/q="data...)
        if body =~ /="/ and body !~ /"$/
          in_quote = true
        end

      # feature qualifier continued (...data..., ...data...")
      # (also reached for position lines that wrap onto the next line)
      else
        ary.last.last << body

        # flag for closing quote (/q="data... lines  ...")
        if body =~ /"$/
          in_quote = false
        end
      end
    end

    # replace each raw sub-array with the result of parse_qualifiers
    ary.collect! do |subary|
      parse_qualifiers(subary)
    end

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end
  if block_given?
    @data['FEATURES'].each do |f|
      yield f
    end
  else
    @data['FEATURES']
  end
end

- (Object) gi

Returns the second part of the VERSION record as a "GI:#######" String.



72
73
74
# File 'lib/bio/db/genbank/common.rb', line 72

# Returns the second part of the VERSION record as a "GI:#######" String.
#
# Returns nil when the VERSION record has no second token (for example a
# record whose VERSION line carries only "ACCESSION.VERSION") or is empty.
# Taking versions.last unconditionally would hand back the
# ACCESSION.VERSION token in that case, which is not a GI.
def gi
  parts = versions
  parts.last if parts.size > 1
end

- (Common) initialize(entry)

A new instance of Common

Returns:

  • (Common)

    a new instance of Common



30
31
32
# File 'lib/bio/db/genbank/common.rb', line 30

# Forwards +entry+ to the superclass constructor together with TAGSIZE
# (12 in this module's constant summary).
def initialize(entry)
  super(entry, TAGSIZE)
end

- (Object) keywords

KEYWORDS -- Returns contents of the KEYWORDS record as an Array of Strings.



84
85
86
# File 'lib/bio/db/genbank/common.rb', line 84

# KEYWORDS -- Returns the contents of the KEYWORDS record as an Array of
# Strings.
#
# One trailing "." is removed and the remainder is split on "; ".  The
# result is memoized in @data['KEYWORDS'].
def keywords
  @data['KEYWORDS'] ||= begin
    record = fetch('KEYWORDS')
    record.chomp('.').split(/; /)
  end
end

- (Object) locus

LOCUS -- Locus class must be defined in child classes.



35
36
37
# File 'lib/bio/db/genbank/common.rb', line 35

# LOCUS -- Placeholder for the LOCUS accessor.  The body is intentionally
# empty, so this version returns nil.
def locus
  # must be overridden in each subclass
end

- (Object) nid

NID -- Returns contents of the NID record as a String.



78
79
80
# File 'lib/bio/db/genbank/common.rb', line 78

# NID -- Returns the contents of the NID record, as produced by field_fetch
# for the 'NID' tag.
def nid
  field_fetch('NID')
end

- (Object) organism



125
126
127
# File 'lib/bio/db/genbank/common.rb', line 125

# Returns the 'organism' entry of the Hash produced by #source.
def organism
  source['organism']
end

- (Object) origin

ORIGIN -- Returns contents of the ORIGIN record as a String.



263
264
265
266
267
268
269
270
271
# File 'lib/bio/db/genbank/common.rb', line 263

# ORIGIN -- Returns the contents of the ORIGIN record as a String.
#
# The first line of the record (tag removed, then truncated) is cached in
# @data['ORIGIN'].  As a side effect, the remaining lines are stripped of
# digits, whitespace and "/" characters and cached in @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  first_line, remainder = get('ORIGIN').split("\n", 2)
  remainder ||= ""
  @data['ORIGIN'] = truncate(tag_cut(first_line))
  @data['SEQUENCE'] = remainder.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end

- (Object) references

REFERENCE -- Returns contents of the REFERENCE records as an Array of Bio::Reference objects.



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/bio/db/genbank/common.rb', line 136

# REFERENCE -- Parses the REFERENCE records and caches the result in
# @data['REFERENCE'].
#
# get('REFERENCE') is split into individual references by toptag2array and
# each reference into its fields by subtag2array.  For every reference a
# Hash is filled in (keys: 'embl_gb_record_number', 'sequence_position',
# 'authors', 'title', 'journal', 'volume', 'issue', 'pages', 'year',
# 'medline', 'pubmed', 'comments' -- only those present in the record) and
# passed to Reference.new.
#
# With a block, each reference is yielded in turn; without a block, the
# cached array (extended with Bio::References::BackwardCompatibility) is
# returned.
def references
  unless @data['REFERENCE']
    ary = []
    toptag2array(get('REFERENCE')).each do |ref|
      hash = Hash.new
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /REFERENCE/
          # "<number>" optionally followed by "(<position text>)"
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field) then
            hash['embl_gb_record_number'] = $1.to_i
            if $3 and $3 != 'sites' then
              seqpos = $3
              # drop the leading "bases", turn "N to M" into "N-M" and
              # ";" separators into ", "
              seqpos.sub!(/\A\s*bases\s+/, '')
              seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              seqpos.gsub!(/\s*\;\s*/, ', ')
              hash['sequence_position'] = seqpos
            end
          end
        when /AUTHORS/
          authors = truncate(tag_cut(field))
          authors = authors.split(/, /)
          # the last element may hold two names joined by "and"
          authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
          # insert a space after the first comma of each name
          authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
          hash['authors']	= authors
        when /TITLE/
          hash['title']	= truncate(tag_cut(field))
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          # "<journal> <volume> (<issue>), <first>-<last> (<year>)";
          # anything else is stored unparsed under 'journal'
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
    	hash['journal']	= $1
    	hash['volume']	= $2
    	hash['issue']	= $3
    	hash['pages']	= $4
    	hash['year']	= $5
          else
    	hash['journal'] = journal
          end
        when /MEDLINE/
          hash['medline']	= truncate(tag_cut(field))
        when /PUBMED/
          hash['pubmed']	= truncate(tag_cut(field))
        when /REMARK/
          # a reference may carry several REMARK fields; collect them all
          hash['comments'] ||= []
          hash['comments'].push truncate(tag_cut(field))
        end
      end
      ary.push(Reference.new(hash))
    end
    @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
  end
  if block_given?
    @data['REFERENCE'].each do |r|
      yield r
    end
  else
    @data['REFERENCE']
  end
end

- (Object) segment

SEGMENT -- Returns contents of the SEGMENT record as a "m/n" form String.



90
91
92
# File 'lib/bio/db/genbank/common.rb', line 90

# SEGMENT -- Returns the contents of the SEGMENT record as an "m/n" form
# String.
#
# All runs of digits in the fetched record are joined with "/"; the result
# is memoized in @data['SEGMENT'].
def segment
  @data['SEGMENT'] ||= begin
    numbers = fetch('SEGMENT').scan(/\d+/)
    numbers.join("/")
  end
end

- (Object) source

SOURCE -- Returns contents of the SOURCE record as a Hash.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/bio/db/genbank/common.rb', line 96

# SOURCE -- Returns the contents of the SOURCE record as a Hash with the
# String keys 'common_name', 'organism' and 'taxonomy'.  Any other key
# yields '' (the Hash default).  The Hash is cached in @data['SOURCE'].
#
# The record is split at the literal 'ORGANISM'.  The part before it (tag
# removed) becomes the common name.  In the part after it, the first
# whitespace-free token ending in ";" marks the start of the taxonomy; if
# there is none, the first token ending in "." is used instead
# (rs:NC_001741).  Text before that token is the organism name.  When
# neither token exists the whole part is the organism and the taxonomy is
# empty.
def source
  cached = @data['SOURCE']
  return cached if cached

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  boundary = org.match(/\S+;/) || org.match(/\S+\./)
  if boundary
    organism = boundary.pre_match
    taxonomy = boundary[0] + boundary.post_match
  else
    organism = org
    taxonomy = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name)),
    'organism' => truncate(organism),
    'taxonomy' => truncate(taxonomy),
  }
  info.default = ''
  @data['SOURCE'] = info
end

- (Object) taxonomy



129
130
131
# File 'lib/bio/db/genbank/common.rb', line 129

# Returns the 'taxonomy' entry of the Hash produced by #source.
def taxonomy
  source['taxonomy']
end

- (Object) version

Returns the VERSION part of the acc_version as an Integer.



67
68
69
# File 'lib/bio/db/genbank/common.rb', line 67

# Returns the VERSION part of #acc_version as an Integer: the text after
# the last "." converted with to_i.
#
# Because to_i is lenient, an acc_version that is empty or has no numeric
# suffix gives 0.
def version
  pieces = acc_version.split(/\./)
  pieces[-1].to_i
end

- (Object) versions

VERSION -- Returns contents of the VERSION record as an Array of Strings.



52
53
54
# File 'lib/bio/db/genbank/common.rb', line 52

# VERSION -- Returns the contents of the VERSION record as an Array of
# Strings, split on runs of whitespace.  The Array is memoized in
# @data['VERSION'].
def versions
  @data['VERSION'] ||= begin
    record = fetch('VERSION')
    record.split(/\s+/)
  end
end