Module: Bio::NCBIDB::Common

Included in:
GenBank, GenPept
Defined in:
lib/bio/db/genbank/common.rb

Overview

Description

This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.

References

Constant Summary collapse

DELIMITER =
RS = "\n//\n"
TAGSIZE =
12

Instance Method Summary collapse

Instance Method Details

#acc_versionObject

Returns the first part of the VERSION record as “ACCESSION.VERSION” String.



57
58
59
# File 'lib/bio/db/genbank/common.rb', line 57

# Returns the first element of #versions converted with to_s, i.e. the
# "ACCESSION.VERSION" part of the VERSION record.  When #versions is empty
# the result is an empty String (nil.to_s).
def acc_version
  versions.first.to_s
end

#accessionObject

Returns the ACCESSION part of the acc_version.



62
63
64
# File 'lib/bio/db/genbank/common.rb', line 62

# Returns the text of #acc_version that precedes the first "." (the
# ACCESSION part).  An empty #acc_version yields an empty String.
def accession
  parts = acc_version.split(/\./)
  parts.first.to_s
end

#accessionsObject

ACCESSION – Returns contents of the ACCESSION record as an Array.



46
47
48
# File 'lib/bio/db/genbank/common.rb', line 46

# ACCESSION -- Returns the contents of the ACCESSION record as an Array of
# Strings: the value of field_fetch('ACCESSION') is stripped and then split
# on runs of whitespace.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end

#commentObject

COMMENT – Returns contents of the COMMENT record as a String.



199
200
201
202
203
204
# File 'lib/bio/db/genbank/common.rb', line 199

# COMMENT -- Returns the contents of the COMMENT record as a String.
#
# The leading "COMMENT" tag column is removed from the start of the text,
# the 12-space indent is removed from the start of every line, and one
# trailing line break is chomped.  A nil record yields an empty String.
def comment
  get('COMMENT').to_s.sub(/\ACOMMENT     /, '').gsub(/^ {12}/, '').chomp
end

#common_nameObject Also known as: vernacular_name



120
121
122
# File 'lib/bio/db/genbank/common.rb', line 120

# Returns the 'common_name' entry of the Hash returned by #source.
def common_name
  source['common_name']
end

#definitionObject

DEFINITION – Returns contents of the DEFINITION record as a String.



40
41
42
# File 'lib/bio/db/genbank/common.rb', line 40

# DEFINITION -- Returns the contents of the DEFINITION record, as obtained
# from field_fetch('DEFINITION').
def definition
  field_fetch('DEFINITION')
end

#featuresObject

FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.



209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
# File 'lib/bio/db/genbank/common.rb', line 209

# FEATURES -- Parses the FEATURES record and caches the result in
# @data['FEATURES'].
#
# Each feature is first collected as a flat Array
# [ feature key, location, qualifier, qualifier, ... ] and then passed to
# parse_qualifiers.  The resulting Array is extended with
# Bio::Features::BackwardCompatibility.
#
# With a block, every parsed feature is yielded in turn; without one, the
# cached Array is returned.
def features
  @data['FEATURES'] ||= begin
    table = []
    quote_open = false

    get('FEATURES').each_line do |row|
      # the "FEATURES ..." header line carries no feature data
      next if row =~ /^FEATURES/

      # columns 0-19: feature key (source, CDS, ...)
      key = row[0, 20].to_s.strip
      # columns 20-79: location or /qualifier text
      value = row[20, 60].to_s.chomp

      if row =~ /^ {5}\S/
        # a new feature starts here
        table << [key, value]
      elsif !quote_open && value =~ /^ \//		# gb:IRO125195
        # a new qualifier (/q="data..., /q="data...", /q=data, /q)
        table.last << value
        # remember a quoted value that does not close on this line
        quote_open = true if value =~ /="/ && value !~ /"$/
      else
        # continuation of the previous location or qualifier
        table.last.last << value
        # a trailing quote closes a multi-line quoted value
        quote_open = false if value =~ /"$/
      end
    end

    table.map! { |entry| parse_qualifiers(entry) }
    table.extend(Bio::Features::BackwardCompatibility)
  end

  return @data['FEATURES'] unless block_given?

  @data['FEATURES'].each { |feature| yield feature }
end

#giObject

Returns the second part of the VERSION record as a “GI:#######” String.



72
73
74
# File 'lib/bio/db/genbank/common.rb', line 72

# Returns the "GI:#######" element of the VERSION record as a String, or nil
# when the record carries no GI number.
#
# The element is selected by its "GI:" prefix instead of by position, so a
# VERSION line that holds only "ACCESSION.VERSION" is not reported as a GI.
def gi
  versions.find { |entry| entry =~ /\AGI:/ }
end

#initialize(entry) ⇒ Object



30
31
32
# File 'lib/bio/db/genbank/common.rb', line 30

# Passes +entry+ together with TAGSIZE on to the superclass initializer.
def initialize(entry)
  super(entry, TAGSIZE)
end

#keywordsObject

KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.



84
85
86
# File 'lib/bio/db/genbank/common.rb', line 84

# KEYWORDS -- Returns the contents of the KEYWORDS record as an Array of
# Strings.
#
# On first use the text from fetch('KEYWORDS') has a trailing "." removed
# and is split on "; "; the Array is kept in @data['KEYWORDS'] for later
# calls.
def keywords
  cached = @data['KEYWORDS']
  return cached if cached

  line = fetch('KEYWORDS')
  @data['KEYWORDS'] = line.chomp('.').split(/; /)
end

#locusObject

LOCUS – Locus class must be defined in child classes.



35
36
37
# File 'lib/bio/db/genbank/common.rb', line 35

# LOCUS -- Placeholder with an empty body (returns nil as written here).
def locus
  # must be overridden in each subclass
end

#nidObject

NID – Returns contents of the NID record as a String.



78
79
80
# File 'lib/bio/db/genbank/common.rb', line 78

# NID -- Returns the contents of the NID record, as obtained from
# field_fetch('NID').
def nid
  field_fetch('NID')
end

#organismObject



125
126
127
# File 'lib/bio/db/genbank/common.rb', line 125

# Returns the 'organism' entry of the Hash returned by #source.
def organism
  source['organism']
end

#originObject

ORIGIN – Returns contents of the ORIGIN record as a String.



263
264
265
266
267
268
269
270
271
# File 'lib/bio/db/genbank/common.rb', line 263

# ORIGIN -- Returns the contents of the ORIGIN record's first line as a
# String.
#
# On first use the record is split at its first newline.  The first line is
# passed through tag_cut and truncate and cached as @data['ORIGIN'].  From
# the remainder, digits, spaces, tabs, line breaks and "/" are deleted and
# the result is cached as @data['SEQUENCE'] (an empty String when the record
# has no second line).
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = (residues || "").delete("0-9 \t\n\r\/")
  @data['ORIGIN']
end

#referencesObject

REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.



136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/bio/db/genbank/common.rb', line 136

# REFERENCE -- Returns the contents of the REFERENCE records as an Array of
# Reference objects, cached in @data['REFERENCE'].
#
# Every record is converted into a Hash that is handed to Reference.new.
# Possible keys are 'embl_gb_record_number', 'sequence_position',
# 'authors', 'title', 'journal', 'volume', 'issue', 'pages', 'year',
# 'medline', 'pubmed' and 'comments'.  The Array is extended with
# Bio::References::BackwardCompatibility.
#
# With a block, every Reference is yielded in turn; without one, the cached
# Array is returned.
def references
  @data['REFERENCE'] ||= begin
    list = toptag2array(get('REFERENCE')).map do |record|
      attrs = {}
      subtag2array(record).each do |field|
        case tag_get(field)
        when /REFERENCE/
          # "<number>" optionally followed by "(<span>)"
          head = /(\d+)(\s*\((.+)\))?/m.match(tag_cut(field))
          if head
            attrs['embl_gb_record_number'] = head[1].to_i
            span = head[3]
            if span && span != 'sites'
              # "bases 1 to 10; 20 to 30" becomes "1-10, 20-30"
              attrs['sequence_position'] =
                span.sub(/\A\s*bases\s+/, '').
                     gsub(/(\d+)\s+to\s+(\d+)/, "\\1-\\2").
                     gsub(/\s*\;\s*/, ', ')
            end
          end
        when /AUTHORS/
          names = truncate(tag_cut(field)).split(/, /)
          # the final item may hold two names joined by "and"
          names[-1] = names[-1].split(/\s+and\s+/) if names[-1]
          attrs['authors'] = names.flatten.map { |name| name.sub(/,/, ', ') }
        when /TITLE/
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
          attrs['title'] = truncate(tag_cut(field))
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          cite = /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/.match(journal)
          if cite
            # "Name VOLUME (ISSUE), PAGES (YEAR)"
            %w[journal volume issue pages year].zip(cite.captures) do |key, text|
              attrs[key] = text
            end
          else
            attrs['journal'] = journal
          end
        when /MEDLINE/
          attrs['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          attrs['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          (attrs['comments'] ||= []) << truncate(tag_cut(field))
        end
      end
      Reference.new(attrs)
    end
    list.extend(Bio::References::BackwardCompatibility)
  end

  return @data['REFERENCE'] unless block_given?

  @data['REFERENCE'].each { |reference| yield reference }
end

#segmentObject

SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.



90
91
92
# File 'lib/bio/db/genbank/common.rb', line 90

# SEGMENT -- Returns the contents of the SEGMENT record as an "m/n" form
# String: every run of digits found in fetch('SEGMENT') is joined with "/".
# The String is kept in @data['SEGMENT'] for later calls.
def segment
  cached = @data['SEGMENT']
  return cached if cached

  numbers = fetch('SEGMENT').scan(/\d+/)
  @data['SEGMENT'] = numbers.join("/")
end

#sourceObject

SOURCE – Returns contents of the SOURCE record as a Hash.



96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
# File 'lib/bio/db/genbank/common.rb', line 96

# SOURCE -- Returns the contents of the SOURCE record as a Hash with the
# keys 'common_name', 'organism' and 'taxonomy' (any other key reads as '').
#
# The record is split at the text "ORGANISM".  The part before it becomes
# the common name.  The part after it is divided at the first token ending
# in ";" (or, failing that, the first token ending in "."): the text before
# that token is the organism, the token and everything after it is the
# taxonomy.  Without such a token the whole part is the organism.  The Hash
# is cached in @data['SOURCE'].
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  # ";" is tried first; "." is the fallback (rs:NC_001741)
  lineage = /\S+;/.match(org) || /\S+\./.match(org)
  if lineage
    organism = lineage.pre_match
    taxonomy = lineage[0] + lineage.post_match
  else
    organism = org
    taxonomy = ''
  end

  record = {
    'common_name' => truncate(tag_cut(name)),
    'organism' => truncate(organism),
    'taxonomy' => truncate(taxonomy),
  }
  record.default = ''
  @data['SOURCE'] = record
end

#taxonomyObject



129
130
131
# File 'lib/bio/db/genbank/common.rb', line 129

# Returns the 'taxonomy' entry of the Hash returned by #source.
def taxonomy
  source['taxonomy']
end

#versionObject

Returns the VERSION part of the acc_version as an Integer.



67
68
69
# File 'lib/bio/db/genbank/common.rb', line 67

# Returns the text of #acc_version that follows the last "." converted with
# to_i (the VERSION part).  An empty #acc_version yields 0.
def version
  parts = acc_version.split(/\./)
  parts.last.to_i
end

#versionsObject

VERSION – Returns contents of the VERSION record as an Array of Strings.



52
53
54
# File 'lib/bio/db/genbank/common.rb', line 52

# VERSION -- Returns the contents of the VERSION record as an Array of
# Strings.
#
# On first use the text from fetch('VERSION') is split on runs of
# whitespace; the Array is kept in @data['VERSION'] for later calls.
def versions
  cached = @data['VERSION']
  return cached if cached

  @data['VERSION'] = fetch('VERSION').split(/\s+/)
end