Module: Bio::NCBIDB::Common
- Defined in:
- lib/bio/db/genbank/common.rb
Overview
Description
This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
References
Constant Summary
- DELIMITER = RS = "\n//\n"
- TAGSIZE = 12
Instance Method Summary (collapse)
-
- (Object) acc_version
Returns the first part of the VERSION record as "ACCESSION.VERSION" String.
-
- (Object) accession
Returns the ACCESSION part of the acc_version.
-
- (Object) accessions
ACCESSION -- Returns contents of the ACCESSION record as an Array.
-
- (Object) comment
COMMENT -- Returns contents of the COMMENT record as a String.
- - (Object) common_name (also: #vernacular_name)
-
- (Object) definition
DEFINITION -- Returns contents of the DEFINITION record as a String.
-
- (Object) features
FEATURES -- Returns contents of the FEATURES record as an array of Bio::Feature objects.
-
- (Object) gi
Returns the second part of the VERSION record as a "GI:#######" String.
-
- (Common) initialize(entry)
A new instance of Common.
-
- (Object) keywords
KEYWORDS -- Returns contents of the KEYWORDS record as an Array of Strings.
-
- (Object) locus
LOCUS -- Locus class must be defined in child classes.
-
- (Object) nid
NID -- Returns contents of the NID record as a String.
- - (Object) organism
-
- (Object) origin
ORIGIN -- Returns contents of the ORIGIN record as a String.
-
- (Object) references
REFERENCE -- Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
-
- (Object) segment
SEGMENT -- Returns contents of the SEGMENT record as a "m/n" form String.
-
- (Object) source
SOURCE -- Returns contents of the SOURCE record as a Hash.
- - (Object) taxonomy
-
- (Object) version
Returns the VERSION part of the acc_version as an Integer.
-
- (Object) versions
VERSION -- Returns contents of the VERSION record as an Array of Strings.
Instance Method Details
- (Object) acc_version
Returns the first part of the VERSION record as "ACCESSION.VERSION" String.
57 58 59 |
# File 'lib/bio/db/genbank/common.rb', line 57
# Returns the first token of the VERSION record (the "ACCESSION.VERSION"
# part) as a String. When #versions has no first element the result is
# an empty String, because nil.to_s is "".
def acc_version
  leading_token = versions[0]
  leading_token.to_s
end
- (Object) accession
Returns the ACCESSION part of the acc_version.
62 63 64 |
# File 'lib/bio/db/genbank/common.rb', line 62
# Returns the ACCESSION part of #acc_version, i.e. everything before the
# first "." as a String (empty String when #acc_version is empty).
def accession
  accession_part, = acc_version.split(/\./)
  accession_part.to_s
end
- (Object) accessions
ACCESSION -- Returns contents of the ACCESSION record as an Array.
46 47 48 |
# File 'lib/bio/db/genbank/common.rb', line 46
# ACCESSION -- Returns the whitespace-separated tokens of the ACCESSION
# record as an Array of Strings.
def accessions
  record = field_fetch('ACCESSION')
  record.strip.split(/\s+/)
end
- (Object) comment
COMMENT -- Returns contents of the COMMENT record as a String.
199 200 201 202 203 204 |
# File 'lib/bio/db/genbank/common.rb', line 199
# COMMENT -- Returns the contents of the COMMENT record as a String with
# the leading tag, the 12-column continuation indent and one trailing
# line terminator removed.
#
# NOTE(review): the tag pattern below has a single space after COMMENT,
# while continuation lines are stripped of 12 columns; the listing this
# was taken from may have collapsed whitespace -- confirm against the
# source file.
def comment
  get('COMMENT').to_s.sub(/\ACOMMENT /, '').gsub(/^ {12}/, '').chomp
end
- (Object) common_name Also known as: vernacular_name
120 121 122 |
# File 'lib/bio/db/genbank/common.rb', line 120
# Returns the 'common_name' entry of the Hash built by #source.
def common_name
  source_info = source
  source_info['common_name']
end
- (Object) definition
DEFINITION -- Returns contents of the DEFINITION record as a String.
40 41 42 |
# File 'lib/bio/db/genbank/common.rb', line 40
# DEFINITION -- Returns the contents of the DEFINITION record, as
# produced by field_fetch.
def definition
  record = field_fetch('DEFINITION')
  record
end
- (Object) features
FEATURES -- Returns contents of the FEATURES record as an array of Bio::Feature objects.
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# File 'lib/bio/db/genbank/common.rb', line 209
# FEATURES -- Returns the contents of the FEATURES record as an Array of
# the objects produced by parse_qualifiers (documented as Bio::Feature
# objects), extended with Bio::Features::BackwardCompatibility.
#
# The parsed result is cached in @data['FEATURES']. When a block is
# given, each feature is yielded to it instead of the Array being
# returned directly.
def features
  unless @data['FEATURES']
    ary = []
    in_quote = false
    get('FEATURES').each_line do |line|
      # skip the "FEATURES ..." header line
      next if line =~ /^FEATURES/

      # feature type  (source, CDS, ...)
      head = line[0, 20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20, 60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      if line =~ /^ {5}\S/
        ary.push([head, body])

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      elsif body =~ /^ \// && !in_quote # gb:IRO125195
        ary.last.push(body)

        # flag for open quote (/q="data...)
        in_quote = true if body =~ /="/ && body !~ /"$/

      # feature qualifier continued (...data..., ...data...")
      else
        ary.last.last << body

        # flag for closing quote (/q="data... lines ...")
        in_quote = false if body =~ /"$/
      end
    end

    # Each collected sub-array must be handed to parse_qualifiers; the
    # block parameter had been lost, so nothing was being passed.
    ary.collect! do |subary|
      parse_qualifiers(subary)
    end

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end

  if block_given?
    @data['FEATURES'].each do |f|
      yield f
    end
  else
    @data['FEATURES']
  end
end
- (Object) gi
Returns the second part of the VERSION record as a "GI:#######" String.
72 73 74 |
# File 'lib/bio/db/genbank/common.rb', line 72
# Returns the "GI:#######" token of the VERSION record as a String, or
# nil when the record carries no such token.
#
# Picking the last token unconditionally would return the
# "ACCESSION.VERSION" token for records whose VERSION line has only one
# field, so the token is selected by its "GI:" prefix instead.
def gi
  versions.find { |token| token =~ /\AGI:/ }
end
- (Common) initialize(entry)
A new instance of Common
30 31 32 |
# File 'lib/bio/db/genbank/common.rb', line 30 def initialize(entry) super(entry, TAGSIZE) end |
- (Object) keywords
KEYWORDS -- Returns contents of the KEYWORDS record as an Array of Strings.
84 85 86 |
# File 'lib/bio/db/genbank/common.rb', line 84
# KEYWORDS -- Returns the contents of the KEYWORDS record as an Array of
# Strings: one trailing "." is removed and the rest is split on "; ".
# The result is cached in @data['KEYWORDS'].
def keywords
  cached = @data['KEYWORDS']
  return cached if cached

  @data['KEYWORDS'] = fetch('KEYWORDS').chomp('.').split(/; /)
end
- (Object) locus
LOCUS -- Locus class must be defined in child classes.
35 36 37 |
# File 'lib/bio/db/genbank/common.rb', line 35 def locus # must be overrided in each subclass end |
- (Object) nid
NID -- Returns contents of the NID record as a String.
78 79 80 |
# File 'lib/bio/db/genbank/common.rb', line 78
# NID -- Returns the contents of the NID record, as produced by
# field_fetch.
def nid
  record = field_fetch('NID')
  record
end
- (Object) organism
125 126 127 |
# File 'lib/bio/db/genbank/common.rb', line 125
# Returns the 'organism' entry of the Hash built by #source.
def organism
  source_info = source
  source_info['organism']
end
- (Object) origin
ORIGIN -- Returns contents of the ORIGIN record as a String.
263 264 265 266 267 268 269 270 271 |
# File 'lib/bio/db/genbank/common.rb', line 263
# ORIGIN -- Returns the first line of the ORIGIN record (after tag_cut
# and truncate) and caches it in @data['ORIGIN'].
#
# As a side effect the remaining lines are stored in @data['SEQUENCE']
# with digits, spaces, tabs, line terminators and "/" removed.
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  residues ||= ""
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = residues.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
- (Object) references
REFERENCE -- Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/bio/db/genbank/common.rb', line 136
# REFERENCE -- Returns the contents of the REFERENCE records as an Array
# of Reference objects, extended with
# Bio::References::BackwardCompatibility.
#
# Each record is split into sub-fields, which are collected into a Hash
# (keys: 'embl_gb_record_number', 'sequence_position', 'authors',
# 'title', 'journal', 'volume', 'issue', 'pages', 'year', 'medline',
# 'pubmed', 'comments') and passed to Reference.new. The result is cached
# in @data['REFERENCE']. When a block is given, each reference is
# yielded to it instead of the Array being returned directly.
def references
  unless @data['REFERENCE']
    ary = []
    toptag2array(get('REFERENCE')).each do |ref|
      hash = Hash.new
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /REFERENCE/
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field)
            hash['embl_gb_record_number'] = $1.to_i
            if $3 and $3 != 'sites'
              # normalise "bases 1 to 100; 200 to 300" into "1-100, 200-300"
              seqpos = $3
              seqpos.sub!(/\A\s*bases\s+/, '')
              seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              seqpos.gsub!(/\s*\;\s*/, ', ')
              hash['sequence_position'] = seqpos
            end
          end
        when /AUTHORS/
          # The local holding the author list had been lost from this
          # branch, leaving bare "= ..." statements; it is restored here.
          authors = truncate(tag_cut(field))
          authors = authors.split(/, /)
          # the last element may hold two names joined by "and"
          authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
          # "Smith,J." -> "Smith, J."
          authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
          hash['authors'] = authors
        when /TITLE/
          hash['title'] = truncate(tag_cut(field))
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          # "Name VOL (ISSUE), FROM-TO (YEAR)" is broken into its parts;
          # anything else is kept verbatim as the journal string.
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            hash['journal'] = $1
            hash['volume']  = $2
            hash['issue']   = $3
            hash['pages']   = $4
            hash['year']    = $5
          else
            hash['journal'] = journal
          end
        when /MEDLINE/
          hash['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          hash['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          hash['comments'] ||= []
          hash['comments'].push truncate(tag_cut(field))
        end
      end
      ary.push(Reference.new(hash))
    end
    @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
  end

  if block_given?
    @data['REFERENCE'].each do |r|
      yield r
    end
  else
    @data['REFERENCE']
  end
end
- (Object) segment
SEGMENT -- Returns contents of the SEGMENT record as a "m/n" form String.
90 91 92 |
# File 'lib/bio/db/genbank/common.rb', line 90
# SEGMENT -- Returns the contents of the SEGMENT record as an "m/n" form
# String: every run of digits in the record, joined with "/". The result
# is cached in @data['SEGMENT'].
def segment
  cached = @data['SEGMENT']
  return cached if cached

  numbers = fetch('SEGMENT').scan(/\d+/)
  @data['SEGMENT'] = numbers.join("/")
end
- (Object) source
SOURCE -- Returns contents of the SOURCE record as a Hash.
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/bio/db/genbank/common.rb', line 96
# SOURCE -- Returns the contents of the SOURCE record as a Hash with the
# keys 'common_name', 'organism' and 'taxonomy'; any other key yields ''.
# The Hash is cached in @data['SOURCE'].
#
# The text after the ORGANISM tag is divided at the first
# whitespace-free token ending in ";" (or, failing that, in "."): what
# precedes the token is the organism, the token and everything after it
# is the taxonomy.
def source
  return @data['SOURCE'] if @data['SOURCE']

  name, org = get('SOURCE').split('ORGANISM')
  org ||= ""

  # ";" is tried first; "." is the fallback (rs:NC_001741)
  boundary = org.match(/\S+;/) || org.match(/\S+\./)
  if boundary
    organism = boundary.pre_match
    taxonomy = boundary[0] + boundary.post_match
  else
    organism = org
    taxonomy = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name)),
    'organism'    => truncate(organism),
    'taxonomy'    => truncate(taxonomy),
  }
  info.default = ''
  @data['SOURCE'] = info
end
- (Object) taxonomy
129 130 131 |
# File 'lib/bio/db/genbank/common.rb', line 129
# Returns the 'taxonomy' entry of the Hash built by #source.
def taxonomy
  source_info = source
  source_info['taxonomy']
end
- (Object) version
Returns the VERSION part of the acc_version as an Integer.
67 68 69 |
# File 'lib/bio/db/genbank/common.rb', line 67
# Returns the VERSION part of #acc_version, i.e. the text after the last
# ".", converted with to_i (0 when there is nothing numeric to convert).
def version
  pieces = acc_version.split(/\./)
  pieces[-1].to_i
end
- (Object) versions
VERSION -- Returns contents of the VERSION record as an Array of Strings.
52 53 54 |
# File 'lib/bio/db/genbank/common.rb', line 52
# VERSION -- Returns the whitespace-separated tokens of the VERSION
# record as an Array of Strings. The result is cached in
# @data['VERSION'].
def versions
  cached = @data['VERSION']
  return cached if cached

  @data['VERSION'] = fetch('VERSION').split(/\s+/)
end