Module: Bio::NCBIDB::Common
Overview
Description
This module defines a common framework shared by GenBank, GenPept, RefSeq, and DDBJ. For more details, see the documentation in each of the genbank/*.rb files.
References
Constant Summary collapse
- DELIMITER =
RS = "\n//\n"
- TAGSIZE =
12
Instance Method Summary collapse
-
#acc_version ⇒ Object
Returns the first part of the VERSION record as “ACCESSION.VERSION” String.
-
#accession ⇒ Object
Returns the ACCESSION part of the acc_version.
-
#accessions ⇒ Object
ACCESSION – Returns contents of the ACCESSION record as an Array.
-
#comment ⇒ Object
COMMENT – Returns contents of the COMMENT record as a String.
- #common_name ⇒ Object (also: #vernacular_name)
-
#definition ⇒ Object
DEFINITION – Returns contents of the DEFINITION record as a String.
-
#features ⇒ Object
FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.
-
#gi ⇒ Object
Returns the second part of the VERSION record as a “GI:#######” String.
- #initialize(entry) ⇒ Object
-
#keywords ⇒ Object
KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.
-
#locus ⇒ Object
LOCUS – Locus class must be defined in child classes.
-
#nid ⇒ Object
NID – Returns contents of the NID record as a String.
- #organism ⇒ Object
-
#origin ⇒ Object
ORIGIN – Returns contents of the ORIGIN record as a String.
-
#references ⇒ Object
REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
-
#segment ⇒ Object
SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.
-
#source ⇒ Object
SOURCE – Returns contents of the SOURCE record as a Hash.
- #taxonomy ⇒ Object
-
#version ⇒ Object
Returns the VERSION part of the acc_version as an Integer.
-
#versions ⇒ Object
VERSION – Returns contents of the VERSION record as an Array of Strings.
Instance Method Details
#acc_version ⇒ Object
Returns the first part of the VERSION record as “ACCESSION.VERSION” String.
57 58 59 |
# File 'lib/bio/db/genbank/common.rb', line 57

# Returns the first element of +versions+ as a String, i.e. the
# "ACCESSION.VERSION" part of the VERSION record.
# When +versions+ is empty the result is "" (nil.to_s).
def acc_version
  leading_field = versions.first
  leading_field.to_s
end
#accession ⇒ Object
Returns the ACCESSION part of the acc_version.
62 63 64 |
# File 'lib/bio/db/genbank/common.rb', line 62

# Returns the ACCESSION part of +acc_version+: everything before the
# first "." as a String ("" when +acc_version+ is empty).
def accession
  pieces = acc_version.split(/\./)
  pieces.first.to_s
end
#accessions ⇒ Object
ACCESSION – Returns contents of the ACCESSION record as an Array.
46 47 48 |
# File 'lib/bio/db/genbank/common.rb', line 46

# ACCESSION -- Returns contents of the ACCESSION record as an Array.
# The fetched field is stripped and then split on runs of whitespace.
def accessions
  raw_field = field_fetch('ACCESSION')
  raw_field.strip.split(/\s+/)
end
#comment ⇒ Object
COMMENT – Returns contents of the COMMENT record as a String.
199 200 201 202 203 204 |
# File 'lib/bio/db/genbank/common.rb', line 199

# COMMENT -- Returns contents of the COMMENT record as a String.
#
# The leading "COMMENT" tag is removed together with its padding (one to
# five spaces, which fills the 12-column tag field), the 12-space indent is
# removed from every following line, and one trailing newline is dropped.
# A missing record (nil from +get+) yields "".
def comment
  # Explicit quantifier: strip the whole tag column, not just one space, so
  # the first line is left-aligned like the continuation lines below.
  str = get('COMMENT').to_s.sub(/\ACOMMENT {1,5}/, '')
  # Continuation lines are indented by the tag width (12 columns).
  str.gsub!(/^ {12}/, '')
  # chomp! returns nil when nothing was removed, so str is returned explicitly.
  str.chomp!
  str
end
#common_name ⇒ Object Also known as: vernacular_name
120 121 122 |
# File 'lib/bio/db/genbank/common.rb', line 120

# Returns the 'common_name' entry of the Hash produced by +source+.
def common_name
  source_fields = source
  source_fields['common_name']
end
#definition ⇒ Object
DEFINITION – Returns contents of the DEFINITION record as a String.
40 41 42 |
# File 'lib/bio/db/genbank/common.rb', line 40

# DEFINITION -- Returns contents of the DEFINITION record as a String,
# exactly as delivered by +field_fetch+.
def definition
  definition_text = field_fetch('DEFINITION')
  definition_text
end
#features ⇒ Object
FEATURES – Returns contents of the FEATURES record as an array of Bio::Feature objects.
209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 |
# File 'lib/bio/db/genbank/common.rb', line 209

# FEATURES -- Returns contents of the FEATURES record as an array of
# feature objects (each element is whatever +parse_qualifiers+ returns for
# one collected feature sub-array).
#
# The parsed array is cached in @data['FEATURES'] and extended with
# Bio::Features::BackwardCompatibility. When a block is given, each cached
# feature is yielded to it; otherwise the cached array is returned.
def features
  unless @data['FEATURES']
    ary = []
    in_quote = false
    get('FEATURES').each_line do |line|
      # skip the "FEATURES ..." header line
      next if line =~ /^FEATURES/

      # feature type (source, CDS, ...)
      head = line[0, 20].to_s.strip

      # feature value (position or /qualifier=)
      body = line[20, 60].to_s.chomp

      # sub-array [ feature type, position, /q="data", ... ]
      if line =~ /^ {5}\S/
        ary.push([head, body])

      # feature qualifier start (/q="data..., /q="data...", /q=data, /q)
      elsif body =~ /^ \// && !in_quote # gb:IRO125195
        ary.last.push(body)
        # flag for open quote (/q="data...)
        in_quote = true if body =~ /="/ && body !~ /"$/

      # feature qualifier continued (...data..., ...data...")
      else
        ary.last.last << body
        # flag for closing quote (/q="data... lines ...")
        in_quote = false if body =~ /"$/
      end
    end

    # Hand every collected sub-array to parse_qualifiers; the block
    # parameter is required so that each feature is actually passed on.
    ary.collect! do |subary|
      parse_qualifiers(subary)
    end

    @data['FEATURES'] = ary.extend(Bio::Features::BackwardCompatibility)
  end
  if block_given?
    @data['FEATURES'].each do |f|
      yield f
    end
  else
    @data['FEATURES']
  end
end
#gi ⇒ Object
Returns the second part of the VERSION record as a “GI:#######” String.
72 73 74 |
# File 'lib/bio/db/genbank/common.rb', line 72

# Returns the GI part of the VERSION record as a "GI:#######" String.
#
# The field is selected by its "GI:" prefix rather than by position, so a
# VERSION record that carries only "ACCESSION.VERSION" yields nil instead
# of the accession token. Also returns nil when +versions+ is empty.
def gi
  versions.find { |field| field.start_with?('GI:') }
end
#initialize(entry) ⇒ Object
30 31 32 |
# File 'lib/bio/db/genbank/common.rb', line 30

# Passes +entry+ on to the superclass initializer together with TAGSIZE
# (the tag column width constant of this module).
def initialize(entry)
  tag_width = TAGSIZE
  super(entry, tag_width)
end
#keywords ⇒ Object
KEYWORDS – Returns contents of the KEYWORDS record as an Array of Strings.
84 85 86 |
# File 'lib/bio/db/genbank/common.rb', line 84

# KEYWORDS -- Returns contents of the KEYWORDS record as an Array of Strings.
# The fetched text loses one trailing "." and is split on "; ".
# The result is cached in @data['KEYWORDS'].
def keywords
  cached = @data['KEYWORDS']
  return cached if cached

  @data['KEYWORDS'] = fetch('KEYWORDS').chomp('.').split(/; /)
end
#locus ⇒ Object
LOCUS – Locus class must be defined in child classes.
35 36 37 |
# File 'lib/bio/db/genbank/common.rb', line 35

# LOCUS -- placeholder that returns nil. Each subclass must override it
# with its own implementation.
def locus
  nil
end
#nid ⇒ Object
NID – Returns contents of the NID record as a String.
78 79 80 |
# File 'lib/bio/db/genbank/common.rb', line 78

# NID -- Returns contents of the NID record as a String,
# exactly as delivered by +field_fetch+.
def nid
  nid_text = field_fetch('NID')
  nid_text
end
#organism ⇒ Object
125 126 127 |
# File 'lib/bio/db/genbank/common.rb', line 125

# Returns the 'organism' entry of the Hash produced by +source+.
def organism
  source_fields = source
  source_fields['organism']
end
#origin ⇒ Object
ORIGIN – Returns contents of the ORIGIN record as a String.
263 264 265 266 267 268 269 270 271 |
# File 'lib/bio/db/genbank/common.rb', line 263

# ORIGIN -- Returns contents of the ORIGIN record as a String.
#
# On the first call the record is split at its first newline: the first
# line (tag removed, truncated) is cached in @data['ORIGIN'], and the rest,
# with digits, whitespace and "/" removed, is cached in @data['SEQUENCE'].
def origin
  return @data['ORIGIN'] if @data['ORIGIN']

  header, residues = get('ORIGIN').split("\n", 2)
  # A record without a second line has no sequence text.
  residues ||= ""
  @data['ORIGIN'] = truncate(tag_cut(header))
  @data['SEQUENCE'] = residues.tr("0-9 \t\n\r\/", '')
  @data['ORIGIN']
end
#references ⇒ Object
REFERENCE – Returns contents of the REFERENCE records as an Array of Bio::Reference objects.
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 |
# File 'lib/bio/db/genbank/common.rb', line 136

# REFERENCE -- Returns contents of the REFERENCE records as an Array of
# Reference objects.
#
# Each top-level REFERENCE entry is split into its sub-fields, which are
# collected into a Hash ('embl_gb_record_number', 'sequence_position',
# 'authors', 'title', 'journal', 'volume', 'issue', 'pages', 'year',
# 'medline', 'pubmed', 'comments') and passed to Reference.new.
# The resulting array is cached in @data['REFERENCE'] and extended with
# Bio::References::BackwardCompatibility. When a block is given, each
# cached reference is yielded to it; otherwise the cached array is returned.
def references
  unless @data['REFERENCE']
    ary = []
    toptag2array(get('REFERENCE')).each do |ref|
      hash = Hash.new
      subtag2array(ref).each do |field|
        case tag_get(field)
        when /REFERENCE/
          if /(\d+)(\s*\((.+)\))?/m =~ tag_cut(field)
            hash['embl_gb_record_number'] = $1.to_i
            if $3 && $3 != 'sites'
              # "bases 1 to 100; 200 to 300" => "1-100, 200-300"
              seqpos = $3
              seqpos.sub!(/\A\s*bases\s+/, '')
              seqpos.gsub!(/(\d+)\s+to\s+(\d+)/, "\\1-\\2")
              seqpos.gsub!(/\s*\;\s*/, ', ')
              hash['sequence_position'] = seqpos
            end
          end
        when /AUTHORS/
          # "Smith,J., Doe,A. and Roe,B." => ["Smith, J.", "Doe, A.", "Roe, B."]
          authors = truncate(tag_cut(field))
          authors = authors.split(/, /)
          # the last element may still hold two names joined by "and"
          authors[-1] = authors[-1].split(/\s+and\s+/) if authors[-1]
          authors = authors.flatten.map { |a| a.sub(/,/, ', ') }
          hash['authors'] = authors
        when /TITLE/
          hash['title'] = truncate(tag_cut(field))
          # CHECK Actually GenBank is not demanding for dot at the end of TITLE
          #+ '.'
        when /JOURNAL/
          journal = truncate(tag_cut(field))
          # "Name VOLUME (ISSUE), PAGES (YEAR)" is split into its parts;
          # anything else is stored unparsed under 'journal'.
          if journal =~ /(.*) (\d+) \((\d+)\), (\d+-\d+) \((\d+)\)$/
            hash['journal'] = $1
            hash['volume']  = $2
            hash['issue']   = $3
            hash['pages']   = $4
            hash['year']    = $5
          else
            hash['journal'] = journal
          end
        when /MEDLINE/
          hash['medline'] = truncate(tag_cut(field))
        when /PUBMED/
          hash['pubmed'] = truncate(tag_cut(field))
        when /REMARK/
          hash['comments'] ||= []
          hash['comments'].push truncate(tag_cut(field))
        end
      end
      ary.push(Reference.new(hash))
    end
    @data['REFERENCE'] = ary.extend(Bio::References::BackwardCompatibility)
  end
  if block_given?
    @data['REFERENCE'].each do |r|
      yield r
    end
  else
    @data['REFERENCE']
  end
end
#segment ⇒ Object
SEGMENT – Returns contents of the SEGMENT record as a “m/n” form String.
90 91 92 |
# File 'lib/bio/db/genbank/common.rb', line 90

# SEGMENT -- Returns contents of the SEGMENT record as a "m/n" form String.
# Every run of digits in the fetched text is joined with "/".
# The result is cached in @data['SEGMENT'].
def segment
  cached = @data['SEGMENT']
  return cached if cached

  numbers = fetch('SEGMENT').scan(/\d+/)
  @data['SEGMENT'] = numbers.join("/")
end
#source ⇒ Object
SOURCE – Returns contents of the SOURCE record as a Hash.
96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
# File 'lib/bio/db/genbank/common.rb', line 96

# SOURCE -- Returns contents of the SOURCE record as a Hash with the keys
# 'common_name', 'organism' and 'taxonomy'; any other key yields ''.
#
# The record is split at the ORGANISM sub-tag. In the ORGANISM part, the
# text before the first token ending in ";" (or, failing that, the first
# token ending in ".") is the organism, and that token plus everything
# after it is the taxonomy. The Hash is cached in @data['SOURCE'].
def source
  cached = @data['SOURCE']
  return cached if cached

  name_part, organism_part = get('SOURCE').split('ORGANISM')
  organism_part ||= ""

  # The "." fallback covers entries such as rs:NC_001741.
  boundary = /\S+;/.match(organism_part) || /\S+\./.match(organism_part)
  if boundary
    species = boundary.pre_match
    lineage = boundary[0] + boundary.post_match
  else
    species = organism_part
    lineage = ''
  end

  info = {
    'common_name' => truncate(tag_cut(name_part)),
    'organism' => truncate(species),
    'taxonomy' => truncate(lineage),
  }
  info.default = ''
  @data['SOURCE'] = info
end
#taxonomy ⇒ Object
129 130 131 |
# File 'lib/bio/db/genbank/common.rb', line 129

# Returns the 'taxonomy' entry of the Hash produced by +source+.
def taxonomy
  source_fields = source
  source_fields['taxonomy']
end
#version ⇒ Object
Returns the VERSION part of the acc_version as an Integer.
67 68 69 |
# File 'lib/bio/db/genbank/common.rb', line 67

# Returns the VERSION part of +acc_version+ as an Integer: the text after
# the last "." converted with +to_i+ (0 when it is missing or not numeric).
def version
  pieces = acc_version.split(/\./)
  pieces.last.to_i
end
#versions ⇒ Object
VERSION – Returns contents of the VERSION record as an Array of Strings.
52 53 54 |
# File 'lib/bio/db/genbank/common.rb', line 52

# VERSION -- Returns contents of the VERSION record as an Array of Strings,
# obtained by splitting the fetched text on runs of whitespace.
# The result is cached in @data['VERSION'].
def versions
  cached = @data['VERSION']
  return cached if cached

  @data['VERSION'] = fetch('VERSION').split(/\s+/)
end