Class: Bio::SQL::Sequence

Inherits:
Object show all
Defined in:
lib/bio/io/sql.rb

Overview

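Represents a single sequence entry stored in the database. Only identifying fields are read at construction time; everything else (sequence, features, references, comments, ...) is fetched lazily, by querying the database when the corresponding method is called.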

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(dbh, entry) ⇒ Sequence

Returns a new instance of Sequence.



48
49
50
51
52
53
54
55
56
# File 'lib/bio/io/sql.rb', line 48

# Builds a Sequence around a database handle and one bioentry row.
#
# dbh   :: database handle; stored as-is and used by the other methods
#          through +execute+.
# entry :: row-like object indexed by column name; only the identifying
#          columns are copied here, nothing else is queried.
def initialize(dbh, entry)
  @dbh = dbh

  # Keys used by the query methods.
  @bioentry_id, @database_id = entry['bioentry_id'], entry['biodatabase_id']

  # Values exposed through the read-only attributes.
  @entry_id, @accession = entry['display_id'], entry['accession']
  @version, @division = entry['entry_version'], entry['division']
end

Instance Attribute Details

#accessionObject (readonly)

Returns the value of attribute accession.



57
58
59
# File 'lib/bio/io/sql.rb', line 57

# Read-only attribute: the value stored in @accession
# (set from entry['accession'] by the constructor).
def accession
  return @accession
end

#divisionObject (readonly)

Returns the value of attribute division.



57
58
59
# File 'lib/bio/io/sql.rb', line 57

# Read-only attribute: the value stored in @division
# (set from entry['division'] by the constructor).
def division
  return @division
end

#entry_idObject (readonly)

Returns the value of attribute entry_id.



57
58
59
# File 'lib/bio/io/sql.rb', line 57

# Read-only attribute: the value stored in @entry_id
# (set from entry['display_id'] by the constructor).
def entry_id
  return @entry_id
end

#versionObject (readonly)

Returns the value of attribute version.



57
58
59
# File 'lib/bio/io/sql.rb', line 57

# Read-only attribute: the value stored in @version
# (set from entry['entry_version'] by the constructor).
def version
  return @version
end

Instance Method Details

#commentObject

Returns the text of the first comment, or an empty string if there is none. To get all comments, use the #comments method.



163
164
165
166
167
# File 'lib/bio/io/sql.rb', line 163

# Returns the 'comment_text' of the first row fetched from the comment
# table for this bioentry, or '' when no row is found.
def comment
  statement = @dbh.execute("select * from comment where bioentry_id = ?", @bioentry_id)
  first_row = statement.fetch
  return '' unless first_row

  first_row['comment_text']
end

#commentsObject

Returns comments in an Array of Strings.



170
171
172
173
174
175
176
177
178
179
# File 'lib/bio/io/sql.rb', line 170

# Returns every comment text for this bioentry as an Array.
# Each text is stored at index (rank - 1), so the Array follows the
# 'rank' column rather than the order in which rows are fetched.
def comments
  sql = "select * from comment where bioentry_id = ?"
  rows = @dbh.execute(sql, @bioentry_id).fetch_all
  rows.each_with_object([]) do |rec, texts|
    next unless rec

    slot = rec['rank'].to_i - 1
    texts[slot] = rec['comment_text']
  end
end

#common_nameObject



230
231
232
233
# File 'lib/bio/io/sql.rb', line 230

# Returns @common_name, calling +taxonomy+ first when it is not yet set
# (taxonomy is the method that assigns it).
def common_name
  return @common_name if @common_name

  taxonomy
  @common_name
end

#databaseObject



181
182
183
184
185
# File 'lib/bio/io/sql.rb', line 181

# Returns the 'name' column of the biodatabase row whose id matches
# @database_id, or '' when no such row is fetched.
def database
  statement = @dbh.execute("select * from biodatabase where biodatabase_id = ?", @database_id)
  db_row = statement.fetch
  return '' unless db_row

  db_row['name']
end

#dateObject



187
188
189
190
191
# File 'lib/bio/io/sql.rb', line 187

# Returns the 'date' column of the first bioentry_date row for this
# bioentry, or '' when no row is fetched.
def date
  statement = @dbh.execute("select * from bioentry_date where bioentry_id = ?", @bioentry_id)
  date_row = statement.fetch
  return '' unless date_row

  date_row['date']
end


193
194
195
196
197
# File 'lib/bio/io/sql.rb', line 193

# Returns [dbname, accession] taken from the first bioentry_direct_links
# row whose source is this bioentry, or [] when no row is fetched.
def dblink
  sql = "select * from bioentry_direct_links where source_bioentry_id = ?"
  link = @dbh.execute(sql, @bioentry_id).fetch
  return [] unless link

  [link['dbname'], link['accession']]
end

#definitionObject



199
200
201
202
203
# File 'lib/bio/io/sql.rb', line 199

# Returns the 'description' column of the first bioentry_description row
# for this bioentry, or '' when no row is fetched.
def definition
  statement = @dbh.execute("select * from bioentry_description where bioentry_id = ?", @bioentry_id)
  desc_row = statement.fetch
  return '' unless desc_row

  desc_row['description']
end

#featuresObject

Returns Bio::Features object.



104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/bio/io/sql.rb', line 104

# Returns a Bio::Features object built from the seqfeature rows of this
# bioentry. Each row becomes a Bio::Feature(type, locations, qualifiers)
# stored at index (rank - 1).
def features
  sql = "select * from seqfeature where bioentry_id = ?"
  collected = []

  @dbh.execute(sql, @bioentry_id).fetch_all.each do |rec|
    next unless rec

    feature_id = rec['seqfeature_id']
    key_term_id = rec['type_term_id']
    source_term_id = rec['source_term_id']
    slot = rec['rank'].to_i - 1

    # Feature key (gene, CDS, ...).
    key = feature_key(key_term_id)

    # Source database (EMBL/GenBank/SwissProt). The value is looked up
    # but not passed on to Bio::Feature.
    _source = feature_source(source_term_id)

    # Positions and qualifiers both hang off the seqfeature id.
    position = feature_locations(feature_id)
    quals = feature_qualifiers(feature_id)

    collected[slot] = Bio::Feature.new(key, position, quals)
  end

  Bio::Features.new(collected)
end

#keywordObject



205
206
207
208
209
# File 'lib/bio/io/sql.rb', line 205

# Returns the 'keywords' column of the first bioentry_keywords row for
# this bioentry, or '' when no row is fetched.
def keyword
  statement = @dbh.execute("select * from bioentry_keywords where bioentry_id = ?", @bioentry_id)
  kw_row = statement.fetch
  return '' unless kw_row

  kw_row['keywords']
end

#lineageObject



225
226
227
228
# File 'lib/bio/io/sql.rb', line 225

# Returns @lineage, calling +taxonomy+ first when it is not yet set.
# NOTE(review): the visible +taxonomy+ has its @lineage assignment
# commented out, so this appears to return nil and re-run the query on
# every call -- confirm against the rest of the file.
def lineage
  return @lineage if @lineage

  taxonomy
  @lineage
end

#ncbi_taxa_idObject



235
236
237
238
# File 'lib/bio/io/sql.rb', line 235

# Returns @ncbi_taxa_id, calling +taxonomy+ first when it is not yet set
# (taxonomy is the method that assigns it).
def ncbi_taxa_id
  return @ncbi_taxa_id if @ncbi_taxa_id

  taxonomy
  @ncbi_taxa_id
end

#referencesObject

Returns reference information as an Array of Hashes (not Bio::Reference objects).



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
# File 'lib/bio/io/sql.rb', line 135

# Returns the literature references of this bioentry as an Array of
# Hashes (not Bio::Reference objects), stored at index (rank - 1).
#
# Each Hash has the keys 'start', 'end', 'journal', 'title', 'authors'
# and 'medline', and returns '' for any other key.
def references
  sql = <<-SQL
    select * from bioentry_reference, reference
    where bioentry_id = ? and
    bioentry_reference.reference_id = reference.reference_id
  SQL

  @dbh.execute(sql, @bioentry_id).fetch_all.each_with_object([]) do |rec, refs|
    next unless rec

    # Unknown keys fall back to the empty string.
    ref = Hash.new('')
    ref['start'] = rec['start_pos']
    ref['end'] = rec['end_pos']
    ref['journal'] = rec['location']
    ref['title'] = rec['title']
    ref['authors'] = rec['authors']
    ref['medline'] = rec['crc']

    refs[rec['rank'].to_i - 1] = ref
  end
end

#seqObject

Returns Bio::Sequence::NA or AA object.



67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/bio/io/sql.rb', line 67

# Fetches the whole sequence of this bioentry from the biosequence table.
#
# Returns a Bio::Sequence::NA when the 'alphabet' column matches /.na/i
# ('dna' or 'rna'), a Bio::Sequence::AA otherwise, and nil when no row
# is fetched.
def seq
  record = @dbh.execute("select * from biosequence where bioentry_id = ?", @bioentry_id).fetch
  return unless record

  alphabet = record['alphabet']
  residues = record['seq']

  sequence_class =
    case alphabet
    when /.na/i then Bio::Sequence::NA   # 'dna' or 'rna'
    else Bio::Sequence::AA               # 'protein'
    end
  sequence_class.new(residues)
end

#subseq(from, to) ⇒ Object

Returns Bio::Sequence::NA or AA object (by lazy fetching).



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'lib/bio/io/sql.rb', line 84

# Fetches only the region from..to of the sequence by asking the
# database for substring(seq, from, to - from + 1).
#
# Returns a Bio::Sequence::NA when the 'alphabet' column matches /.na/i
# ('dna' or 'rna'), a Bio::Sequence::AA otherwise, and nil when no row
# is fetched.
def subseq(from, to)
  span = to - from + 1
  sql = "select alphabet, substring(seq, ?, ?) as subseq from biosequence where bioentry_id = ?"
  record = @dbh.execute(sql, from, span, @bioentry_id).fetch
  return unless record

  alphabet = record['alphabet']
  residues = record['subseq']

  sequence_class =
    case alphabet
    when /.na/i then Bio::Sequence::NA   # 'dna' or 'rna'
    else Bio::Sequence::AA               # 'protein'
    end
  sequence_class.new(residues)
end

#taxonomyObject

Returns the taxonomy values as an Array (empty when no record is found). Use the lineage, common_name and ncbi_taxa_id methods to get the individual values.



212
213
214
215
216
217
218
219
220
221
222
223
# File 'lib/bio/io/sql.rb', line 212

# Looks up the taxon name and NCBI taxon id of this bioentry and caches
# them in @common_name and @ncbi_taxa_id ('' for both when no row is
# fetched).
#
# Returns [@lineage, @common_name, @ncbi_taxa_id], or [] when no row is
# fetched. @lineage is not assigned here (the lookup of 'full_lineage'
# is disabled), so the first element is whatever @lineage already holds.
def taxonomy
  sql = <<-SQL
    select taxon_name.name, taxon.ncbi_taxon_id from bioentry
    join taxon_name using(taxon_id) join taxon using (taxon_id)
    where bioentry_id = ?
  SQL
  record = @dbh.execute(sql, @bioentry_id).fetch

  if record
    # @lineage = record['full_lineage']   (disabled)
    @common_name = record['name']
    @ncbi_taxa_id = record['ncbi_taxon_id']
    [@lineage, @common_name, @ncbi_taxa_id]
  else
    @common_name = ''
    @ncbi_taxa_id = ''
    []
  end
end

#to_fastaObject



60
61
62
63
64
# File 'lib/bio/io/sql.rb', line 60

# Returns this entry's sequence in FASTA format, using the accession as
# the header, or nil when +seq+ returns nothing.
def to_fasta
  # The local must NOT be named `seq`: `seq = seq` declares a local that
  # shadows the method, so the right-hand side is nil and the method
  # would always return nil.
  sequence = seq()
  return unless sequence

  sequence.to_fasta(@accession)
end