Class: Ensembl::DBRegistry::Base

Inherits:
ActiveRecord::Base
  • Object
show all
Defined in:
lib/bio-ensembl/db_connection.rb

Overview

Ensembl::DBRegistry::Base is a superclass providing general methods to retrieve database and connection information.

Direct Known Subclasses

Core::DBConnection, Variation::DBConnection

Class Method Summary collapse

Class Method Details

.generic_connect(db_type, species, release, args = {}) ⇒ Object



100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
# File 'lib/bio-ensembl/db_connection.rb', line 100

# Establishes the database connection for this class and returns the live
# connection object.
#
# @param db_type [String] database type, forwarded to +get_name_from_db+
# @param species [String] species name, forwarded to +get_name_from_db+
# @param release [Integer] Ensembl release; selects the VariationFeature
#   file to load and the default port
# @param args [Hash] optional connection settings. Keys read here:
#   +:ensembl_genomes+, +:port+, +:host+, +:database+, +:adapter+,
#   +:username+, +:password+. The hash is NOT modified.
# @return [Object] whatever +retrieve_connection+ returns; that call raises
#   if the connection cannot be obtained
def self.generic_connect(db_type, species, release, args = {})
  # Work on a shallow copy: the defaults computed below (port/host) must not
  # leak into the caller's hash, otherwise a hash reused for a second call
  # keeps the port chosen for the first release.
  args = args.dup

  # check which release is used and load the correct VariationFeature version
  feature_file = release < 62 ? 'variation_feature.rb' : 'variation_feature62.rb'
  require File.join(File.dirname(__FILE__), 'variation', feature_file)

  Ensembl::SESSION.reset
  Ensembl::SESSION.release = release

  # if the connection is established with Ensembl Genomes, force its port and
  # host (any :port/:host supplied by the caller is overridden, as before)
  if args[:ensembl_genomes]
    args[:port] = EG_PORT
    args[:host] = EG_HOST
  end

  # no explicit port: pick the default according to the release
  args[:port] = release > 47 ? 5306 : 3306 if args[:port].nil?

  # use the database given by the caller, otherwise try to find the
  # corresponding database on the server
  db_name = args[:database] || get_name_from_db(db_type, species, release, args)

  establish_connection(
    :adapter  => args[:adapter] || Ensembl::DB_ADAPTER,
    :host     => args[:host] || Ensembl::DB_HOST,
    :database => db_name,
    :username => args[:username] || Ensembl::DB_USERNAME,
    :password => args[:password] || Ensembl::DB_PASSWORD,
    :port     => args[:port]
  )

  retrieve_connection # Check if the connection is working
end

.get_info ⇒ Object



58
59
60
61
62
63
64
65
66
67
# File 'lib/bio-ensembl/db_connection.rb', line 58

# Reads the options of the current connection and derives the species and
# release from the database name.
#
# @return [Array] host, user, password, db_name, port, species, release
#   (release converted to an Integer)
# @raise [NameError] when the database name does not follow the standard
#   Ensembl naming scheme
def self.get_info
  options = retrieve_connection.instance_values["connection_options"]
  host, user, password, db_name, port = options

  # just works for standard Ensembl database names
  db_name =~ /(\w+_\w+)_(core|variation|funcgen|compara)_(\d+)_\S+/
  name_parts = Regexp.last_match

  if name_parts.nil?
    raise NameError, "Can't get database name from #{db_name}. Are you using non conventional names?"
  end

  [host, user, password, db_name, port, name_parts[1], name_parts[3].to_i]
end

.get_name_from_db(db_type, species, release, args) ⇒ Object

Method to retrieve the name of a database, using species, release and connection parameters passed by the user.



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/bio-ensembl/db_connection.rb', line 71

# Retrieves the name of a database, using species, release and connection
# parameters passed by the user.
#
# A regular database is searched first. If none is found and
# +args[:ensembl_genomes]+ is set, a "collection" database whose name starts
# with the first word of the species is searched; when one is found the
# abbreviated species name is stored in +Ensembl::SESSION.collection_species+.
#
# @param db_type [String] database type embedded in the database name
# @param species [String] species name; underscored and space-free before use
# @param release [Integer, String] release embedded in the database name
# @param args [Hash] connection settings handed to +DummyDBConnection.connect+;
#   +:ensembl_genomes+ is read here. The hash is NOT modified.
# @return [String, nil] the database name, or nil (after a warning) when no
#   database matches
def self.get_name_from_db(db_type,species,release,args)
  species = species.underscore.tr(' ','_') # Always in lowercase. This keeps things simple when dealing with complex species names like in Ensembl Genomes database
  dummy_db = DummyDBConnection.connect(args)
  dummy_connection = dummy_db.connection

  # check if a database exists with exactly the species name passed (regular way);
  # the pattern is quoted by the adapter instead of being pasted raw into the SQL
  like_pattern = dummy_connection.quote("%#{species}_#{db_type}_#{release}%")
  db_name = dummy_connection.select_values("SHOW DATABASES LIKE #{like_pattern}")[0]

  # if a database is not found and we are working on Ensembl Genomes database...
  if db_name.nil? && args[:ensembl_genomes]
    first = species.split(/_/).first
    # ...try to find a collection database using the first name of the species passed (convention used for collection databases)
    db_name = dummy_connection.select_values("SHOW DATABASES").find { |d| d =~ /#{first}.*_collection_#{db_type}_#{release}/ }

    # if a collection database match is found, then look inside to find the species
    if db_name
      dummy_db.disconnect! # close the generic connection with the host
      # open a new connection directly with the collection database; a merged copy
      # is used so the caller's hash does not keep a stale :database entry
      dummy_db = DummyDBConnection.connect(args.merge(:database => db_name))
      collection_connection = dummy_db.connection

      # transform the species name, so it can match the species names stored in the
      # collection database. Only the leading word is abbreviated: replacing every
      # occurrence would corrupt names where it reappears (mus_musculus -> m_mculus)
      species_name = species.sub(first, first[0..0])
      Ensembl::SESSION.collection_species = species_name # set the species used for this session, so it's easier to fetch slices from the genome of that species

      # check that the species passed is present in the collection database, otherwise returns a warning
      exists = collection_connection.select_values("SELECT species_id FROM meta WHERE LOWER(meta_value) = #{collection_connection.quote(species_name)} AND meta_key = 'species.db_name'")[0]
      warn "WARNING: No species '#{species}' found in the database. Please check that the name is correct." unless exists
    end
  end

  warn "WARNING: No connection to database established. Check that the species is in snake_case (was: #{species})." if db_name.nil?
  db_name
end