Module: Ensembl::FTP
- Defined in:
- lib/rbbt/sources/ensembl_ftp.rb
Constant Summary collapse
- SERVER =
"ftp.ensembl.org"
Class Method Summary collapse
- .base_url(organism) ⇒ Object
- .ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {}) ⇒ Object
- .fields_for(organism, table) ⇒ Object
- .ftp_directory_for(organism) ⇒ Object
- .ftp_name_for(organism) ⇒ Object
- .has_table?(organism, table) ⇒ Boolean
- .mysql_path(release) ⇒ Object
- .url_for(organism, table) ⇒ Object
Class Method Details
.base_url(organism) ⇒ Object
51 52 53 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 51
# Builds the FTP URL of the directory that ftp_directory_for resolves for
# +organism+ on SERVER.
def self.base_url(organism)
  host_root = "ftp://#{SERVER}"
  remote_dir = ftp_directory_for(organism)
  File.join(host_root, remote_dir)
end
.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {}) ⇒ Object
71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 71
# Opens the dump file of +table+ for +organism+ with TSV.open.
#
# When both +key_field+ and +fields+ are given, their names are translated
# into column positions (looked up in fields_for) and passed to TSV.open as
# the :key_field and :fields options. The names themselves are then assigned
# to the resulting object.
#
# @param organism  organism code, forwarded to url_for / fields_for
# @param table     table name
# @param key_field name of the key column, or nil
# @param fields    list of column names, or nil
# @param options   extra options forwarded to TSV.open; never modified
# @return the object returned by TSV.open
def self.ensembl_tsv(organism, table, key_field = nil, fields = nil, options = {})
  url = url_for(organism, table)

  if key_field && fields
    all_fields = fields_for(organism, table)
    # Merge into a copy so the hash supplied by the caller is left untouched.
    options = options.merge(
      :key_field => all_fields.index(key_field),
      :fields    => fields.collect { |f| all_fields.index(f) }
    )
  end

  tsv = TSV.open(url, options)
  tsv.key_field = key_field
  tsv.fields = fields
  tsv
end
.fields_for(organism, table) ⇒ Object
64 65 66 67 68 69 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 64
# Lists the column names of +table+ as declared in the organism's SQL schema
# file (<base_url>/<basename of base_url>.sql.gz, read through Open.read).
#
# @param organism organism code, forwarded to base_url
# @param table    table name; matched literally
# @return [Array<String>] backtick-quoted identifiers that start a line inside
#   the table's CREATE TABLE statement
# @raise [ArgumentError] when the schema has no CREATE TABLE for +table+
def self.fields_for(organism, table)
  # Resolve the base URL once; it is needed for both directory and file name.
  base = base_url(organism)
  schema_url = "#{base}/#{File.basename(base)}.sql.gz"
  sql_file = Open.read(schema_url)

  # Only the `m` flag is wanted here (dot matches newline). In Ruby the `s`
  # flag selects the Windows-31J encoding and makes the match raise on
  # non-ASCII schema text.
  definition = sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m)
  raise ArgumentError, "Table #{table} not found in #{schema_url}" if definition.nil?

  definition[1].scan(/^\s+`(.*?)`/).flatten
end
.ftp_directory_for(organism) ⇒ Object
42 43 44 45 46 47 48 49 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 42
# Returns the server-side directory holding the organism's MySQL dump:
# /pub/current_mysql/<name> for the 'current' release, otherwise
# /pub/<release>/mysql/<name>. Release and name come from ftp_name_for.
def self.ftp_directory_for(organism)
  release, ftp_name = ftp_name_for(organism)
  return File.join('/pub/', 'current_mysql', ftp_name) if release == 'current'

  File.join('/pub/', release, 'mysql', ftp_name)
end
.ftp_name_for(organism) ⇒ Object
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 16
# Finds the core database directory name for +organism+ on the FTP server.
#
# +organism+ is "<code>" or "<code>/<build>"; a missing build means "current".
# For "current" the listing is taken from pub/current_mysql, otherwise from
# pub/<release>/mysql, where the release is looked up in Ensembl.releases.
#
# @param organism [String] organism code, optionally followed by "/<build>"
# @return [Array] [release, name]; name is the last entry of the listing that
#   matches "<scientific_name>_core_*", or nil when nothing matches
def self.ftp_name_for(organism)
  _code, build = organism.split "/"
  build ||= "current"

  if build.to_s == "current"
    release = 'current'
    directory = File.join('pub', 'current_mysql')
  else
    release = Ensembl.releases[build]
    directory = File.join('pub', release, 'mysql')
  end

  name = Organism.scientific_name(organism)
  pattern = name.downcase.gsub(" ", '_') + "_core_*"

  ftp = Net::FTP.new(Ensembl::FTP::SERVER)
  begin
    ftp.passive = true
    ftp.login
    ftp.chdir(directory)
    # Each listing line ends with the entry name; keep the last entry listed.
    file = ftp.list(pattern).collect { |line| line.split(" ").last }.last
  ensure
    # Close the connection even when login, chdir or list raises.
    ftp.close
  end

  [release, file]
end
.has_table?(organism, table) ⇒ Boolean
59 60 61 62 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 59
# Tells whether the organism's SQL schema file
# (<base_url>/<basename of base_url>.sql.gz, read through Open.read) contains
# a CREATE TABLE statement for +table+.
#
# @param organism organism code, forwarded to base_url
# @param table    table name; matched literally
# @return [Boolean]
def self.has_table?(organism, table)
  # Resolve the base URL once; it is needed for both directory and file name.
  base = base_url(organism)
  sql_file = Open.read("#{base}/#{File.basename(base)}.sql.gz")

  # Only the `m` flag is wanted here (dot matches newline). In Ruby the `s`
  # flag selects the Windows-31J encoding and makes the match raise on
  # non-ASCII schema text.
  !sql_file.match(/^CREATE TABLE .#{Regexp.escape(table.to_s)}. \((.*?)^\)/m).nil?
end
.mysql_path(release) ⇒ Object
13 14 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 13
# Placeholder with no implementation: +release+ is not used and every call
# returns nil.
def self.mysql_path(release)
  nil
end
.url_for(organism, table) ⇒ Object
55 56 57 |
# File 'lib/rbbt/sources/ensembl_ftp.rb', line 55
# Returns the URL of the "<table>.txt.gz" file under the organism's base URL.
def self.url_for(organism, table)
  dump_file = "#{table}.txt.gz"
  [base_url(organism), dump_file].join("/")
end