Module: Populator

Includes:
XlsParser
Included in:
Vardb
Defined in:
lib/vardb/database_populator.rb

Instance Method Summary collapse

Methods included from XlsParser

load_meta_fields

Instance Method Details

#populate_matrixObject



7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
# File 'lib/vardb/database_populator.rb', line 7

def populate_matrix

  host = ConfigData.get_connection

  conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])
  
  #Matrix File Command Preparation
  conn.prepare('load_snps', 'INSERT INTO snps (id, locus, annotation_id) values ($1, $2, $3)')
  conn.prepare('load_annos', 'INSERT INTO annotations (id, cds, transcript, transcript_id, info, orientation, cds_locus, codon_pos, codon, peptide, amino_a, syn ) values ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11, $12)')
  conn.prepare('load_samples_snps', 'INSERT INTO samples_snps (sample_id, snp_id) values ($1, $2)')
  conn.prepare('load_samples', 'INSERT INTO samples (id, name) values ($1, $2)')

  #Matrix File Load-ins
  text=File.open(ConfigData.get_matrix).read

  linenum = 1
  sample_number = 1

  snps = []
  anno_tabs = []
  anno_vals = []

  text.each_line do |line|
(header, line_data) = line.split(' ', 2)
if (header == '#snp_pos')
  puts "populating snps table..."
  snps = line_data.split("\t")
  snp_counter = 1
  snps.each do |locus| 
    conn.exec_prepared('load_snps', [snp_counter, locus, snp_counter])
    snp_counter += 1
  end  
elsif (header == '#annotation')
  puts "populating annotations table..."
  anno_tabs = line_data.split("\t")
  anno_tabs.each { |tab| anno_vals << tab.split(',', 11) }
  anno_counter = 1
        anno_vals.each do |anno|
         anno.insert(0, anno_counter)
         if anno[1].match('intergenic')
            conn.exec_prepared('load_annos', [ anno[0], 0, 0, 0,  anno[1], 0, 0, 0, 0, 0, 0, 0 ])
            else 
                conn.exec_prepared('load_annos', [ anno[0], anno[1], anno[2], anno[3], anno[4], anno[5], anno[6], anno[7], anno[8], anno[9], anno[10], anno[11] ])
            end
            anno_counter += 1  
        end
else
  if sample_number == 1 then
      puts "loading reference..."
    else
      puts "loading in sample #{sample_number - 1}..."
  end  
  conn.exec_prepared('load_samples', [sample_number, header])
  line_data.split("\t").each_with_index do |n, i|
    if (n == '1')
      conn.exec_prepared('load_samples_snps', [sample_number, i])
    end
       end
       sample_number += 1
end
    linenum += 1
  end
end

#populate_metadataObject



71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
# File 'lib/vardb/database_populator.rb', line 71

def 

  host = ConfigData.get_connection

  conn = PGconn.connect(:host => host[:host], :port => host[:port], :dbname => host[:dbname], :user => host[:user], :password => host[:password])

  #Excel Spreadsheet Command Preparaton
   = XlsParser.load_meta_fields(ConfigData.)

   = "id "

  .each do |item|
 << item
  end

   = "$1 "

  .length.times do |i|
     << ", $#{i+2}"
  end

  conn.prepare('load_metadata', "INSERT INTO sample_metadata (#{metadata_fields_string}) values (#{metadata_values_string})")

  #Excel Spreadsheet Load-ins
  s = Roo::Excel.new(ConfigData.)
  s.default_sheet = s.sheets.first

  row = 2

  puts "populating sample metadata..."

  until s.cell(row, 1).nil?
row_contents = ["#{row-1}"]
.length.times do |i|
  row_contents << "#{s.cell(row, i)}"
end
conn.exec_prepared('load_metadata', row_contents)
row += 1
  end
end