Class: BacterialComparator

Inherits:
Object
  • Object
show all
Defined in:
lib/bacterial-comparator.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(options, root) ⇒ BacterialComparator

Initialize BacterialAnnotator options, options, ROOT, options, options)



19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/bacterial-comparator.rb', line 19

def initialize options, root

  @root = root
  @outdir = options[:outdir]
  Dir.mkdir(@outdir) if ! Dir.exists? @outdir
  @genomes_list = options[:genomes_list]
  @proc = options[:proc].to_i

  min_cov = options[:min_cov].to_f
  min_pid = options[:pidentity].to_f
  if min_cov > 1
    min_cov = min_cov/100
  end
  if min_pid > 1
    min_pid = min_pid/100
  end

  @ref_prot = get_ref_prot
  @synteny = read_prot_synteny
  @stats = extract_syntenic_fasta min_cov, min_pid

end

Instance Attribute Details

#genomes_listObject (readonly)

Returns the value of attribute genomes_list.



15
16
17
# File 'lib/bacterial-comparator.rb', line 15

def genomes_list
  @genomes_list
end

#statsObject (readonly)

Returns the value of attribute stats.



15
16
17
# File 'lib/bacterial-comparator.rb', line 15

def stats
  @stats
end

Instance Method Details

#build_multifasta(ref_prot, synteny) ⇒ Object



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
# File 'lib/bacterial-comparator.rb', line 84

def build_multifasta ref_prot, synteny

  pep_out_dir = "./#{@outdir}/genes-align-pep"
  dna_out_dir = "./#{@outdir}/genes-align-dna"

  # create multifasta by syntenic proteins (pep)
  if ! File.exists? pep_out_dir+"/#{ref_prot}.pep"
    pep_out = File.open(pep_out_dir+"/#{ref_prot}.pep", "w")
    pep_file = Dir["#{@genomes_list[0]}/*.pep"]
    flatfile = Bio::FlatFile.auto("#{pep_file[0]}")
    pep_out.write(get_sequence_from_flatfile flatfile, ref_prot)
    @genomes_list.each_with_index do |g,i|
      flatfile = Bio::FlatFile.auto("#{g}/Proteins.fa")
      pep_out.write(get_sequence_from_flatfile flatfile, synteny[i][:query_prot])
    end
    pep_out.close
  end

  # create multifasta by syntenic genes (dna)
  if ! File.exists? dna_out_dir+"/#{ref_prot}.dna"
    dna_out = File.open(dna_out_dir+"/#{ref_prot}.dna", "w")
    # create multifasta by syntenic proteins
    dna_file = Dir["#{@genomes_list[0]}/*.dna"]
    flatfile = Bio::FlatFile.auto("#{dna_file[0]}")
    dna_out.write(get_sequence_from_flatfile flatfile, ref_prot)
    @genomes_list.each_with_index do |g,i|
      flatfile = Bio::FlatFile.auto("#{g}/Genes.fa")
      dna_out.write(get_sequence_from_flatfile flatfile, synteny[i][:query_prot])
    end
    dna_out.close
  end

end

#extract_syntenic_fasta(min_cov, min_pid) ⇒ Object



119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
# File 'lib/bacterial-comparator.rb', line 119

def extract_syntenic_fasta min_cov, min_pid

  "# Extracting Proteins and Genes multifasta.."
  nb_of_syntenic = 0
  stats = {}
  stats[:syntenic] = []
  fout = File.open("#{@outdir}/cds-synteny.tsv", "w")
  fout.write("Gene\t"+@genomes_list.join("\t")+"\n")

  to_build_multifasta = []

  @synteny.each do |k,v|
    is_syntenic = 1
    v.each do |v_|
      if v_[:query_cov].nil?
        is_syntenic = 0
        break
      elsif v_[:query_cov] > min_cov and
           v_[:ref_cov] > min_cov and
           v_[:pId] > min_pid
      # synteny -> great !
      else
        is_syntenic = 0
        break
      end
    end

    if is_syntenic == 1
      nb_of_syntenic += 1
      # build_multifasta k, v
      to_build_multifasta << [k,v]
      fout.write("#{k}")
      v.each do |x|
        fout.write("\t#{x[:query_prot]}|#{x[:query_cov]}|#{x[:ref_cov]}")
        stats[:syntenic] << k
      end
      fout.write("\n")
    end

  end

  fout.close

  pep_out_dir = "./#{@outdir}/genes-align-pep"
  dna_out_dir = "./#{@outdir}/genes-align-dna"
  Dir.mkdir(pep_out_dir) if ! Dir.exists? pep_out_dir
  Dir.mkdir(dna_out_dir) if ! Dir.exists? dna_out_dir

  Parallel.map(to_build_multifasta, in_processes: @proc) { |k,v|
    build_multifasta k, v
  }

  stats[:nb_of_syntenic] = nb_of_syntenic
  puts "Syntenic genes : " + nb_of_syntenic.to_s + " / " + @ref_prot.length.to_s

end

#get_ref_protObject



59
60
61
62
63
64
65
66
67
# File 'lib/bacterial-comparator.rb', line 59

def get_ref_prot
  ref_prot = []
  pep_file = Dir["#{@genomes_list[0]}/*.pep"]
  flatfile = Bio::FlatFile.auto("#{pep_file[0]}")
  flatfile.each_entry do |entry|
    ref_prot << entry.definition.split(" ")[0]
  end
  ref_prot
end

#get_sequence_from_flatfile(flatfile, name) ⇒ Object



70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/bacterial-comparator.rb', line 70

def get_sequence_from_flatfile flatfile, name

  out = ""
  flatfile.each_entry do |entry|
    if entry.definition.split(" ")[0] == name
      bioseq = Bio::Sequence.auto(entry.seq)
      out = bioseq.output_fasta("#{name}",60)
    end
  end
  out

end

#mafft_align(f) ⇒ Object



177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
# File 'lib/bacterial-comparator.rb', line 177

def mafft_align f

  trying = 0
  begin
    cmd = system("#{@root}/mafft.linux --quiet #{f} > #{f}.aln")
    if File.size("#{f}.aln") == 0
      puts "File size of 0.. --#{f}--"
      puts "Command used : #{@root}/mafft.linux --quiet #{f} > #{f}.aln"
      fail
    else
      status = "OK"
      status = "FAILED" if cmd != true
      puts "Alignment #{f} : #{status}"
    end
  rescue
    if trying < 3
      trying += 1
      retry
    end
    status = "FAILED"
    puts "Alignment #{f} : #{status}"
  end

end

#mafft_align_all_dnaObject



211
212
213
214
215
216
217
218
219
# File 'lib/bacterial-comparator.rb', line 211

def mafft_align_all_dna
  puts "# MAFFT multialign all gene sequences.."
  puts "# MAFFT multialign all protein sequences.."
  Dir.chdir("#{@outdir}/genes-align-dna/")
  Parallel.map(Dir["*.dna"], in_processes: @proc) { |f|
    mafft_align f
  }
  Dir.chdir("..")
end

#mafft_align_all_pepObject



202
203
204
205
206
207
208
209
# File 'lib/bacterial-comparator.rb', line 202

def mafft_align_all_pep
  puts "# MAFFT multialign all protein sequences.."
  Dir.chdir("#{@outdir}/genes-align-pep/")
  Parallel.map(Dir["*.pep"], in_processes: @proc) { |f|
    mafft_align f
  }
  Dir.chdir("..")
end

#read_prot_syntenyObject



42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
# File 'lib/bacterial-comparator.rb', line 42

def read_prot_synteny
  synteny = {}
  @genomes_list.each do |g|
    puts "#{g}/Prot-Synteny.tsv"
    file = File.open("#{g}/Prot-Synteny.tsv", "r")
    l = file.gets             # skip header
    while l = file.gets
      # AAK98805.1      spr0001 453     1.0     100.0   ABAC01000005_14 453     1.0
      lA = l.chomp.split("\t")
      synteny[lA[0]] =  [] if ! synteny.has_key? lA[0]
      synteny[lA[0]] << {ref_cov: lA[3].to_f, pId: lA[4].to_f, query_prot: lA[5], query_cov: lA[7].to_f}
    end
    file.close
  end
  synteny
end