Class: Transfuse::Cluster

Inherits:
Object
  • Object
show all
Defined in:
lib/transfuse/cluster.rb

Instance Method Summary collapse

Constructor Details

#initialize(threads, verbose, id) ⇒ Cluster

Returns a new instance of Cluster.



8
9
10
11
12
13
14
# File 'lib/transfuse/cluster.rb', line 8

# Sets up a clusterer bound to the vsearch executable found via Which.
#
# @param threads [Object] value later interpolated after --threads
# @param verbose [Boolean] truthy to print progress messages
# @param id [#to_s] identity threshold; stored as a String
# @raise [RuntimeError] when Which.which('vsearch') yields no first entry
def initialize(threads, verbose, id)
  @vsearch = Which.which('vsearch').first
  # Fail early: nothing else in this object works without the executable.
  @vsearch || raise("vsearch was not in the PATH - please install it")
  @id = id.to_s
  @threads = threads
  @verbose = verbose
end

Instance Method Details

#generate_vsearch_command(fasta, out, msa) ⇒ Object



32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/transfuse/cluster.rb', line 32

# Assembles the vsearch --cluster_fast command line as a single String.
#
# @param fasta [String] input path passed to --cluster_fast
# @param out [String] path passed to --uc
# @param msa [String] path passed to --msaout
# @return [String] a fresh String; @vsearch itself is not modified
def generate_vsearch_command(fasta, out, msa)
  [
    "#{@vsearch}",
    "--cluster_fast #{fasta}",
    "--id #{@id}",
    "--iddef 0",    # cd-hit definition of sequence id
    "--qmask none", # no masking
    "--strand both",
    "--uc #{out}",
    "--msaout #{msa}",
    "--threads #{@threads}"
  ].join(" ")
end

#parse_vsearch_output(cluster_output, msa_output) ⇒ Object



45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
# File 'lib/transfuse/cluster.rb', line 45

# Reads the vsearch cluster table and the alignment FASTA, then groups the
# aligned sequences by the cluster each sequence name was assigned to.
#
# Only table rows starting with "S" or "H" are used; for those, column 2 is
# taken as the cluster id and column 9 as the sequence name.
#
# @param cluster_output [String] path to the tab-separated file written via --uc
# @param msa_output [String] path to the FASTA file written via --msaout
# @return [Hash] cluster id => Array of { :name => String, :seq => String };
#   names absent from the cluster table are grouped under the nil key
def parse_vsearch_output(cluster_output, msa_output)
  print "parsing vsearch output" if @verbose
  lookup = {}
  count = 0
  # File.foreach closes the file once iteration finishes (or raises), so no
  # handle is left open for the garbage collector to clean up.
  File.foreach(cluster_output) do |line|
    count += 1
    if line.start_with?("S", "H")
      cols = line.chomp.split("\t")
      lookup[cols[8]] = cols[1]
    end
    print "." if @verbose && count % 10_000 == 0
  end
  puts " Done" if @verbose

  print "parsing msa output    " if @verbose
  count = 0
  msa = {}
  # NOTE(review): the object returned by Bio::FastaFormat.open is not closed
  # here either; confirm against BioRuby whether a block form should be used.
  Bio::FastaFormat.open(msa_output).each do |entry|
    count += 1
    name = entry.entry_id
    if name != "consensus"
      # Some alignment entries carry a leading "*" (presumably the cluster
      # centroid); strip it so the name matches the cluster table.
      name = name[1..-1] if name[0] == "*"
      cluster = lookup[name]
      (msa[cluster] ||= []) << { :name => name, :seq => entry.seq.seq }
    end
    print "." if @verbose && count % 10_000 == 0
  end
  puts " Done" if @verbose
  msa
end

#run(fasta) ⇒ Object



16
17
18
19
# File 'lib/transfuse/cluster.rb', line 16

# Clusters the sequences in +fasta+ with vsearch and parses the result.
#
# @param fasta [String] path to the input FASTA file
# @return [Object] whatever parse_vsearch_output returns for the two files
#   produced by #vsearch
def run(fasta)
  clust_path, aln_path = vsearch(fasta)
  parse_vsearch_output(clust_path, aln_path)
end

#vsearch(fasta) ⇒ Object



21
22
23
24
25
26
27
28
29
30
# File 'lib/transfuse/cluster.rb', line 21

# Builds the vsearch command for +fasta+, hands it to Cmd and reports the
# names of the two output files.
#
# Output names are "<basename of fasta>-<@id>" plus ".clust" / ".aln"; they
# carry no directory component.
#
# @param fasta [String] path to the input FASTA file
# @return [Array<String>] [cluster output name, msa output name]
def vsearch(fasta)
  print "running vsearch..." if @verbose
  stem = "#{File.basename(fasta)}-#{@id}"
  clust_path = "#{stem}.clust"
  aln_path = "#{stem}.aln"
  command = generate_vsearch_command(fasta, clust_path, aln_path)
  # NOTE(review): Cmd is defined elsewhere; what it does with the path given
  # to #run is not visible here.
  Cmd.new(command).run(clust_path)
  puts " Done. Created #{clust_path}" if @verbose
  [clust_path, aln_path]
end