Class: Transfuse::Cluster
- Inherits:
- Object
- Inheritance chain: Object → Transfuse::Cluster
- Defined in:
- lib/transfuse/cluster.rb
Instance Method Summary
- #generate_vsearch_command(fasta, out, msa) ⇒ Object
-
#initialize(threads, verbose, id) ⇒ Cluster
constructor
A new instance of Cluster.
- #parse_vsearch_output(cluster_output, msa_output) ⇒ Object
- #run(fasta) ⇒ Object
- #vsearch(fasta) ⇒ Object
Constructor Details
#initialize(threads, verbose, id) ⇒ Cluster
Returns a new instance of Cluster.
8 9 10 11 12 13 14 |
# File 'lib/transfuse/cluster.rb', line 8

# Sets up a Cluster that drives the external vsearch executable.
#
# @param threads [Object] stored as-is; interpolated after --threads when the
#   vsearch command is generated
# @param verbose [Object] when truthy, the other methods print progress text
# @param id [#to_s] stored as a String; used for the --id flag and in the
#   names of the output files
# @raise [RuntimeError] when Which.which('vsearch') yields no first entry
def initialize(threads, verbose, id)
  # NOTE(review): Which is defined outside this view; judging by the error
  # message below it presumably searches the PATH -- confirm.
  located = Which.which('vsearch').first
  @vsearch = located
  raise 'vsearch was not in the PATH - please install it' unless located

  @id = id.to_s
  @threads = threads
  @verbose = verbose
end
Instance Method Details
#generate_vsearch_command(fasta, out, msa) ⇒ Object
32 33 34 35 36 37 38 39 40 41 42 43 |
# File 'lib/transfuse/cluster.rb', line 32

# Builds the vsearch command line used to cluster a FASTA file.
#
# The executable path and the three file paths are shell-escaped so that
# names containing spaces or shell metacharacters are passed to vsearch as
# single arguments. Paths made only of ordinary characters (letters, digits,
# and _ - . , : + / @) come out unchanged.
#
# @param fasta [#to_s] path of the FASTA file to cluster (--cluster_fast)
# @param out [#to_s] path for the cluster table (--uc)
# @param msa [#to_s] path for the per-cluster alignments (--msaout)
# @return [String] the complete command line
def generate_vsearch_command(fasta, out, msa)
  # Required here because the file's top-level require list is not part of
  # this block; repeated calls are no-ops.
  require 'shellwords'

  # NOTE(review): Shellwords follows Bourne-shell quoting rules -- confirm
  # that the command is always executed through a POSIX shell.
  [
    Shellwords.escape(@vsearch.to_s),
    "--cluster_fast #{Shellwords.escape(fasta.to_s)}",
    "--id #{@id}",
    "--iddef 0",     # cd-hit definition of sequence id
    "--qmask none",  # no masking
    "--strand both",
    "--uc #{Shellwords.escape(out.to_s)}",
    "--msaout #{Shellwords.escape(msa.to_s)}",
    "--threads #{@threads}"
  ].join(" ")
end
#parse_vsearch_output(cluster_output, msa_output) ⇒ Object
45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 |
# File 'lib/transfuse/cluster.rb', line 45

# Reads the two files written by vsearch and groups the aligned sequences
# by cluster.
#
# First pass: every tab-separated line of +cluster_output+ that starts with
# "S" or "H" contributes a mapping from its 9th column (contig name) to its
# 2nd column (cluster id). Second pass: every FASTA entry of +msa_output+
# other than those named "consensus" is appended to the list for the cluster
# its name maps to.
#
# When @verbose is truthy, progress text is printed, with one "." per
# 10_000 lines/entries read.
#
# @param cluster_output [String] path of the vsearch --uc file
# @param msa_output [String] path of the vsearch --msaout FASTA file
# @return [Hash{String => Array<Hash>}] cluster id => list of
#   { :name => contig name, :seq => aligned sequence } hashes, in file order
def parse_vsearch_output(cluster_output, msa_output)
  print "parsing vsearch output" if @verbose
  lookup = {} # contig name => cluster id
  count = 0
  # File.foreach closes the file once iteration ends (the handle was
  # previously left open).
  File.foreach(cluster_output) do |line|
    count += 1
    if line.start_with?("S", "H")
      cols = line.chomp.split("\t")
      lookup[cols[8]] = cols[1]
    end
    print "." if @verbose && (count % 10_000).zero?
  end
  puts " Done" if @verbose

  print "parsing msa output " if @verbose
  count = 0
  msa = {}
  Bio::FastaFormat.open(msa_output).each do |entry|
    count += 1
    name = entry.entry_id
    if name != "consensus"
      # A leading "*" is not part of the contig name used in the uc file.
      # NOTE(review): presumably vsearch marks the cluster centroid this
      # way -- confirm against the vsearch manual.
      name = name[1..-1] if name[0] == "*"
      # NOTE(review): a name absent from the uc file is grouped under the
      # nil key, as before.
      (msa[lookup[name]] ||= []) << { :name => name, :seq => entry.seq.seq }
    end
    print "." if @verbose && (count % 10_000).zero?
  end
  puts " Done" if @verbose
  msa
end
#run(fasta) ⇒ Object
16 17 18 19 |
# File 'lib/transfuse/cluster.rb', line 16

# Clusters +fasta+ with vsearch and parses what it produced.
#
# @param fasta [String] path of the FASTA file handed to #vsearch
# @return [Object] whatever #parse_vsearch_output returns for the two
#   output files reported by #vsearch
def run(fasta)
  clust_path, aln_path = vsearch(fasta)
  parse_vsearch_output(clust_path, aln_path)
end
#vsearch(fasta) ⇒ Object
21 22 23 24 25 26 27 28 29 30 |
# File 'lib/transfuse/cluster.rb', line 21

# Runs vsearch on +fasta+ and reports where its output was written.
#
# Output names are built from the basename of +fasta+ plus "-<id>", with a
# ".clust" suffix for the cluster table and ".aln" for the alignments, so
# they are relative paths with no directory component.
#
# @param fasta [String] path of the FASTA file to cluster
# @return [Array<String>] two elements: cluster output path, msa output path
def vsearch(fasta)
  print "running vsearch..." if @verbose

  stem = "#{File.basename(fasta)}-#{@id}"
  cluster_output = "#{stem}.clust"
  msa_output = "#{stem}.aln"

  command = generate_vsearch_command(fasta, cluster_output, msa_output)
  # NOTE(review): Cmd is defined outside this view; what #run does with the
  # path it receives cannot be told from here.
  Cmd.new(command).run(cluster_output)

  puts " Done. Created #{cluster_output}" if @verbose
  [cluster_output, msa_output]
end