Class: Lederhosen::CLI
- Inherits:
- Thor
- Inheritance chain: Object → Thor → Lederhosen::CLI
- Includes:
- Thor::Actions
- Defined in:
- lib/lederhosen/cli.rb,
lib/lederhosen/no_tasks.rb,
lib/lederhosen/tasks/trim.rb,
lib/lederhosen/tasks/cluster.rb,
lib/lederhosen/tasks/version.rb,
lib/lederhosen/tasks/get_reps.rb,
lib/lederhosen/tasks/make_udb.rb,
lib/lederhosen/tasks/otu_table.rb,
lib/lederhosen/tasks/otu_filter.rb,
lib/lederhosen/tasks/split_fasta.rb,
lib/lederhosen/tasks/join_otu_tables.rb
Overview
The CLI class holds all of the Thor tasks
Instance Attribute Summary
-
#taxonomy_format ⇒ Object
Returns the value of attribute taxonomy_format.
Instance Method Summary
- #cluster ⇒ Object
- #get_reps ⇒ Object
- #join_otu_tables ⇒ Object
- #make_udb ⇒ Object
- #otu_filter ⇒ Object
- #otu_table ⇒ Object
- #split_fasta ⇒ Object
- #trim ⇒ Object
- #version ⇒ Object
Instance Attribute Details
#taxonomy_format ⇒ Object
Returns the value of attribute taxonomy_format.
4 5 6 |
# File 'lib/lederhosen/no_tasks.rb', line 4

# Reader for the taxonomy_format attribute.
#
# @return [Object] the current value of @taxonomy_format (nil when unset)
def taxonomy_format
  @taxonomy_format
end
Instance Method Details
#cluster ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 |
# File 'lib/lederhosen/tasks/cluster.rb', line 14

# Cluster reads against a database by building a `usearch` command line and
# handing it to `run`.
#
# Reads the options :input, :database and :output (expanded to absolute
# paths), plus :identity, :strand and :threads. Every option is echoed via
# `ohai` before the command is executed.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`
# and `expand_path`; they are restored here - confirm against
# lib/lederhosen/tasks/cluster.rb.
#
# @return [Object] whatever `run` returns for the assembled command
def cluster
  require 'shellwords' # lazy-loaded: only needed to quote paths for the shell

  input    = File.expand_path(options[:input])
  database = File.expand_path(options[:database])
  output   = File.expand_path(options[:output])
  identity = options[:identity]
  strand   = options[:strand]
  threads  = options[:threads]

  ohai "clustering #{input} to #{database} and saving to #{output}"

  options.each_pair { |key, value| ohai "#{key} = #{value}" }

  # The command is a single string, so paths are escaped to keep spaces and
  # shell metacharacters from splitting or altering the arguments.
  cmd = [
    'usearch',
    "--usearch_local #{Shellwords.escape(input)}",
    "--id #{identity}",
    "--uc #{Shellwords.escape(output)}",
    "--db #{Shellwords.escape(database)}",
    "--strand #{strand}"
  ]

  # threads = false : the flag is omitted (per the original comment, usearch
  # then uses all threads). A nil value is treated the same way instead of
  # emitting a dangling "--threads ".
  cmd << "--threads #{threads}" if threads

  run cmd.join(' ')
end
#get_reps ⇒ Object
11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
# File 'lib/lederhosen/tasks/get_reps.rb', line 11

# Collect the names referenced by one or more usearch cluster (uc) files and
# write the matching database records to the output file.
#
# Reads the options :input (a glob of uc files), :database and :output.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`,
# `expand_path` and `pbar`; they are restored here - confirm against
# lib/lederhosen/tasks/get_reps.rb.
def get_reps
  inputs   = Dir[options[:input]]
  database = File.expand_path(options[:database])
  output   = File.expand_path(options[:output])

  taxa = Set.new

  ohai "getting representative database sequences from #{database} using #{inputs.size} cluster file(s) and saving to #{output}"

  # parse uc file(s), get list of taxa we need to get
  # full sequences for from the database
  pbar = ProgressBar.new 'reading uc(s)', inputs.size

  inputs.each do |input|
    File.open(input) do |handle|
      pbar.inc
      handle.each do |line|
        header = parse_usearch_line(line.strip)
        # Lines that do not parse, or that carry no 'original' name, are
        # skipped explicitly (previously hidden behind a blanket `rescue nil`,
        # which also let a nil name into the set).
        next if header.nil?

        name = header['original']
        taxa << name unless name.nil?
      end
    end
  end

  pbar.finish

  ohai "found #{taxa.size} representative sequences"

  # print representative sequences from database; block-form opens ensure
  # both files are closed even if iteration raises
  kept = 0
  File.open(output, 'w') do |out|
    File.open(database) do |handle|
      Dna.new(handle).each do |record|
        next unless taxa.include?(record.name)

        out.puts record
        kept += 1
      end
    end
  end

  ohai "saved #{kept} representatives"
end
#join_otu_tables ⇒ Object
12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 |
# File 'lib/lederhosen/tasks/join_otu_tables.rb', line 12

# Merge several OTU count tables (CSV) into one table.
#
# Reads the options :input (a glob of CSV files) and :output. Each input
# contributes one output row named after the file's basename; the output
# columns are the sorted union of all OTU names, and an OTU with no recorded
# count for a sample is written as 0.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`
# and `expand_path`; they are restored here - confirm against
# lib/lederhosen/tasks/join_otu_tables.rb.
def join_otu_tables
  input  = Dir[options[:input]]
  output = File.expand_path(options[:output])

  ohai "combining #{input.size} file(s) and saving to #{output}"

  all_otu_names = Set.new
  all_samples   = Set.new
  # sample name => { otu name => count }, unknown OTUs read back as 0
  sample_name_count = Hash.new { |by_sample, sample| by_sample[sample] = Hash.new(0) }

  # read all of the csv files
  input.each do |input_file|
    sample = File.basename(input_file)

    File.open(input_file) do |handle|
      header_line = handle.gets
      next if header_line.nil? # empty file: nothing to merge

      otu_names = header_line.strip.split(',')[1..-1] || []
      all_otu_names.merge(otu_names)

      handle.each do |line|
        fields = line.strip.split(',')
        next if fields.empty? # blank line (e.g. trailing newline)

        all_samples << sample
        read_counts = fields[1..-1]
        otu_names.zip(read_counts) do |name, count|
          # a short row leaves count nil; keep the 0 default in that case
          sample_name_count[sample][name] = count unless count.nil?
        end
      end
    end
  end

  # save to csv
  File.open(output, 'w') do |handle|
    header = all_otu_names.to_a.sort
    handle.puts "-,#{header.join(',')}"

    all_samples.to_a.sort.each do |sample|
      counts = header.map { |name| sample_name_count[sample][name] }
      handle.puts [sample, *counts].join(',')
    end
  end
end
#make_udb ⇒ Object
9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 |
# File 'lib/lederhosen/tasks/make_udb.rb', line 9

# Build a usearch database by running `usearch -makeudb_usearch`.
#
# Reads the options :input and :output and passes them to the command.
#
# NOTE(review): the rendered listing had dropped the identifier `options`;
# it is restored here - confirm against lib/lederhosen/tasks/make_udb.rb.
# The original also read options[:word_length] into a local that was never
# used; that dead assignment is removed.
#
# @return [Object] whatever `run` returns for the assembled command
def make_udb
  require 'shellwords' # lazy-loaded: only needed to quote paths for the shell

  input  = options[:input]
  output = options[:output]

  ohai "making udb w/ #{input}, saving as #{output}."

  # Paths are escaped so spaces/metacharacters do not split the arguments.
  cmd = [
    'usearch',
    "-makeudb_usearch #{Shellwords.escape(input.to_s)}",
    "-output #{Shellwords.escape(output.to_s)}"
  ]

  run cmd.join(' ')
end
#otu_filter ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 |
# File 'lib/lederhosen/tasks/otu_filter.rb', line 13

# Filter an OTU table (CSV), keeping only clusters that have at least
# options[:reads] reads in at least options[:samples] samples, then write the
# reduced table to options[:output] and report how much was kept.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`
# and `expand_path`; they are restored here - confirm against
# lib/lederhosen/tasks/otu_filter.rb.
def otu_filter
  input       = File.expand_path(options[:input])
  output      = File.expand_path(options[:output])
  reads       = options[:reads]
  min_samples = options[:samples]

  ohai "filtering otu file #{input} (reads = #{reads}, samples = #{min_samples})"

  # cluster id => { sample id => read count }
  cluster_sample_count = Hash.new { |by_cluster, cluster| by_cluster[cluster] = {} }

  ohai "loading csv file #{input}"

  # slurp up CSV file
  File.open(input) do |handle|
    header      = (handle.gets || '').strip.split(',')
    cluster_ids = header[1..-1] || []

    handle.each do |line|
      fields = line.strip.split(',')
      next if fields.empty? # blank line

      sample_id = fields[0].to_sym
      counts    = fields[1..-1].map(&:to_i)

      cluster_ids.zip(counts).each do |cluster, count|
        # a short row leaves count nil; treat the missing cell as 0 reads
        cluster_sample_count[cluster][sample_id] = count || 0
      end
    end
  end

  ohai "filtering"

  # keep a cluster when enough samples reach the read threshold
  filtered = cluster_sample_count.select do |_cluster, sample_counts|
    sample_counts.values.count { |count| count >= reads } >= min_samples
  end

  ohai "saving to #{output}"

  samples  = filtered.values.map(&:keys).flatten.uniq
  clusters = filtered.keys

  # save the table; block form closes the file even if writing raises
  File.open(output, 'w') do |out|
    out.puts "-,#{clusters.join(',')}"
    samples.each do |sample|
      row = clusters.map { |cluster| filtered[cluster][sample] }
      out.puts [sample, *row].join(',')
    end
  end

  ohai "kept #{filtered.keys.size} clusters (#{filtered.keys.size/cluster_sample_count.size.to_f})."

  # inject(0, :+) so that an empty selection sums to 0 instead of nil
  kept_reads  = filtered.values.map { |x| x.values.inject(0, :+) }.inject(0, :+)
  total_reads = cluster_sample_count.values.map { |x| x.values.inject(0, :+) }.inject(0, :+)
  ohai "kept #{kept_reads}/#{total_reads} reads (#{kept_reads/total_reads.to_f})."
end
#otu_table ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 |
# File 'lib/lederhosen/tasks/otu_table.rb', line 21

# Build one count table (CSV) per requested taxonomic level from usearch
# cluster files.
#
# Reads the options :files (a glob), :prefix and :levels. For every level a
# file "<prefix>.<level>.csv" is written with one row per input file and one
# column per name seen at that level. Lines for which parse_usearch_line
# returns nil are counted as 'unclassified_reads'; parsed lines without a
# value for the level are counted as 'unparsed_name'.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`
# and `pbar`; they are restored here - confirm against
# lib/lederhosen/tasks/otu_table.rb.
#
# @raise [RuntimeError] when a requested level is not one of the known levels
def otu_table
  input  = Dir[options[:files]]
  prefix = options[:prefix]
  levels = options[:levels].map(&:downcase)

  ohai "generating #{levels.join(', ')} table(s) from #{input.size} file(s) and saving to prefix #{prefix}."

  # sanity check
  known_levels = %w{domain phylum class order family genus species kingdom original}
  levels.each do |level|
    fail "bad level: #{level}" unless known_levels.include? level
  end

  # level => { input file => { name => count } }
  level_sample_cluster_count =
    Hash.new do |by_level, level|
      by_level[level] = Hash.new { |by_sample, sample| by_sample[sample] = Hash.new(0) }
    end

  all_names = Hash.new { |names, level| names[level] = Set.new }

  # progress is tracked per input file (one tick per file)
  pbar = ProgressBar.new "loading", input.size

  # Load cluster table
  input.each do |input_file|
    pbar.inc
    File.open(input_file) do |handle|
      handle.each do |line|
        dat = parse_usearch_line(line.strip)
        levels.each do |level|
          name = dat.nil? ? 'unclassified_reads' : (dat[level] || 'unparsed_name')
          level_sample_cluster_count[level][input_file][name] += 1
          all_names[level] << name
        end
      end
    end
  end

  pbar.finish

  # save to csv(s)
  levels.each do |level|
    ohai "saving #{level} table"

    File.open("#{prefix}.#{level}.csv", 'w') do |handle|
      header = all_names[level].to_a.compact.sort
      handle.puts "#{level.capitalize},#{header.join(',')}"

      input.each do |sample|
        counts = header.map { |name| level_sample_cluster_count[level][sample][name] }
        handle.puts [sample, *counts].join(',')
      end
    end
  end
end
#split_fasta ⇒ Object
18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 |
# File 'lib/lederhosen/tasks/split_fasta.rb', line 18

# Split a fasta file into numbered files of options[:n] records each, written
# to options[:out_dir] as split_<k>.fasta (or split_<k>.fasta.gz when
# options[:gzip] is set).
#
# NOTE(review): the rendered listing had dropped the identifiers `options`,
# `expand_path` and `pbar`; they are restored here - confirm against
# lib/lederhosen/tasks/split_fasta.rb.
def split_fasta
  require 'fileutils' # lazy-loaded: used only to create the output directory

  input   = File.expand_path(options[:input])
  out_dir = options[:out_dir]
  n       = options[:n].to_i
  gzip    = options[:gzip]

  ohai "splitting #{input} into files with #{n} reads stored in #{out_dir}"
  ohai "using gzip" if gzip

  # no shell involved, so directory names with spaces or metacharacters work
  FileUtils.mkdir_p(out_dir)

  File.open input do |handle|
    pbar = ProgressBar.new 'splitting', File.size(handle)

    # Declared outside the iteration block so the current handle survives
    # from one record to the next.
    out = nil

    begin
      Dna.new(handle).each_with_index do |record, i|
        pbar.set handle.pos

        if i % n == 0 # start a new file
          # GzipWriter must be closed explicitly; there is nothing to close
          # before the first file is opened
          out.close if out
          out = nil

          # create an IO object depending on whether or
          # not the user wants to use gzip
          out =
            if gzip
              Zlib::GzipWriter.open(File.join(out_dir, "split_#{i/n}.fasta.gz"))
            else
              File.open(File.join(out_dir, "split_#{i/n}.fasta"), 'w')
            end
        end

        out.puts record
      end

      pbar.finish
    ensure
      # nil when the input held no records at all
      out.close if out
    end
  end

  ohai "created #{Dir[File.join(out_dir, '*')].size} files"
end
#trim ⇒ Object
14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 |
# File 'lib/lederhosen/tasks/trim.rb', line 14

# Trim paired reads: group the raw read files, then call trim_pairs on each
# pair, writing "<group name>.fasta" into options[:out_dir].
#
# Reads the options :reads_dir and :out_dir.
#
# NOTE(review): the rendered listing had dropped the identifiers `options`
# and `pbar`; they are restored here - confirm against
# lib/lederhosen/tasks/trim.rb.
def trim
  require 'shellwords' # lazy-loaded: only needed to quote the directory name

  raw_reads = options[:reads_dir]
  out_dir   = options[:out_dir]

  ohai "trimming #{File.dirname(raw_reads)} and saving to #{out_dir}"

  # escaped so a directory name with spaces stays a single argument
  run "mkdir -p #{Shellwords.escape(out_dir)}"

  raw_reads = get_grouped_qseq_files raw_reads

  ohai "found #{raw_reads.length} pairs of reads"

  pbar = ProgressBar.new "trimming", raw_reads.length

  # each element is indexed as [name, [first_file, second_file]]
  raw_reads.each do |group|
    pbar.inc
    out = File.join(out_dir, "#{File.basename(group[0])}.fasta")
    # TODO: report total and trimmed counts (the return value is currently unused)
    trim_pairs group[1][0], group[1][1], out, :min_length => 70
  end

  pbar.finish
end
#version ⇒ Object
10 11 12 |
# File 'lib/lederhosen/tasks/version.rb', line 10

# Print the release string and codename taken from Lederhosen::Version
# to standard output.
def version
  puts "lederhosen-#{Lederhosen::Version::STRING} codename #{Lederhosen::Version::CODENAME}"
end