Top Level Namespace
- Includes:
- FastqAssessment, MiseqRunStats
Defined Under Namespace
Modules: Enumerable, FastqAssessment, MiseqRunStats
Instance Method Summary collapse
- #calculate_read_length(filename) ⇒ Object
- #delete_file_with_dependencies(file, *dependencies) ⇒ Object
- #extract_file_prefixes_and_sample_name(sample_map_file, directory) ⇒ Object
- #file_exists?(directory, *filenames) ⇒ Boolean
- #find_executable(executable_name, directory = nil) ⇒ Object
- #generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff) ⇒ Object
- #trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path, trim_point_fraction, trim_quality_cutoff) ⇒ Object
-
#which(cmd) ⇒ Object
meethod to return path to command if it is in the path (works in windows).
- #write_out_fastq_trim_script ⇒ Object
Methods included from MiseqRunStats
#parse_assembly_run_stats, #parse_resequencing_run_stats
Methods included from FastqAssessment
#generate_quality_stats_for_read, #percentage_compared_to_raw
Instance Method Details
#calculate_read_length(filename) ⇒ Object
70 71 72 73 74 75 76 77 78 79 80 81 82 |
# File 'lib/trim_and_correct.rb', line 70 def calculate_read_length(filename) read_length = nil File.open(filename) do |f| f.each do |line| line.chomp! if line =~ /^[GATCgatc]/ read_length = line.size break end end end return read_length - 1 end |
#delete_file_with_dependencies(file, *dependencies) ⇒ Object
63 64 65 66 67 68 |
# File 'lib/trim_and_correct.rb', line 63 def delete_file_with_dependencies(file, *dependencies) dependencies << file if dependencies.all?{|dependency| File.exists?(dependency)} && File.exists?(file) File.delete(file) end end |
#extract_file_prefixes_and_sample_name(sample_map_file, directory) ⇒ Object
4 5 6 7 8 9 10 11 |
# File 'lib/fastq-factory.rb', line 4 def extract_file_prefixes_and_sample_name(sample_map_file, directory) sample_map = Hash.new File.read("#{directory}/#{sample_map_file}").split("\n").each do |sample_map_line| file_prefix, sample_name = sample_map_line.split("\t") sample_map[file_prefix] = sample_name end return sample_map end |
#file_exists?(directory, *filenames) ⇒ Boolean
13 14 15 16 17 18 19 |
# File 'lib/fastq-factory.rb', line 13 def file_exists?(directory, *filenames) at_least_one_file_found = false filenames.each do |filename| at_least_one_file_found = true if File.exists?("#{directory}/#{filename}") end abort("You specified a file(s): #{filenames.join(", ")}. At least one of these must exist! Please check your sample map file") unless at_least_one_file_found end |
#find_executable(executable_name, directory = nil) ⇒ Object
21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 |
# File 'lib/fastq-factory.rb', line 21 def find_executable(executable_name, directory = nil) if directory.nil? if which(executable_name) return which(executable_name) elsif File.executable?("/usr/local/bin/#{executable_name}") return "/usr/local/bin/#{executable_name}" elsif File.executable?("/usr/local/#{executable_name}/#{executable_name}") return "/usr/local/#{executable_name}/#{executable_name}" else return nil end else if File.executable?("#{directory}/#{executable_name}") return "#{directory}/#{executable_name}" else return nil end end end |
#generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff) ⇒ Object
6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 |
# File 'lib/generate_quality_metrics.rb', line 6 def generate_quality_metrics(sample_map, directory, forward_reads_suffix, reverse_reads_suffix, quality_scale, quality_cutoff) if File.exists?("#{directory}/ResequencingRunStatistics.xml") puts "Assessing quality from Miseq run stats file" run_stats = parse_resequencing_run_stats("#{directory}/ResequencingRunStatistics.xml", sample_map.values) elsif File.exists?("#{directory}/AssemblyRunStatistics.xml") puts "Assessing quality from Miseq run stats file" run_stats = parse_assembly_run_stats("#{directory}/AssemblyRunStatistics.xml", sample_map.values) else run_stats = ResequencingRunStats.new run_stats.sample_stats = Hash.new sample_map.values.each do |sample_name| run_stats.sample_stats[sample_name] = ResequencingSampleStats.new end end forward_reads_trimmed_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2') reverse_reads_trimmed_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed\2') forward_reads_trimmed_corrected_suffix = forward_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2') reverse_reads_trimmed_corrected_suffix = reverse_reads_suffix.sub(/(.+)(\..+?)$/, '\1.trimmed.cor\2') sample_map.each do |read_file_prefix, sample_name| puts "Assesing quality for #{sample_name}" run_stats.sample_stats[sample_name].fastq_stats = Hash.new run_stats.sample_stats[sample_name].fastq_stats["forward"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_suffix}",quality_scale, quality_cutoff) if File.exists?("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}") run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}") run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_corrected_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}") else run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["forward-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{forward_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{forward_reads_suffix}") run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"] = generate_quality_stats_for_read("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}",quality_scale, quality_cutoff) run_stats.sample_stats[sample_name].fastq_stats["reverse-trim_corrected"].percentage_compared_to_raw = percentage_compared_to_raw("#{directory}/#{read_file_prefix}#{reverse_reads_trimmed_suffix}", "#{directory}/#{read_file_prefix}#{reverse_reads_suffix}") end end # print out data output_file = File.open("#{directory}/summary_stats.txt", "w") # print headers output_file.puts "run name\tnumber of bases(Gb)\tnumber of clusters\tsample name\tdirection\tnumber of clusters\tnumber of forward reads aligned\tnumber of reverse reads aligned\tcoverage\tnumber of snps\tnumber of contigs\tmean contig size\tn50\tnumber of bases\tmean quality\tread base where qual falls below 30\tpercent reduction compared to raw" output_file.puts "#{directory.match(/.*\/(.+?)$/).captures.first}\t#{run_stats.number_of_bases}\t#{run_stats.number_of_clusters}" run_stats.sample_stats.keys.sort.each do |sample_name| sample_stats = run_stats.sample_stats[sample_name] if sample_stats.class == Struct::ResequencingSampleStats output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t#{sample_stats.number_of_forward_reads_aligned}\t#{sample_stats.number_of_reverse_reads_aligned}\t#{sample_stats.coverage}\t#{sample_stats.number_of_snps}" elsif sample_stats.class == Struct::AssemblySampleStats output_file.puts "\t\t\t#{sample_name}\t\t#{sample_stats.number_of_clusters}\t\t\t\t\t#{sample_stats.number_of_contigs}\t#{sample_stats.mean_contig_size}\t#{sample_stats.n50}\t#{sample_stats.number_of_bases}" end ["forward", "reverse", "forward-trim_corrected", "reverse-trim_corrected"].each do |direction| fastq_stats = run_stats.sample_stats[sample_name].fastq_stats[direction] output_file.puts "\t\t\t\t#{direction}\t\t\t\t\t\t\t\t\t\t#{fastq_stats.mean_quality}\t#{fastq_stats.position_where_quality_lt_20}\t#{fastq_stats.percentage_compared_to_raw}" end end output_file.close end |
#trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path, trim_point_fraction, trim_quality_cutoff) ⇒ Object
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
# File 'lib/trim_and_correct.rb', line 1 def trim_and_correct_fastqs(sample_map, directory, forward_reads_suffix, forward_reads_file_extension, reverse_reads_suffix, reverse_reads_file_extension, quality_scale, fastq_quality_trimmer_path, quake_path,trim_point_fraction, trim_quality_cutoff) Dir.chdir(directory) # trimming sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") || File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") puts "Trimming files for #{sample_name}" #determine read length read_length = calculate_read_length("#{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension}") trim_point = (trim_point_fraction * read_length).to_i unless File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.#{forward_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v` `#{fastq_quality_trimmer_path} -i #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.#{reverse_reads_file_extension} -o #{directory}/#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension} -t #{trim_quality_cutoff} -l #{trim_point} -Q #{quality_scale} -v` end `perl /tmp/fastq-remove-orphans.pl -1 #{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} -2 #{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}` end # quake correction # write file for quake sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") || File.exists?(("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}")) puts "Error correcting files for #{sample_name}" output_file = File.open("quake_file_list.txt","w") output_file.puts "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension} paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}" output_file.close # run quake `#{quake_path} -f quake_file_list.txt -k 15 -q #{quality_scale}` end sample_map.each do |sample_file_prefix, sample_name| next if File.exists?("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") # remove orphans `perl /tmp/fastq-remove-orphans.pl -1 paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension} -2 paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}` end # cleanup and rename files sample_map.each do |sample_file_prefix, sample_name| delete_file_with_dependencies("#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}", "paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}", "paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") delete_file_with_dependencies("orphaned_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") delete_file_with_dependencies("orphaned_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") delete_file_with_dependencies("error_model.paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.txt") delete_file_with_dependencies("error_model.paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.stats.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.stats.txt") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor_single.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") delete_file_with_dependencies("orphaned_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") delete_file_with_dependencies("orphaned_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") File.rename("paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") if File.exists?("paired_paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.cor.#{forward_reads_file_extension}") File.rename("paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") if File.exists?("paired_paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.cor.#{reverse_reads_file_extension}") else File.rename("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}", "#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{forward_reads_suffix}.trimmed.#{forward_reads_file_extension}") File.rename("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}", "#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") if File.exists?("paired_#{sample_file_prefix}#{reverse_reads_suffix}.trimmed.#{reverse_reads_file_extension}") end end end |
#which(cmd) ⇒ Object
meethod to return path to command if it is in the path (works in windows)
43 44 45 46 47 48 49 50 51 52 |
# File 'lib/fastq-factory.rb', line 43 def which(cmd) exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : [''] ENV['PATH'].split(File::PATH_SEPARATOR).each do |path| exts.each { |ext| exe = "#{path}/#{cmd}#{ext}" return exe if File.executable? exe } end return nil end |
#write_out_fastq_trim_script ⇒ Object
54 55 56 |
# File 'lib/fastq-factory.rb', line 54 def write_out_fastq_trim_script system("cp #{File.dirname(__FILE__)}/fastq-remove-orphans.pl /tmp/") end |