Class: Cagnut::Configuration::Checks::Datasets
- Inherits:
-
Object
- Object
- Cagnut::Configuration::Checks::Datasets
- Defined in:
- lib/cagnut/configuration/checks/datasets.rb
Instance Attribute Summary collapse
-
#config ⇒ Object
Returns the value of attribute config.
Instance Method Summary collapse
- #check(config_name) ⇒ Object
- #check_datatype(qseq_dir, fastq_dir) ⇒ Object
- #check_file_size(flist, previous_size = 0) ⇒ Object
- #check_ln_file(sample, qseq_path, fastq_path) ⇒ Object
- #check_pu(index) ⇒ Object
- #create_analysis_folder(config) ⇒ Object
- #dir_present?(dataset) ⇒ Boolean
- #dir_rm_slash(dir) ⇒ Object
- #fetch_flist(dir) ⇒ Object
- #fetch_seqs(files_path, file_end, pattern) ⇒ Object
- #files_to_much?(flist) ⇒ Boolean
-
#initialize(config) ⇒ Datasets
constructor
A new instance of Datasets.
- #link_name(flist, sample_name) ⇒ Object
- #ln_fastq_file(sample, flist, fastq_dir) ⇒ Object
- #ln_seq_files(sample, seq_txt, fastq_file) ⇒ Object
- #ln_seq_files_to_folder(sample, qseq_path, fastq_path) ⇒ Object
- #ln_seq_txt_file(sample, qseq_dir) ⇒ Object
- #make_required_folders(config, sample, index, analysis_folder) ⇒ Object
- #setup_requirements(sample, index, analysis_folder) ⇒ Object
Constructor Details
#initialize(config) ⇒ Datasets
Returns a new instance of Datasets.
8 9 10 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 8

# Keeps a reference to the configuration that the other methods of this
# class read from and write to through @config.
#
# @param config [Object] hash-like configuration (it is indexed with string
#   keys such as 'samples' and 'info' by the other methods)
def initialize(config)
  @config = config
end
Instance Attribute Details
#config ⇒ Object
Returns the value of attribute config.
6 7 8 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 6

# Reader for the configuration object held in @config.
#
# @return [Object] the same object that was stored in @config (not a copy)
def config
  @config
end
Instance Method Details
#check(config_name) ⇒ Object
12 13 14 15 16 17 18 19 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 12

# Entry point: records the run prefix in the configuration, creates the
# analysis folder and runs setup_requirements for every sample.
#
# @param config_name [#to_s] suffix appended to Cagnut.prefix_name
# @return [Object] the (mutated) configuration held in @config
def check(config_name)
  prefix = "#{Cagnut.prefix_name}_#{config_name}"
  @config['prefix_name'] = prefix
  root_folder = create_analysis_folder(config)
  @config['samples'].each.with_index do |entry, position|
    setup_requirements(entry, position, root_folder)
  end
  @config
end
#check_datatype(qseq_dir, fastq_dir) ⇒ Object
133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 133

# For the tile-based data types, stores the result of fetch_seqs under
# 'seqs_path' for every sample in the configuration.
#
# Any other data type is left untouched. Previously such a type reached
# Dir[nil] and raised TypeError, because no `when` branch had assigned the
# glob.
#
# @param qseq_dir [String] directory globbed for '*.txt*' (TILESQSEQ)
# @param fastq_dir [String] directory globbed for '*.fastq*' (TILESFASTQ)
# @return [Object] the configuration held in @config
def check_datatype(qseq_dir, fastq_dir)
  # The data type does not depend on the sample, so resolve it once.
  # NOTE(review): file_end is '.fastq' for both types, including the '*.txt*'
  # qseq glob -- confirm this is intended.
  glob, pattern, file_end =
    case @config['info']['data_type']
    when 'TILESQSEQ'
      ["#{qseq_dir}/*.txt*", '.*s_\d+_1_(\d+).*', '.fastq']
    when 'TILESFASTQ'
      ["#{fastq_dir}/*.fastq*", '(.*_R1_.*).fastq.*+', '.fastq']
    end
  return @config unless glob

  @config['samples'].each do |entry|
    entry['seqs_path'] = fetch_seqs(Dir[glob], file_end, pattern)
  end
  @config
end
#check_file_size(flist, previous_size = 0) ⇒ Object
105 106 107 108 109 110 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 105

# Aborts the program as soon as one of the listed files has size zero.
#
# @param flist [Array<String>] paths whose size is checked with File.size
# @param previous_size [Integer] not used by this method
# @return [Array<String>] flist itself when no file is empty
def check_file_size(flist, previous_size = 0)
  flist.each do |path|
    abort "#{path} is empty." if File.size(path) <= 0
  end
end
#check_ln_file(sample, qseq_path, fastq_path) ⇒ Object
51 52 53 54 55 56 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 51

# Aborts when neither the fastq folder (entries matching '*.fastq*') nor the
# qseq folder (any entry) contains anything.
#
# @param sample [Hash] only sample['name'] is read, for the abort message
# @param qseq_path [String] folder globbed with '*'
# @param fastq_path [String] folder globbed with '*.fastq*'
# @return [nil] when at least one entry was found
def check_ln_file(sample, qseq_path, fastq_path)
  fastq_entries = Dir.glob("#{fastq_path}/*.fastq*")
  qseq_entries = Dir.glob("#{qseq_path}/*")
  nothing_found = fastq_entries.empty? && qseq_entries.empty?
  abort "Not found #{sample['name']} files in fastq and qseq" if nothing_found
end
#check_pu(index) ⇒ Object
129 130 131 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 129

# Gives the sample at +index+ a 'pu' value of 'NA' when it has none
# (missing, nil or false).
#
# @param index [Integer] position of the sample in @config['samples']
# @return [Object] the sample's 'pu' value after defaulting
def check_pu(index)
  entry = @config['samples'][index]
  entry['pu'] = 'NA' unless entry['pu']
  entry['pu']
end
#create_analysis_folder(config) ⇒ Object
21 22 23 24 25 26 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 21

# Creates "<output_data_dir>/<prefix_name>" (including missing parents) and
# returns that path. The output directory comes from
# @config['cagnut']['output_data_dir'] with one trailing slash removed.
#
# @param config [Hash] only config['prefix_name'] is read
# @return [String] path of the analysis folder
def create_analysis_folder(config)
  base_dir = dir_rm_slash(@config['cagnut']['output_data_dir'])
  "#{base_dir}/#{config['prefix_name']}".tap do |folder|
    FileUtils.mkdir_p(folder)
  end
end
#dir_present?(dataset) ⇒ Boolean
64 65 66 67 68 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 64

# Verifies that a dataset directory exists, aborting the program otherwise.
#
# The error names the directory that was actually checked (the previous
# message interpolated @config['datasets'] instead of the argument) and is
# reported through abort, i.e. on stderr with a non-zero exit status, like
# the other failure paths in this class.
#
# @param dataset [String] directory path to check
# @return [Boolean] true when the directory exists
# @raise [SystemExit] when the directory is missing
def dir_present?(dataset)
  return true if Dir.exist?(dataset)

  abort "Error: Missing data directory #{dataset}"
end
#dir_rm_slash(dir) ⇒ Object
70 71 72 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 70

# Returns a copy of +dir+ without its final '/', if it ends with one.
# Only a single trailing slash is removed.
#
# @param dir [String] directory path
# @return [String] new string without one trailing slash
def dir_rm_slash(dir)
  dir.chomp('/')
end
#fetch_flist(dir) ⇒ Object
84 85 86 87 88 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 84

# Lists the entries of +dir+ matching '*.fastq*' and aborts when there are
# none.
#
# @param dir [String] directory to search
# @return [Array<String>] the matching paths (never empty)
def fetch_flist(dir)
  found = Dir.glob("#{dir}/*.fastq*")
  abort "No fastq found in #{dir}" if found.empty?
  found
end
#fetch_seqs(files_path, file_end, pattern) ⇒ Object
150 151 152 153 154 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 150

# Selects the paths whose basename (with +file_end+ stripped) matches
# +pattern+.
#
# The previous implementation used `return file` inside the map block, which
# left the whole method on the first match and handed back a single String;
# the trailing `.flatten.compact` was only reached when nothing matched. The
# method now always returns an Array containing every matching path.
#
# @param files_path [Array<String>] candidate file paths
# @param file_end [String] suffix passed to File.basename for stripping
# @param pattern [String] regular-expression source matched on the basename
# @return [Array<String>] matching paths in their original order
def fetch_seqs(files_path, file_end, pattern)
  matcher = Regexp.new(pattern) # compile once instead of per file
  files_path.select do |file|
    File.basename(file, file_end) =~ matcher
  end
end
#files_to_much?(flist) ⇒ Boolean
120 121 122 123 124 125 126 127 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 120

# Tells whether more than one file was supplied, printing a warning when so.
#
# The predicate used to return nil on both paths (the value of `puts` or of
# the bare `return`); it now answers with a real boolean.
#
# @param flist [Array<String>] fastq paths found for a sample
# @return [Boolean] true when flist holds more than one entry
def files_to_much?(flist)
  return false unless flist.size > 1

  puts %( DATA_TYPE = #{@config['info']['data_type']} but more than one fastq found. Only the first would be processed. #{flist.inspect} )
  true
end
#link_name(flist, sample_name) ⇒ Object
112 113 114 115 116 117 118 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 112

# Builds the link file name for a sample: "<sample>_sequence.txt", with a
# ".gz" suffix when the first listed file is gzip-named.
#
# The check is a literal suffix test. The former `match '.gz'` was compiled
# to the regexp /.gz/, so any path containing "gz" preceded by any character
# (for example a directory named "orgz") was treated as compressed.
#
# @param flist [Array<String>] fastq paths; only the first entry is inspected
# @param sample_name [String] sample name used as the file-name prefix
# @return [String] link file name
def link_name(flist, sample_name)
  suffix = flist[0].end_with?('.gz') ? '.gz' : ''
  "#{sample_name}_sequence.txt#{suffix}"
end
#ln_fastq_file(sample, flist, fastq_dir) ⇒ Object
90 91 92 93 94 95 96 97 98 99 100 101 102 103 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 90

# Symlinks a sample's fastq files into +fastq_dir+.
#
# For the ONEFASTQ / ONEFASTQSE data types only the first file is linked,
# under the name produced by link_name; for every other data type each file
# whose path contains the sample name is linked into +fastq_dir+.
#
# Changes from the former shell-based version:
# * links are created with FileUtils.ln_s instead of interpolating paths
#   into `ln -s ... 2>/dev/null`, so paths with spaces or shell
#   metacharacters work and cannot inject commands;
# * the sample name is matched literally (String#include?) rather than being
#   compiled into a regexp by String#match;
# * linking stays best-effort: an already existing link is ignored, any other
#   system error is reported with warn instead of being discarded.
#
# @param sample [Hash] only sample['name'] is read
# @param flist [Array<String>] candidate fastq paths
# @param fastq_dir [String] destination directory
# @return [nil]
def ln_fastq_file(sample, flist, fastq_dir)
  check_file_size flist
  name = sample['name']
  links =
    if %w(ONEFASTQ ONEFASTQSE).include?(@config['info']['data_type'])
      files_to_much? flist
      target = "#{fastq_dir}/#{link_name(flist, name)}"
      flist[0].include?(name) ? [[flist[0], target]] : []
    else
      flist.select { |file| file.include?(name) }.map { |file| [file, fastq_dir] }
    end

  links.each do |source, destination|
    begin
      FileUtils.ln_s source, destination
    rescue Errno::EEXIST
      # Link already present (e.g. a re-run); nothing to do.
      next
    rescue SystemCallError => e
      warn "Could not link #{source} to #{destination}: #{e.message}"
    end
  end
  nil
end
#ln_seq_files(sample, seq_txt, fastq_file) ⇒ Object
58 59 60 61 62 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 58

# Runs the linking steps for one sample, in order: directory check on
# sample['path'], ln_seq_txt_file, then ln_fastq_file with the list returned
# by fetch_flist.
#
# @param sample [Hash] sample entry; sample['path'] is read here
# @param seq_txt [String] passed to ln_seq_txt_file as the qseq destination
# @param fastq_file [String] passed to ln_fastq_file as the fastq destination
# @return [Object] whatever ln_fastq_file returns
def ln_seq_files(sample, seq_txt, fastq_file)
  dir_present?(sample['path'])
  ln_seq_txt_file(sample, seq_txt)
  fastq_list = fetch_flist(sample['path'])
  ln_fastq_file(sample, fastq_list, fastq_file)
end
#ln_seq_files_to_folder(sample, qseq_path, fastq_path) ⇒ Object
45 46 47 48 49 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 45

# Links a sample's sequence files, refreshes the per-sample sequence lists
# via check_datatype, and finally verifies with check_ln_file that the
# destination folders are not both empty.
#
# @param sample [Hash] sample entry forwarded to the helpers
# @param qseq_path [String] qseq destination folder
# @param fastq_path [String] fastq destination folder
# @return [Object] whatever check_ln_file returns
def ln_seq_files_to_folder(sample, qseq_path, fastq_path)
  ln_seq_files(sample, qseq_path, fastq_path)
  check_datatype(qseq_path, fastq_path)
  check_ln_file(sample, qseq_path, fastq_path)
end
#ln_seq_txt_file(sample, qseq_dir) ⇒ Object
74 75 76 77 78 79 80 81 82 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 74

# Symlinks a sample's '*_sequence.txt*' and '*_qseq.txt*' files from
# sample['path'] into +qseq_dir+. Only files whose path contains the sample
# name are linked.
#
# Changes from the former shell-based version:
# * links are created with FileUtils.ln_s instead of interpolating paths
#   into `ln -s ... 2>/dev/null`, so paths with spaces or shell
#   metacharacters work and cannot inject commands;
# * the sample name is matched literally (String#include?) rather than being
#   compiled into a regexp by String#match;
# * linking stays best-effort: an already existing link is ignored, any other
#   system error is reported with warn instead of being discarded.
#
# @param sample [Hash] sample['path'] and sample['name'] are read
# @param qseq_dir [String] destination directory
# @return [Array<String>] every file found by the two globs
def ln_seq_txt_file(sample, qseq_dir)
  source_dir = sample['path']
  files = Dir.glob("#{source_dir}/*_sequence.txt*") +
          Dir.glob("#{source_dir}/*_qseq.txt*")
  check_file_size files
  files.each do |file|
    next unless file.include?(sample['name'])

    begin
      FileUtils.ln_s file, qseq_dir
    rescue Errno::EEXIST
      # Link already present (e.g. a re-run); nothing to do.
      next
    rescue SystemCallError => e
      warn "Could not link #{file} to #{qseq_dir}: #{e.message}"
    end
  end
end
#make_required_folders(config, sample, index, analysis_folder) ⇒ Object
42 43 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 42

# Called by setup_requirements for each sample. The body is empty here, so
# the call does nothing and returns nil.
# NOTE(review): presumably a hook that other code overrides -- confirm.
#
# @param config [Object] configuration (unused here)
# @param sample [Hash] sample entry (unused here)
# @param index [Integer] sample position (unused here)
# @param analysis_folder [String] folder path (unused here)
# @return [nil]
def make_required_folders(config, sample, index, analysis_folder)
end
#setup_requirements(sample, index, analysis_folder) ⇒ Object
28 29 30 31 32 33 34 35 36 37 38 39 40 |
# File 'lib/cagnut/configuration/checks/datasets.rb', line 28

# Prepares the working area of one sample: prints the dataset path, creates
# "<analysis_folder>/<name>" with its 'jobs' and 'tmp' subfolders, records
# those paths (and the slash-trimmed dataset path) in the sample's config
# entry, then calls make_required_folders and check_pu.
#
# @param sample [Hash] sample entry; 'path' and 'name' are read
# @param index [Integer] position of the sample in @config['samples']
# @param analysis_folder [String] root folder of the analysis
# @return [Object] whatever check_pu returns
def setup_requirements(sample, index, analysis_folder)
  puts "Dataset : #{sample['path']}"
  sample_dir = "#{analysis_folder}/#{sample['name']}"
  FileUtils.mkdir_p(sample_dir) unless Dir.exist?(sample_dir)

  entry = @config['samples'][index]
  entry['path'] = dir_rm_slash(sample['path'])
  # 'jobs' is created and recorded before 'tmp', as before.
  %w(jobs tmp).each do |subfolder|
    target = "#{analysis_folder}/#{sample['name']}/#{subfolder}"
    FileUtils.mkdir_p(target)
    entry[subfolder] = target
  end
  # mysql_insert if options[:mysql]
  make_required_folders(@config, sample, index, sample_dir)
  check_pu(index)
end