Class: Cagnut::Configuration::Checks::Datasets

Inherits:
Object
  • Object
show all
Defined in:
lib/cagnut/configuration/checks/datasets.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(config) ⇒ Datasets

Returns a new instance of Datasets.



8
9
10
# File 'lib/cagnut/configuration/checks/datasets.rb', line 8

# Stores the configuration that the check methods of this class read and
# update in place.
#
# @param config the configuration object; the other methods index it with
#   string keys such as 'samples', 'info' and 'cagnut'
def initialize config
  @config = config
end

Instance Attribute Details

#config ⇒ Object

Returns the value of attribute config.



6
7
8
# File 'lib/cagnut/configuration/checks/datasets.rb', line 6

# Reader for the configuration handed to the constructor.
#
# @return the object stored in @config (the same object the check methods
#   mutate, not a copy)
def config
  @config
end

Instance Method Details

#check(config_name) ⇒ Object



12
13
14
15
16
17
18
19
# File 'lib/cagnut/configuration/checks/datasets.rb', line 12

# Runs the dataset checks for one named configuration.
#
# Stores "<Cagnut.prefix_name>_<config_name>" under 'prefix_name', creates the
# analysis folder for it, then calls #setup_requirements once per entry of
# @config['samples'] with the entry, its position and the analysis folder.
#
# @param config_name suffix appended to Cagnut.prefix_name
# @return the (mutated) configuration held in @config
def check config_name
  prefix = "#{Cagnut.prefix_name}_#{config_name}"
  @config['prefix_name'] = prefix
  root_folder = create_analysis_folder(config)
  @config['samples'].each_with_index { |entry, position| setup_requirements(entry, position, root_folder) }
  @config
end

#check_datatype(qseq_dir, fastq_dir) ⇒ Object



133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
# File 'lib/cagnut/configuration/checks/datasets.rb', line 133

# Stores, for every sample, the result of #fetch_seqs over the tile files of
# the configured data type under the 'seqs_path' key.
#
# Only the tile-based data types are handled:
# * 'TILESQSEQ'  - globs "<qseq_dir>/*.txt*"
# * 'TILESFASTQ' - globs "<fastq_dir>/*.fastq*"
# Any other data type leaves the samples untouched. Previously that case fell
# through with a nil glob and raised TypeError from Dir[] as soon as there was
# at least one sample.
#
# NOTE(review): the basename is stripped of '.fastq' before matching, so the
# TILESFASTQ pattern appears to match only names that still contain "fastq"
# afterwards (e.g. "*.fastq.gz") - confirm this is intended.
#
# @param qseq_dir directory searched for TILESQSEQ input
# @param fastq_dir directory searched for TILESFASTQ input
# @return the configuration held in @config
def check_datatype qseq_dir, fastq_dir
  # The glob and pattern depend only on the data type, so resolve them once
  # instead of once per sample.
  case @config['info']['data_type']
  when 'TILESQSEQ'
    file = "#{qseq_dir}/*.txt*"
    pattern = '.*s_\d+_1_(\d+).*'
  when 'TILESFASTQ'
    file = "#{fastq_dir}/*.fastq*"
    pattern = '(.*_R1_.*).fastq.*+'
  else
    return @config
  end
  file_end = '.fastq'
  @config['samples'].each_with_index do |_sample, index|
    # Evaluated per sample so every sample receives its own result object.
    @config['samples'][index]['seqs_path'] = fetch_seqs Dir[file], file_end, pattern
  end
  @config
end

#check_file_size(flist, previous_size = 0) ⇒ Object



105
106
107
108
109
110
# File 'lib/cagnut/configuration/checks/datasets.rb', line 105

# Aborts the program at the first file in +flist+ whose size is zero.
#
# @param flist list of file paths to inspect, in order
# @param previous_size accepted for compatibility; not used by this method
# @return +flist+ when every file is non-empty
def check_file_size flist, previous_size=0
  flist.each do |path|
    abort "#{path} is empty." if File.size(path) <= 0
  end
end

#check_ln_file(sample, qseq_path, fastq_path) ⇒ Object



51
52
53
54
55
56
# File 'lib/cagnut/configuration/checks/datasets.rb', line 51

# Verifies that at least one file is present for the sample, either a
# "*.fastq*" entry in +fastq_path+ or any entry in +qseq_path+; aborts the
# program when both locations are empty.
#
# @param sample sample hash; only 'name' is read, for the abort message
# @param qseq_path directory expected to hold qseq files
# @param fastq_path directory expected to hold fastq files
# @return [nil] when something was found
def check_ln_file sample, qseq_path, fastq_path
  fastq_entries = Dir.glob("#{fastq_path}/*.fastq*")
  qseq_entries = Dir.glob("#{qseq_path}/*")
  if fastq_entries.empty? && qseq_entries.empty?
    abort "Not found #{sample['name']} files in fastq and qseq"
  end
end

#check_pu(index) ⇒ Object



129
130
131
# File 'lib/cagnut/configuration/checks/datasets.rb', line 129

# Ensures the sample at +index+ has a 'pu' value, defaulting it to 'NA' when
# the current value is nil or false.
#
# @param index position of the sample inside @config['samples']
# @return the sample's 'pu' value after defaulting
def check_pu index
  entry = @config['samples'][index]
  entry['pu'] || (entry['pu'] = 'NA')
end

#create_analysis_folder(config) ⇒ Object



21
22
23
24
25
26
# File 'lib/cagnut/configuration/checks/datasets.rb', line 21

# Creates (including missing parents) the analysis folder
# "<output_data_dir>/<prefix_name>".
#
# The output directory is read from @config['cagnut']['output_data_dir'] and
# passed through #dir_rm_slash; the prefix is read from the +config+ argument.
#
# @param config object providing 'prefix_name'
# @return [String] path of the analysis folder
def create_analysis_folder config
  base_dir = dir_rm_slash(@config['cagnut']['output_data_dir'])
  "#{base_dir}/#{config['prefix_name']}".tap { |folder| FileUtils.mkdir_p(folder) }
end

#dir_present?(dataset) ⇒ Boolean

Returns:

  • (Boolean)


64
65
66
67
68
# File 'lib/cagnut/configuration/checks/datasets.rb', line 64

# Checks that the data directory +dataset+ exists and aborts the program
# otherwise.
#
# Changes from the previous implementation:
# * the message names the directory that was actually checked (it used to
#   interpolate @config['datasets'] instead of the argument);
# * the failure goes through +abort+, like the other checks in this class, so
#   the message is written to stderr and the process exits with status 1
#   instead of a bare +exit+ (status 0);
# * the predicate returns true instead of nil on success.
#
# @param dataset path of the directory to check
# @return [Boolean] true when the directory exists (never returns otherwise)
# @raise [SystemExit] when the directory is missing
def dir_present? dataset
  return true if Dir.exist?(dataset)
  abort "Error: Missing data directory #{dataset}"
end

#dir_rm_slash(dir) ⇒ Object



70
71
72
# File 'lib/cagnut/configuration/checks/datasets.rb', line 70

# Returns a copy of +dir+ without one trailing "/" (if there is one).
#
# @param dir [String] directory path
# @return [String] the path with at most one trailing slash removed
def dir_rm_slash dir
  # \z anchors at the very end of the string, so at most one slash can match.
  dir.sub(%r{/\z}, '')
end

#fetch_flist(dir) ⇒ Object



84
85
86
87
88
# File 'lib/cagnut/configuration/checks/datasets.rb', line 84

# Lists the entries of +dir+ matching "*.fastq*" and aborts the program when
# there are none.
#
# @param dir directory to search
# @return [Array<String>] the matching paths (never empty)
def fetch_flist dir
  found = Dir.glob("#{dir}/*.fastq*")
  abort "No fastq found in #{dir}" if found.empty?
  found
end

#fetch_seqs(files_path, file_end, pattern) ⇒ Object



150
151
152
153
154
# File 'lib/cagnut/configuration/checks/datasets.rb', line 150

# Returns the first path in +files_path+ whose basename (with +file_end+
# stripped when it is the suffix) matches +pattern+.
#
# NOTE(review): the return value is a single path String on a match and an
# empty Array when nothing matches. The name and the previous
# map/flatten/compact shape suggest a list of all matches may have been
# intended - confirm against callers before changing the return shape.
#
# @param files_path [Array<String>] candidate file paths, checked in order
# @param file_end [String] suffix handed to File.basename
# @param pattern [String] regular-expression source interpolated into a Regexp
# @return [String, Array] first matching path, or [] when none matches
def fetch_seqs files_path, file_end, pattern
  first_hit = files_path.find do |candidate|
    File.basename(candidate, file_end).match(/#{pattern}/)
  end
  first_hit || []
end

#files_to_much?(flist) ⇒ Boolean

Returns:

  • (Boolean)


120
121
122
123
124
125
126
127
# File 'lib/cagnut/configuration/checks/datasets.rb', line 120

# Prints a warning to stdout when +flist+ holds more than one file, stating
# the configured data type and that only the first file will be processed.
#
# @param flist [Array] fastq files found for a sample
# @return [nil] always; the method only warns
def files_to_much? flist
  if flist.size > 1
    # Leading/trailing whitespace is part of the message and kept as is.
    warning = "\n    DATA_TYPE = #{@config['info']['data_type']} but more than one fastq found.\n" \
              "    Only the first would be processed.\n" \
              "    #{flist.inspect}\n  "
    puts warning
  end
end


#link_name(flist, sample_name) ⇒ Object



112
113
114
115
116
117
118
# File 'lib/cagnut/configuration/checks/datasets.rb', line 112

# Builds the name of the link created for a sample's single fastq file:
# "<sample_name>_sequence.txt", with ".gz" appended when the first file of
# +flist+ is gzip-compressed.
#
# Compression is detected from the file-name suffix. The previous
# +match '.gz'+ was compiled to the regexp /.gz/, which matched any character
# followed by "gz" anywhere in the path (for example a directory called
# "orgz").
#
# @param flist [Array<String>] fastq paths; only the first one is inspected
# @param sample_name [String] name of the sample
# @return [String] link file name
def link_name flist, sample_name
  suffix = flist[0].end_with?('.gz') ? '.gz' : ''
  "#{sample_name}_sequence.txt#{suffix}"
end

#ln_fastq_file(sample, flist, fastq_dir) ⇒ Object



90
91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/cagnut/configuration/checks/datasets.rb', line 90

# Symlinks a sample's fastq files into +fastq_dir+.
#
# All files are first passed to #check_file_size. For the data types
# ONEFASTQ / ONEFASTQSE only the first file is linked, under the name given by
# #link_name (after #files_to_much? has warned about extra files); for every
# other data type each file is linked into +fastq_dir+ under its own name.
# A file is linked only when its path contains the sample name.
#
# Links are created with FileUtils.ln_s instead of a backtick `ln -s`, so
# paths containing spaces or shell metacharacters are handled literally.
# Failures (for example an already existing link) are ignored, as they were
# when the shell command's errors were sent to /dev/null. The sample name is
# compared as a plain substring rather than being compiled into a regexp.
#
# @param sample sample hash; 'name' is read
# @param flist [Array<String>] fastq paths of the sample
# @param fastq_dir directory receiving the links
# @return not meaningful; callers should rely on the created links only
def ln_fastq_file sample, flist, fastq_dir
  check_file_size flist
  if %w(ONEFASTQ ONEFASTQSE).include? @config['info']['data_type']
    files_to_much? flist
    file_type = link_name flist, sample['name']
    links = [[flist[0], "#{fastq_dir}/#{file_type}"]]
  else
    links = flist.map { |file| [file, fastq_dir] }
  end
  links.each do |source, target|
    next unless source.include? sample['name']
    begin
      FileUtils.ln_s source, target
    rescue SystemCallError
      # Best effort: an existing or uncreatable link is skipped silently.
      next
    end
  end
end

#ln_seq_files(sample, seq_txt, fastq_file) ⇒ Object



58
59
60
61
62
# File 'lib/cagnut/configuration/checks/datasets.rb', line 58

# Links all sequence files of one sample: checks that the sample's data
# directory exists, links its sequence/qseq text files into +seq_txt+, then
# links its fastq files into +fastq_file+.
#
# @param sample sample hash; 'path' is read here
# @param seq_txt destination handed to #ln_seq_txt_file
# @param fastq_file destination handed to #ln_fastq_file
# @return whatever #ln_fastq_file returns
def ln_seq_files sample, seq_txt, fastq_file
  dir_present?(sample['path'])
  ln_seq_txt_file(sample, seq_txt)
  fastq_list = fetch_flist(sample['path'])
  ln_fastq_file(sample, fastq_list, fastq_file)
end

#ln_seq_files_to_folder(sample, qseq_path, fastq_path) ⇒ Object



45
46
47
48
49
# File 'lib/cagnut/configuration/checks/datasets.rb', line 45

# Links a sample's sequence files into its qseq/fastq folders, refreshes the
# per-sample 'seqs_path' entries through #check_datatype, and finally checks
# (via #check_ln_file) that at least one file is present afterwards.
#
# @param sample sample hash
# @param qseq_path folder receiving the sequence/qseq text links
# @param fastq_path folder receiving the fastq links
# @return whatever #check_ln_file returns
def ln_seq_files_to_folder sample, qseq_path, fastq_path
  ln_seq_files(sample, qseq_path, fastq_path)
  check_datatype(qseq_path, fastq_path)
  check_ln_file(sample, qseq_path, fastq_path)
end

#ln_seq_txt_file(sample, qseq_dir) ⇒ Object



74
75
76
77
78
79
80
81
82
# File 'lib/cagnut/configuration/checks/datasets.rb', line 74

# Symlinks a sample's "*_sequence.txt*" and "*_qseq.txt*" files from the
# sample's data directory into +qseq_dir+.
#
# All candidates are first passed to #check_file_size; a file is linked only
# when its path contains the sample name.
#
# Links are created with FileUtils.ln_s instead of a backtick `ln -s`, so
# paths containing spaces or shell metacharacters are handled literally.
# Failures (for example an already existing link) are ignored, as they were
# when the shell command's errors were sent to /dev/null. The sample name is
# compared as a plain substring rather than being compiled into a regexp.
#
# @param sample sample hash; 'path' and 'name' are read
# @param qseq_dir directory receiving the links
# @return [Array<String>] every candidate file that was found
def ln_seq_txt_file sample, qseq_dir
  files =
    Dir.glob("#{sample['path']}/*_sequence.txt*") + Dir.glob("#{sample['path']}/*_qseq.txt*")
  check_file_size files
  files.each do |file|
    next unless file.include? sample['name']
    begin
      FileUtils.ln_s file, qseq_dir
    rescue SystemCallError
      # Best effort: an existing or uncreatable link is skipped silently.
      next
    end
  end
end

#make_required_folders(config, sample, index, analysis_folder) ⇒ Object



42
43
# File 'lib/cagnut/configuration/checks/datasets.rb', line 42

# Does nothing in this file. #setup_requirements calls it after the sample's
# jobs/tmp folders exist, passing the configuration, the sample, its index
# and the sample's analysis folder.
# NOTE(review): presumably an extension point overridden or reopened
# elsewhere - confirm.
def make_required_folders config, sample, index, analysis_folder
end

#setup_requirements(sample, index, analysis_folder) ⇒ Object



28
29
30
31
32
33
34
35
36
37
38
39
40
# File 'lib/cagnut/configuration/checks/datasets.rb', line 28

# Prepares the working area of one sample.
#
# Prints the sample's data path, creates "<analysis_folder>/<name>" with its
# "jobs" and "tmp" sub-folders, and records in @config['samples'][index] the
# slash-trimmed 'path' plus the 'jobs' and 'tmp' locations. It then calls
# #make_required_folders and finally #check_pu.
#
# @param sample sample hash; 'path' and 'name' are read
# @param index position of the sample inside @config['samples']
# @param analysis_folder root folder of the current analysis
# @return whatever #check_pu returns
def setup_requirements sample, index, analysis_folder
  puts "Dataset : #{sample['path']}"
  sample_dir = "#{analysis_folder}/#{sample['name']}"
  FileUtils.mkdir_p(sample_dir) unless Dir.exist?(sample_dir)
  entry = @config['samples'][index]
  entry['path'] = dir_rm_slash(sample['path'])
  %w(jobs tmp).each do |subfolder|
    location = "#{sample_dir}/#{subfolder}"
    FileUtils.mkdir_p(location)
    entry[subfolder] = location
  end
  # mysql_insert if options[:mysql]
  make_required_folders(@config, sample, index, sample_dir)
  check_pu(index)
end