Class: Cheripic::Cmd

Inherits:
Object
  • Object
show all
Defined in:
lib/cheripic/cmd.rb

Overview

A command line option and processing object to handle input options

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(args) ⇒ Cmd

creates a Cmd object using input string entry

Parameters:

  • args (String)


19
20
21
22
# File 'lib/cheripic/cmd.rb', line 19

# Builds a Cmd from the command line input: the arguments are parsed into
# the options hash first, then the parsed options are validated.
# @param args [String] command line input passed on to parse_arguments
def initialize(args)
  parsed = parse_arguments(args)
  @options = parsed
  check_arguments
end

Instance Attribute Details

#optionsHash

Returns a hash of trollop option names as keys and user or default setting as values.

Returns:

  • (Hash)

    a hash of trollop option names as keys and user or default setting as values



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/cheripic/cmd.rb', line 9

# A command line option and processing object to handle input options:
# it declares the options, parses the supplied arguments, validates them
# and hands the resulting hash to the Implementer.
class Cmd

  require 'trollop'
  require 'pathname'
  require 'ostruct'

  # option names as keys and user or default settings as values
  attr_accessor :options

  # creates a Cmd object using input string entry
  # @param args [String]
  def initialize(args)
    @options = parse_arguments(args)
    check_arguments
  end

  # method to check input command string and run appropriate
  # method of the object (help or examples or parsing arguments).
  # One parser instance is built and used both for parsing and for the
  # standard trollop exception handling, so help and error output come
  # from the same parser that processed the arguments.
  # @param args [String]
  # @return the value produced by the parser's parse call
  def parse_arguments(args)
    parser = argument_parser
    Trollop::with_standard_exception_handling parser do
      # no input at all is treated the same as an explicit help request
      raise Trollop::HelpNeeded if args.empty? || args.include?('-h') || args.include?('--help')
      # print_examples ends the process itself, nothing is parsed afterwards
      print_examples if args.include?('--examples')
      parser.parse args
    end
  end

  # builds the trollop parser declaring every command line option
  # @return [Trollop::Parser] parser configured with version, banner and options
  def argument_parser
    # the parser block is not evaluated with this object as self,
    # so keep a reference to reach help_message from inside it
    cmds = self
    Trollop::Parser.new do
      version Cheripic::VERSION
      banner cmds.help_message
      opt :assembly, 'Assembly file in FASTA format',
          :short => '-f',
          :type => String
      opt :input_format, 'bulk and parent alignment file format types - set either pileup or bam or vcf',
          :short => '-F',
          :type => String,
          :default => 'pileup'
      opt :mut_bulk, 'Pileup or sorted BAM file alignments from mutant/trait of interest bulk 1',
          :short => '-a',
          :type => String
      opt :mut_bulk_vcf, 'vcf file for variants from mutant/trait of interest bulk 1',
          :type => String,
          :default => ''
      opt :bg_bulk, 'Pileup or sorted BAM file alignments from background/wildtype bulk 2',
          :short => '-b',
          :type => String
      opt :bg_bulk_vcf, 'vcf file for variants from background/wildtype bulk 2',
          :type => String,
          :default => ''
      opt :output, 'custom name tag to include in the output file name',
          :default => 'cheripic_results'
      opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated',
          :default => 'info'
      opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations',
          :type => Float,
          :default => 0.5
      opt :htlow, 'lower level for categorizing heterozygosity',
          :type => Float,
          :default => 0.2
      opt :hthigh, 'high level for categorizing heterozygosity',
          :type => Float,
          :default => 0.9
      opt :mindepth, 'minimum read depth at a position to consider for variant calls',
          :type => Integer,
          :default => 6
      opt :max_d_multiple, "multiplication factor for average coverage to calculate maximum read coverage
if set zero no calculation will be made from bam file.\nsetting this value will override user set max depth",
          :type => Integer,
          :default => 5
      opt :maxdepth, 'maximum read depth at a position to consider for variant calls
if set to zero no user max depth will be used',
          :type => Integer,
          :default => 0
      opt :min_non_ref_count, 'minimum read depth supporting non reference base at each position',
          :type => Integer,
          :default => 3
      opt :min_indel_count_support, 'minimum read depth supporting an indel at each position',
          :type => Integer,
          :default => 3
      opt :ambiguous_ref_bases, 'including variant at completely ambiguous bases in the reference',
          :type => String,
          :default => 'false'
      opt :mapping_quality, 'minimum mapping quality of read covering the position',
          :short => '-q',
          :type => Integer,
          :default => 20
      opt :base_quality, 'minimum base quality of bases covering the position',
          :short => '-Q',
          :type => Integer,
          :default => 15
      opt :noise, 'praportion of reads for a variant to conisder as noise',
          :type => Float,
          :default => 0.1
      opt :cross_type, 'type of cross used to generated mapping population - back or out',
          :type => String,
          :default => 'back'
      opt :use_all_contigs, 'option to select all contigs or only contigs containing variants for analysis',
          :type => String,
          :default => 'false'
      opt :include_low_hmes, 'option to include or discard variants from contigs with
low hme-score or bfr score to list in the final output',
          :type => String,
          :default => 'false'
      opt :polyploidy, 'Set if the data input is from polyploids',
          :type => String,
          :default => 'false'
      opt :mut_parent, 'Pileup or sorted BAM file alignments from mutant/trait of interest parent',
          :short => '-p',
          :type => String,
          :default => ''
      opt :bg_parent, 'Pileup or sorted BAM file alignments from background/wildtype parent',
          :short => '-r',
          :type => String,
          :default => ''
      opt :repeats_file, 'repeat masker output file for the assembly ',
          :short => '-R',
          :type => String,
          :default => ''
      opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations',
          :type => Float,
          :default => 0.05
      opt :sel_seq_len, 'sequence length to print from either side of selected variants',
          :type => Integer,
          :default => 50
      opt :examples, 'shows some example commands with explanation'
    end
  end

  # help message to display from command line
  # @return [String] banner text with the leading whitespace of every line removed
  def help_message
    msg = <<-EOS

    Cheripic v#{Cheripic::VERSION.dup}
    Authors: Shyam Rallapalli and Dan MacLean

    Description: Candidate mutation and closely linked marker selection for non reference genomes
    Uses bulk segregant data from non-reference sequence genomes

    Inputs:
    1. Needs a reference fasta file of asssembly use for variant analysis
    2. Pileup/Bam files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
    3. If providing bam files, you have to include vcf files for the respective bulks
    4. If polyploid species, include pileup/bam files from one or both parents

    USAGE:
    cheripic <options>

    OPTIONS:

    EOS
    msg.split("\n").map{ |line| line.lstrip }.join("\n")
  end

  # examples to display from command line; prints them and exits with status 0.
  # Example 3 uses the --use-all-contigs / --include-low-hmes options that
  # argument_parser actually declares.
  def print_examples
    msg = <<-EOS

    Cheripic v#{Cheripic::VERSION.dup}
    Authors: Shyam Rallapalli and Dan MacLean

    EXAMPLE COMMANDS:
      1. cheripic -f assembly.fa -a mutbulk.pileup -b bgbulk.pileup --output=cheripic_output
      2. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
            --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true --output cheripic_results
      3. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
            --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true
            --use-all-contigs true --include-low-hmes true --output cheripic_results
      4. cheripic -h or cheripic --help
      5. cheripic -v or cheripic --version

    EOS
    puts msg.split("\n").map{ |line| line.lstrip }.join("\n")
    exit(0)
  end

  # calls other methods to check if command line inputs are valid
  def check_arguments
    convert_boolean_strings
    check_output
    check_log_level
    check_input_entry
    check_input_types
  end

  # convert true or false options to boolean:
  # only the exact string 'false' becomes false, any other value becomes true
  def convert_boolean_strings
    %i{ambiguous_ref_bases use_all_contigs include_low_hmes polyploidy}.each do | symbol |
      next unless @options.key?(symbol)
      @options[symbol] = @options[symbol] != 'false'
    end
  end

  # for every file option entered as the string 'None': reset the value
  # to '' and remove the matching <option>_given key from the options
  def check_input_entry
    %i{assembly mut_bulk bg_bulk mut_bulk_vcf bg_bulk_vcf mut_parent bg_parent repeats_file}.each do | symbol |
      next unless @options.key?(symbol) && @options[symbol] == 'None'
      @options[symbol] = ''
      @options.delete(:"#{symbol}_given")
    end
  end

  # checks input files based on bulk file type:
  # bam input additionally needs the bulk vcf files and
  # polyploid data needs at least one parent file
  def check_input_types
    inputfiles = {}
    inputfiles[:required] = %i{assembly mut_bulk}
    inputfiles[:optional] = %i{bg_bulk}
    if @options[:input_format] == 'bam'
      inputfiles[:required] << :mut_bulk_vcf
      inputfiles[:optional] << :bg_bulk_vcf
    end
    inputfiles[:either] = %i{mut_parent bg_parent} if @options[:polyploidy]
    check_input_files(inputfiles)
  end

  # checks if input files are valid and stores their expanded paths
  # @param inputfiles [Hash] option names grouped under :required, :optional and :either
  # @raise [CheripicArgError] if a required option is not set or no :either file exists
  # @raise [CheripicIOError] if a required file does not exist
  def check_input_files(inputfiles)
    inputfiles.each_key do | type |
      inputfiles[type].flatten!
      found = false
      inputfiles[type].each do | symbol |
        file = @options[symbol]
        unless file
          next unless type == :required
          raise CheripicArgError.new("Options #{inputfiles[type]}, all must be specified. " \
                                     'Try --help for further help.')
        end
        # an empty entry means no file was given; expanding it would
        # silently turn it into the current working directory
        @options[symbol] = File.expand_path(file) unless file == ''
        next if type == :optional
        if type == :required && !File.exist?(file)
          raise CheripicIOError.new("#{symbol} file, #{file} does not exist: ")
        elsif type == :either && File.exist?(file)
          found = true
        end
      end
      next unless type == :either && !found
      raise CheripicArgError.new("One of the options #{inputfiles[type]}, must be specified. " \
                                 'Try --help for further help.')
    end
  end

  # rejects output tags containing any of the listed special characters,
  # derives the two output file names and
  # checks if files with output tag name already exists
  # @raise [CheripicArgError] for a rejected tag or an already existing output file
  def check_output
    if (@options[:output].split('') & %w{# / : * ? ' < > | & $ ,}).any?
      raise CheripicArgError.new 'please choose a name tag that contains ' +
                                     'alphanumeric characters, hyphen(-) and underscore(_) only'
    end
    @options[:hmes_frags] = "#{@options[:output]}_selected_hme_variants.txt"
    @options[:bfr_frags] = "#{@options[:output]}_selected_bfr_variants.txt"
    [@options[:hmes_frags], @options[:bfr_frags]].each do | file |
      if File.exist?(file)
        raise CheripicArgError.new "'#{file}' file exists " +
                                       'please choose a different name tag to be included in the output file name'
      end
    end
  end

  # checks and sets logger level
  # NOTE(review): logger is not defined in this class; presumably provided by the enclosing project
  # @raise [CheripicArgError] if the level is not one of error, info, warn, debug
  def check_log_level
    unless %w(error info warn debug).include?(@options[:loglevel])
      raise CheripicArgError.new "Loglevel #{@options[:loglevel]} is not valid. " +
                                     'It must be one of: error, info, warn, debug.'
    end
    logger.level = Yell::Level.new @options[:loglevel].to_sym
  end

  # Initializes an Implementer object using input options
  # and calls run method of the Implementer to start the pipeline.
  # The output file names are expanded to full paths first and the
  # options hash is passed to the Implementer object
  def run
    @options[:hmes_frags] = File.expand_path @options[:hmes_frags]
    @options[:bfr_frags] = File.expand_path @options[:bfr_frags]
    analysis = Implementer.new(@options)
    analysis.run
  end

end

Instance Method Details

#argument_parserHash

Builds the trollop argument parser that declares every command line option accepted by cheripic.

Returns:

  • (Hash)

    a hash of trollop option names as keys and user or default setting as values



40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/cheripic/cmd.rb', line 40

# Declares every command line option on a new trollop parser.
# The banner text comes from help_message and the version from Cheripic::VERSION.
# @return [Trollop::Parser] the configured parser
def argument_parser
  # the parser block does not run with this object as self, so keep a handle on it
  owner = self
  Trollop::Parser.new do
    version Cheripic::VERSION
    banner owner.help_message

    # input files and their format
    opt :assembly, 'Assembly file in FASTA format', short: '-f', type: String
    opt :input_format, 'bulk and parent alignment file format types - set either pileup or bam or vcf',
        short: '-F', type: String, default: 'pileup'
    opt :mut_bulk, 'Pileup or sorted BAM file alignments from mutant/trait of interest bulk 1',
        short: '-a', type: String
    opt :mut_bulk_vcf, 'vcf file for variants from mutant/trait of interest bulk 1', type: String, default: ''
    opt :bg_bulk, 'Pileup or sorted BAM file alignments from background/wildtype bulk 2',
        short: '-b', type: String
    opt :bg_bulk_vcf, 'vcf file for variants from background/wildtype bulk 2', type: String, default: ''

    # output naming and logging
    opt :output, 'custom name tag to include in the output file name', default: 'cheripic_results'
    opt :loglevel, 'Choose any one of "info / warn / debug" level for logs generated', default: 'info'

    # thresholds and filters
    opt :hmes_adjust, 'factor added to snp count of each contig to adjust for hme score calculations',
        type: Float, default: 0.5
    opt :htlow, 'lower level for categorizing heterozygosity', type: Float, default: 0.2
    opt :hthigh, 'high level for categorizing heterozygosity', type: Float, default: 0.9
    opt :mindepth, 'minimum read depth at a position to consider for variant calls', type: Integer, default: 6
    opt :max_d_multiple, "multiplication factor for average coverage to calculate maximum read coverage
if set zero no calculation will be made from bam file.\nsetting this value will override user set max depth",
        type: Integer, default: 5
    opt :maxdepth, 'maximum read depth at a position to consider for variant calls
if set to zero no user max depth will be used',
        type: Integer, default: 0
    opt :min_non_ref_count, 'minimum read depth supporting non reference base at each position',
        type: Integer, default: 3
    opt :min_indel_count_support, 'minimum read depth supporting an indel at each position',
        type: Integer, default: 3
    opt :ambiguous_ref_bases, 'including variant at completely ambiguous bases in the reference',
        type: String, default: 'false'
    opt :mapping_quality, 'minimum mapping quality of read covering the position',
        short: '-q', type: Integer, default: 20
    opt :base_quality, 'minimum base quality of bases covering the position',
        short: '-Q', type: Integer, default: 15
    opt :noise, 'praportion of reads for a variant to conisder as noise', type: Float, default: 0.1
    opt :cross_type, 'type of cross used to generated mapping population - back or out',
        type: String, default: 'back'
    opt :use_all_contigs, 'option to select all contigs or only contigs containing variants for analysis',
        type: String, default: 'false'
    opt :include_low_hmes, 'option to include or discard variants from contigs with
low hme-score or bfr score to list in the final output',
        type: String, default: 'false'

    # polyploid and parental inputs
    opt :polyploidy, 'Set if the data input is from polyploids', type: String, default: 'false'
    opt :mut_parent, 'Pileup or sorted BAM file alignments from mutant/trait of interest parent',
        short: '-p', type: String, default: ''
    opt :bg_parent, 'Pileup or sorted BAM file alignments from background/wildtype parent',
        short: '-r', type: String, default: ''
    opt :repeats_file, 'repeat masker output file for the assembly ', short: '-R', type: String, default: ''
    opt :bfr_adjust, 'factor added to hemi snp frequency of each parent to adjust for bfr calculations',
        type: Float, default: 0.05
    opt :sel_seq_len, 'sequence length to print from either side of selected variants',
        type: Integer, default: 50
    opt :examples, 'shows some example commands with explanation'
  end
end

#check_argumentsObject

calls other methods to check if command line inputs are valid



191
192
193
194
195
196
197
# File 'lib/cheripic/cmd.rb', line 191

# Runs every validation step on the parsed options, in a fixed order:
# boolean conversion, output tag, log level, 'None' entries, input files.
def check_arguments
  validation_steps = %i[
    convert_boolean_strings
    check_output
    check_log_level
    check_input_entry
    check_input_types
  ]
  validation_steps.each { |step| send(step) }
end

#check_input_entryObject

Resets a file option to an empty string and removes its matching `_given` entry when the option was entered as the string 'None'.



209
210
211
212
213
214
215
216
217
218
219
# File 'lib/cheripic/cmd.rb', line 209

# For every file option entered as the string 'None': resets the value
# to '' and removes the matching <option>_given key from the options.
# Options with any other value are left untouched.
def check_input_entry
  file_options = %i[assembly mut_bulk bg_bulk mut_bulk_vcf bg_bulk_vcf mut_parent bg_parent repeats_file]
  file_options.each do |name|
    next unless @options.key?(name) && @options[name] == 'None'

    @options[name] = ''
    @options.delete(:"#{name}_given")
  end
end

#check_input_files(inputfiles) ⇒ Object

checks if input files are valid



237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
# File 'lib/cheripic/cmd.rb', line 237

# Validates the file options named in inputfiles and stores their expanded paths.
#
# * :required - the option must be set and the file must exist
# * :optional - the path is expanded, existence is not checked
# * :either   - at least one of the listed files must exist
#
# Empty-string entries are left as '' instead of being expanded, because
# File.expand_path('') returns the current working directory.
#
# @param inputfiles [Hash] option names grouped under :required, :optional and :either
# @raise [CheripicArgError] if a required option is not set, or no :either file exists
# @raise [CheripicIOError] if a required file does not exist
def check_input_files(inputfiles)
  inputfiles.each_key do |type|
    inputfiles[type].flatten!
    # tracked per group so the result of one group never leaks into another
    found = false
    inputfiles[type].each do |symbol|
      file = @options[symbol]
      unless file
        next unless type == :required
        # name only the group being checked rather than the whole hash
        raise CheripicArgError.new("Options #{inputfiles[type]}, all must be specified. " \
                                   'Try --help for further help.')
      end
      @options[symbol] = File.expand_path(file) unless file == ''
      next if type == :optional
      if type == :required && !File.exist?(file)
        raise CheripicIOError.new("#{symbol} file, #{file} does not exist: ")
      elsif type == :either && File.exist?(file)
        found = true
      end
    end
    next unless type == :either && !found
    raise CheripicArgError.new("One of the options #{inputfiles[type]}, must be specified. " \
                               'Try --help for further help.')
  end
end

#check_input_typesObject

checks input files based on bulk file type



222
223
224
225
226
227
228
229
230
231
232
233
234
# File 'lib/cheripic/cmd.rb', line 222

# Works out which file options have to be checked and passes them on to
# check_input_files.
#
# * assembly and mut_bulk are always required, bg_bulk is optional
# * bam input additionally requires mut_bulk_vcf and makes bg_bulk_vcf optional
# * with polyploidy set, either mut_parent or bg_parent is needed
#
# The vcf options are appended as plain symbols, so the lists are flat
# before they reach check_input_files.
def check_input_types
  inputfiles = {
    required: %i[assembly mut_bulk],
    optional: %i[bg_bulk]
  }
  if @options[:input_format] == 'bam'
    inputfiles[:required] << :mut_bulk_vcf
    inputfiles[:optional] << :bg_bulk_vcf
  end
  inputfiles[:either] = %i[mut_parent bg_parent] if @options[:polyploidy]
  check_input_files(inputfiles)
end

#check_log_levelObject

checks and sets logger level



280
281
282
283
284
285
286
# File 'lib/cheripic/cmd.rb', line 280

# Validates the requested log level and applies it to the logger.
# NOTE(review): logger is defined outside this listing; presumably supplied by the enclosing project.
# @raise [CheripicArgError] if the level is not one of error, info, warn, debug
def check_log_level
  requested = @options[:loglevel]
  accepted = %w[error info warn debug]
  if !accepted.include?(requested)
    raise CheripicArgError.new("Loglevel #{requested} is not valid. It must be one of: error, info, warn, debug.")
  end
  logger.level = Yell::Level.new(requested.to_sym)
end

#check_outputObject

checks if files with output tag name already exists



264
265
266
267
268
269
270
271
272
273
274
275
276
277
# File 'lib/cheripic/cmd.rb', line 264

# Validates the output name tag and derives the two result file names.
# A tag containing any of the listed special characters is rejected, and
# so is a tag for which one of the derived output files already exists.
# @raise [CheripicArgError] for a rejected tag or an existing output file
def check_output
  tag = @options[:output]
  rejected_chars = %w[# / : * ? ' < > | & $ ,]
  if tag.each_char.any? { |char| rejected_chars.include?(char) }
    raise CheripicArgError.new('please choose a name tag that contains ' \
                               'alphanumeric characters, hyphen(-) and underscore(_) only')
  end

  hme_file = "#{tag}_selected_hme_variants.txt"
  bfr_file = "#{tag}_selected_bfr_variants.txt"
  @options[:hmes_frags] = hme_file
  @options[:bfr_frags] = bfr_file

  [hme_file, bfr_file].each do |path|
    next unless File.exist?(path)

    raise CheripicArgError.new("'#{path}' file exists " \
                               'please choose a different name tag to be included in the output file name')
  end
end

#convert_boolean_stringsObject

convert true or false options to boolean



200
201
202
203
204
205
206
# File 'lib/cheripic/cmd.rb', line 200

# Replaces the string value of each flag-like option with a boolean.
# Only the exact string 'false' maps to false; every other value maps to true.
# Options missing from the hash are skipped.
def convert_boolean_strings
  flag_names = %i[ambiguous_ref_bases use_all_contigs include_low_hmes polyploidy]
  flag_names.each do |flag|
    next unless @options.key?(flag)

    @options[flag] = @options[flag] != 'false'
  end
end

#help_messageObject

help message to display from command line



144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
# File 'lib/cheripic/cmd.rb', line 144

# Banner text shown above the option list in the command line help.
# @return [String] the text with the leading whitespace of every line removed
def help_message
  banner_text = <<-EOS

  Cheripic v#{Cheripic::VERSION.dup}
  Authors: Shyam Rallapalli and Dan MacLean

  Description: Candidate mutation and closely linked marker selection for non reference genomes
  Uses bulk segregant data from non-reference sequence genomes

  Inputs:
  1. Needs a reference fasta file of asssembly use for variant analysis
  2. Pileup/Bam files for mutant (phenotype of interest) bulks and background (wildtype phenotype) bulks
  3. If providing bam files, you have to include vcf files for the respective bulks
  4. If polyploid species, include pileup/bam files from one or both parents

  USAGE:
  cheripic <options>

  OPTIONS:

  EOS
  # drop the heredoc indentation line by line
  unindented = banner_text.split("\n").map(&:lstrip)
  unindented.join("\n")
end

#parse_arguments(args) ⇒ Object

method to check input command string and run appropriate method of the object (help or examples or parsing arguments)

Parameters:

  • args (String)


27
28
29
30
31
32
33
34
35
36
# File 'lib/cheripic/cmd.rb', line 27

# Decides what to do with the command line input: show the help, show the
# examples, or parse the arguments.
#
# A single parser is built and used both for parsing and for the standard
# trollop exception handling, so help and error output come from the same
# parser instance that processed the arguments.
#
# @param args [String] command line input (documented as String; presumably
#   the ARGV-style list trollop parses - confirm against callers)
# @return the value produced by the parser's parse call
def parse_arguments(args)
  parser = argument_parser
  Trollop::with_standard_exception_handling parser do
    # no input at all is treated the same as an explicit help request
    raise Trollop::HelpNeeded if args.empty? || args.include?('-h') || args.include?('--help')
    # print_examples is expected to end the process itself
    print_examples if args.include?('--examples')
    parser.parse args
  end
end

examples to display from command line



169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/cheripic/cmd.rb', line 169

# Prints example command lines to standard output and exits with status 0.
# Example 3 uses the --use-all-contigs and --include-low-hmes options; the
# option names shown there previously are not accepted by the argument parser.
def print_examples
  msg = <<-EOS

  Cheripic v#{Cheripic::VERSION.dup}
  Authors: Shyam Rallapalli and Dan MacLean

  EXAMPLE COMMANDS:
    1. cheripic -f assembly.fa -a mutbulk.pileup -b bgbulk.pileup --output=cheripic_output
    2. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
          --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true --output cheripic_results
    3. cheripic --assembly assembly.fa --mut-bulk mutbulk.pileup --bg-bulk bgbulk.pileup
          --mut-parent mutparent.pileup --bg-parent bgparent.pileup --polyploidy true
          --use-all-contigs true --include-low-hmes true --output cheripic_results
    4. cheripic -h or cheripic --help
    5. cheripic -v or cheripic --version

  EOS
  # drop the heredoc indentation line by line before printing
  puts msg.split("\n").map(&:lstrip).join("\n")
  exit(0)
end

#runObject

Initializes an Implementer object using the input options and calls the run method of the Implementer to start the pipeline. A hash of trollop option names as keys and user or default settings as values is passed to the Implementer object.



292
293
294
295
296
297
# File 'lib/cheripic/cmd.rb', line 292

# Starts the pipeline: both output file names are expanded to full paths,
# then the options hash is handed to a new Implementer whose run method is called.
# @return whatever Implementer#run returns
def run
  %i[hmes_frags bfr_frags].each do |key|
    @options[key] = File.expand_path(@options[key])
  end
  Implementer.new(@options).run
end