Class: Cheripic::Implementer

Inherits:
Object
  • Object
show all
Defined in:
lib/cheripic/implementer.rb

Overview

An Implementer object runs the pipeline using the options supplied by a Cmd object.

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(inputs) ⇒ Implementer

Initializes an Implementer object using inputs from cmd object

Parameters:

  • inputs (Hash)

    a hash of trollop option names as keys and user or default setting as values from Cmd object



23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
# File 'lib/cheripic/implementer.rb', line 23

# Splits the Cmd inputs into two groups: file-related options, kept on this
# object as an OpenStruct, and analysis settings, pushed to Options.
# Also resets the extraction and run flags.
# @param inputs [Hash] a hash of trollop option names as keys and user or default setting as values from Cmd object
def initialize(inputs)
  file_keys = %i[
    assembly input_format
    mut_bulk bg_bulk
    mut_bulk_vcf bg_bulk_vcf
    hmes_frags bfr_frags
    mut_parent bg_parent
    repeats_file
  ]
  file_options = inputs.select { |name| file_keys.include?(name) }
  @options = OpenStruct.new(file_options)

  setting_keys = %i[
    hmes_adjust htlow hthigh
    mindepth maxdepth max_d_multiple
    min_non_ref_count min_indel_count_support
    ambiguous_ref_bases mapping_quality base_quality
    noise cross_type use_all_contigs include_low_hmes
    polyploidy bfr_adjust sel_seq_len
  ]
  Options.update(inputs.select { |name| setting_keys.include?(name) })
  logger.debug "parameter values set\n#{Options.current_values.to_yaml}"

  # nothing has been extracted or run yet
  @vars_extracted = false
  @has_run = false
end

Instance Attribute Details

#has_runObject (readonly)

Returns the value of attribute has_run.



19
20
21
# File 'lib/cheripic/implementer.rb', line 19

# Reader for @has_run, which the constructor sets to false and #run sets to
# true after it finishes.
def has_run
  @has_run
end

#optionsHash (readonly)

Input file options taken from the Cmd object, with the user-provided file paths as values.

Returns:

  • (Hash)

    a hash with the required input file names as keys and the user-provided file paths as values



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/cheripic/implementer.rb', line 15

# Drives the pipeline from the options collected by the Cmd object: variants
# are extracted from the bulks and the selected ones are written to file.
class Implementer

  require 'ostruct'
  require 'fileutils'

  # options  - OpenStruct of the file-related inputs selected in #initialize
  # variants - Variants object created by #extract_vars
  # has_run  - false after construction, true once #run has finished
  attr_reader :options, :variants, :has_run

  # Initializes an Implementer object using inputs from cmd object
  # @param inputs [Hash] a hash of trollop option names as keys and user or default setting as values from Cmd object
  def initialize(inputs)
    # file-related options are kept on this object
    set1 = %i{assembly
              input_format
              mut_bulk
              bg_bulk
              mut_bulk_vcf
              bg_bulk_vcf
              hmes_frags
              bfr_frags
              mut_parent
              bg_parent
              repeats_file}
    @options = OpenStruct.new(inputs.select { |k| set1.include?(k) })

    # analysis settings are handed over to Options
    set2 = %i{hmes_adjust
              htlow
              hthigh
              mindepth
              maxdepth
              max_d_multiple
              min_non_ref_count
              min_indel_count_support
              ambiguous_ref_bases
              mapping_quality
              base_quality
              noise
              cross_type
              use_all_contigs
              include_low_hmes
              polyploidy
              bfr_adjust
              sel_seq_len}
    settings = inputs.select { |k| set2.include?(k) }
    Options.update(settings)
    logger.debug "parameter values set\n#{Options.current_values.to_yaml}"
    @vars_extracted = false
    @has_run = false
  end

  # Initializes a Variants object using input options (files),
  # runs its pileup comparison and marks the variants as extracted.
  # @return [true]
  def extract_vars
    @variants = Variants.new(@options)
    @variants.compare_pileups
    @vars_extracted = true
  end

  # Extracted variants from bulk comparison are re-analysed
  # and selected variants are written to a file
  # @param pos_type [Symbol] :hmes_frags reports contig hm_pos positions with
  #   the hme_score (after verifying the background bulk pileup); any other
  #   value reports hemi_pos positions with the bfr_score. The same symbol
  #   names the @options entry holding the output path and the @variants
  #   method returning the fragments to report.
  # @return [nil]
  def process_variants(pos_type)
    @variants.verify_bg_bulk_pileup if pos_type == :hmes_frags
    # print selected variants that could be potential markers or mutation.
    # Block form of File.open closes the file even when a lookup below raises.
    File.open(@options[pos_type], 'w') do |out_file|
      out_file.puts "Score\tAlleleFreq\tlength\tseq_id\tposition\tref_base\tcoverage\tbases\tbase_quals\tsequence_left\tAlt_seq\tsequence_right"
      regions = Regions.new(@options.assembly)
      @variants.send(pos_type).each_key do |frag|
        contig_obj = @variants.assembly[frag]
        if pos_type == :hmes_frags
          positions = contig_obj.hm_pos.keys
          score = contig_obj.hme_score
        else
          positions = contig_obj.hemi_pos.keys
          score = contig_obj.bfr_score
        end
        positions.each do |pos|
          pileup = @variants.pileups[frag].mut_bulk[pos]
          seqs = regions.fetch_seq(frag, pos)
          # NOTE(review): AlleleFreq is always read from hm_pos, even when the
          # positions come from hemi_pos - confirm this is intended.
          out_file.puts "#{score}\t#{contig_obj.hm_pos[pos]}\t#{contig_obj.length}\t#{pileup.to_s.chomp}\t#{seqs[0]}\t#{pileup.consensus}\t#{seqs[1]}"
        end
      end
    end
    nil
  end

  # Wrapper to extract and isolate selected variants:
  # calls extract_vars (unless already done) and process_variants, and,
  # when Options.polyploidy is set, also reports the bfr fragments.
  # @return [true]
  def run
    extract_vars unless @vars_extracted
    process_variants(:hmes_frags)
    process_variants(:bfr_frags) if Options.polyploidy
    @has_run = true
  end

end

#variants<Cheripic::Variants> (readonly)

Returns a Variants object initialized using options from Cmd object.

Returns:



15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
# File 'lib/cheripic/implementer.rb', line 15

# Drives the pipeline from the options collected by the Cmd object: variants
# are extracted from the bulks and the selected ones are written to file.
class Implementer

  require 'ostruct'
  require 'fileutils'

  # options  - OpenStruct of the file-related inputs selected in #initialize
  # variants - Variants object created by #extract_vars
  # has_run  - false after construction, true once #run has finished
  attr_reader :options, :variants, :has_run

  # Initializes an Implementer object using inputs from cmd object
  # @param inputs [Hash] a hash of trollop option names as keys and user or default setting as values from Cmd object
  def initialize(inputs)
    # file-related options are kept on this object
    set1 = %i{assembly
              input_format
              mut_bulk
              bg_bulk
              mut_bulk_vcf
              bg_bulk_vcf
              hmes_frags
              bfr_frags
              mut_parent
              bg_parent
              repeats_file}
    @options = OpenStruct.new(inputs.select { |k| set1.include?(k) })

    # analysis settings are handed over to Options
    set2 = %i{hmes_adjust
              htlow
              hthigh
              mindepth
              maxdepth
              max_d_multiple
              min_non_ref_count
              min_indel_count_support
              ambiguous_ref_bases
              mapping_quality
              base_quality
              noise
              cross_type
              use_all_contigs
              include_low_hmes
              polyploidy
              bfr_adjust
              sel_seq_len}
    settings = inputs.select { |k| set2.include?(k) }
    Options.update(settings)
    logger.debug "parameter values set\n#{Options.current_values.to_yaml}"
    @vars_extracted = false
    @has_run = false
  end

  # Initializes a Variants object using input options (files),
  # runs its pileup comparison and marks the variants as extracted.
  # @return [true]
  def extract_vars
    @variants = Variants.new(@options)
    @variants.compare_pileups
    @vars_extracted = true
  end

  # Extracted variants from bulk comparison are re-analysed
  # and selected variants are written to a file
  # @param pos_type [Symbol] :hmes_frags reports contig hm_pos positions with
  #   the hme_score (after verifying the background bulk pileup); any other
  #   value reports hemi_pos positions with the bfr_score. The same symbol
  #   names the @options entry holding the output path and the @variants
  #   method returning the fragments to report.
  # @return [nil]
  def process_variants(pos_type)
    @variants.verify_bg_bulk_pileup if pos_type == :hmes_frags
    # print selected variants that could be potential markers or mutation.
    # Block form of File.open closes the file even when a lookup below raises.
    File.open(@options[pos_type], 'w') do |out_file|
      out_file.puts "Score\tAlleleFreq\tlength\tseq_id\tposition\tref_base\tcoverage\tbases\tbase_quals\tsequence_left\tAlt_seq\tsequence_right"
      regions = Regions.new(@options.assembly)
      @variants.send(pos_type).each_key do |frag|
        contig_obj = @variants.assembly[frag]
        if pos_type == :hmes_frags
          positions = contig_obj.hm_pos.keys
          score = contig_obj.hme_score
        else
          positions = contig_obj.hemi_pos.keys
          score = contig_obj.bfr_score
        end
        positions.each do |pos|
          pileup = @variants.pileups[frag].mut_bulk[pos]
          seqs = regions.fetch_seq(frag, pos)
          # NOTE(review): AlleleFreq is always read from hm_pos, even when the
          # positions come from hemi_pos - confirm this is intended.
          out_file.puts "#{score}\t#{contig_obj.hm_pos[pos]}\t#{contig_obj.length}\t#{pileup.to_s.chomp}\t#{seqs[0]}\t#{pileup.consensus}\t#{seqs[1]}"
        end
      end
    end
    nil
  end

  # Wrapper to extract and isolate selected variants:
  # calls extract_vars (unless already done) and process_variants, and,
  # when Options.polyploidy is set, also reports the bfr fragments.
  # @return [true]
  def run
    extract_vars unless @vars_extracted
    process_variants(:hmes_frags)
    process_variants(:bfr_frags) if Options.polyploidy
    @has_run = true
  end

end

Instance Method Details

#extract_varsObject

Initializes a Variants object using the input options (files). Each pileup file is processed and the bulks are compared.



64
65
66
67
68
# File 'lib/cheripic/implementer.rb', line 64

# Creates a Variants object from the stored input options, keeps it in
# @variants, runs its pileup comparison and flags the extraction as done.
def extract_vars
  bulk_variants = Variants.new(@options)
  @variants = bulk_variants
  bulk_variants.compare_pileups
  @vars_extracted = true
end

#process_variants(pos_type) ⇒ Object

Extracted variants from bulk comparison are re-analysed and selected variants are written to a file



72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# File 'lib/cheripic/implementer.rb', line 72

# Re-analyses the variants extracted from the bulk comparison and writes the
# selected ones, one tab-separated row per position, to the output file
# configured under pos_type in @options.
#
# @param pos_type [Symbol] :hmes_frags reports contig hm_pos positions with
#   the hme_score (after verifying the background bulk pileup); any other
#   value reports hemi_pos positions with the bfr_score. The same symbol
#   names the @options entry holding the output path and the @variants
#   method returning the fragments to report.
# @return [nil]
def process_variants(pos_type)
  @variants.verify_bg_bulk_pileup if pos_type == :hmes_frags
  # print selected variants that could be potential markers or mutation.
  # Block form of File.open closes the file even when a lookup below raises.
  File.open(@options[pos_type], 'w') do |out_file|
    out_file.puts "Score\tAlleleFreq\tlength\tseq_id\tposition\tref_base\tcoverage\tbases\tbase_quals\tsequence_left\tAlt_seq\tsequence_right"
    regions = Regions.new(@options.assembly)
    @variants.send(pos_type).each_key do |frag|
      contig_obj = @variants.assembly[frag]
      if pos_type == :hmes_frags
        positions = contig_obj.hm_pos.keys
        score = contig_obj.hme_score
      else
        positions = contig_obj.hemi_pos.keys
        score = contig_obj.bfr_score
      end
      positions.each do |pos|
        pileup = @variants.pileups[frag].mut_bulk[pos]
        seqs = regions.fetch_seq(frag, pos)
        # NOTE(review): AlleleFreq is always read from hm_pos, even when the
        # positions come from hemi_pos - confirm this is intended.
        out_file.puts "#{score}\t#{contig_obj.hm_pos[pos]}\t#{contig_obj.length}\t#{pileup.to_s.chomp}\t#{seqs[0]}\t#{pileup.consensus}\t#{seqs[1]}"
      end
    end
  end
  # keep the original return value (IO#close returned nil)
  nil
end

#runObject

Wrapper to extract and isolate selected variants. It calls extract_vars and process_variants and, if the data is from polyploids, also extracts contigs with high bfr.



101
102
103
104
105
106
107
108
109
110
# File 'lib/cheripic/implementer.rb', line 101

# Runs the whole selection: extracts the variants if that has not happened
# yet, reports the hmes fragments, reports the bfr fragments as well when
# Options.polyploidy is set, and finally marks this object as run.
def run
  extract_vars unless @vars_extracted
  process_variants(:hmes_frags)
  process_variants(:bfr_frags) if Options.polyploidy
  @has_run = true
end