Class: Bio::DB::Tag::MD

Inherits:
Object
  • Object
show all
Includes:
Alignment::IteratePairs
Defined in:
lib/bio-sam-mutation/bio/db/tag/md.rb

Constant Summary collapse

@@regexp =
/MD:Z:([\w^]+)/
@@format =
/[\w^]+/
@@splitter =
/(?<match>\d+)|(?<substitution>[GATCN]+)|\^(?<deletion>[GATCN]+)/
@@reference =

Operations that consume reference seqeunce:

/[msd]/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ MD

Returns a new instance of MD.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 9

def initialize(data)
    if data.is_a? String
      if data.match(@@regexp)
        @tag = $~[1]
      elsif data.match(@@format)
        #Assume tag given without MD:Z: leader
        @tag = data
      else
        raise "Tag not of expected format."
      end
    elsif data.is_a? Bio::DB::Tag
      @tag = data.value
      warn "Not an MD tag" if data.tag == "MD"
    else
      raise "Tag not of expected format."
    end

  # Splits the string into operations using the splitter regexp class variable, returns array of two-element arrays describing operations
  spl = @tag.scan(@@splitter)
  # Returns an array of matches [match,substition,deletion]
  # Although regexp captures are named, these don't get included automatically with scan as it doesn't return MatchData objects.
  spl.map! do |a|
    array = [["m", a[0]],["s", a[1]],["d", a[2]]]
    # Only one of these will be non-nil
    array.keep_if{|i| i[1]}
    array.map!{|i| if i[0] == "m" then i[1] = i[1].to_i end; i}
    array[0]
  end
  @pairs = spl

  @cumulative = []
  cumulative_length = 0
  read_length = 0
  @pairs.each do |q|
    p = q.dup
    case p[0]
      when "m"
        len = p[1]
        rlen = p[1]
      when "s"
        len = p[1].length
        rlen = p[1].length
      when "d"
        len = p[1].length
        # Deleted bases don't appear in the read, so don't count to the length
        rlen = 0
    end
    # third element in each array will be the total preceding length on the reference, i.e. the position of the operation.
    # fourth element is similar for the read.
    @cumulative << p.dup.push(cumulative_length).push(read_length)
    cumulative_length += len
    read_length += rlen
  end
end

Instance Attribute Details

#cumulativeObject

Returns the value of attribute cumulative.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def cumulative
  @cumulative
end

#pairsObject

Returns the value of attribute pairs.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def pairs
  @pairs
end

#tagObject

Returns the value of attribute tag.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def tag
  @tag
end

Instance Method Details

#deletionsObject



64
65
66
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 64

def deletions
  report(/d/)
end

#reconstruct_tag(array = @pairs) ⇒ Object

Reconstruct a MD:Z tag from the pairs array



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 84

def reconstruct_tag(array=@pairs)
  new_tag = []
  array.each do |p|
    case p[0]
      when "m"
        string = p[1].to_s
      when "s"
        string = p[1]
      when "d"
        string = "^"+p[1]
    end
    new_tag << string
  end
  new_tag.join("")
end

#ref_lengthObject

Sums the total length of the reference sequence represented by the MD:Z tag (or part of)



103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 103

def ref_length
  #Need the sum of all "movement" operations (i.e. numbers) as well as any substituted bases (count 1 each)
  if @tag =~ /^\d+$/
    @tag.to_i
  else
    temp_tag = @tag.dup
    temp_tag.gsub!(/\^/,"")  # Deletions need to be counted - sub the caret character out and count the remaining base characters
    movements = temp_tag.split(/[GATCN]+/).map(&:to_i).reduce(:+) # Sum numbers
    deletions = temp_tag.split(/\d+/).map(&:length).reduce(:+) # Sum number of base chars
    movements + deletions
  end
end

#report(regexp = /[sd]/) ⇒ Object

Report the positions of given events



73
74
75
76
77
78
79
80
81
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 73

def report(regexp=/[sd]/)
  to_return = []
  @cumulative.each do |p|
    if p[0] =~ regexp
      to_return << p
    end
  end
  to_return
end

#slice(offset, length) ⇒ Object

Given an offset in reference sequence and length, return an object corresponding to that subregion of the alignment



116
117
118
119
120
121
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 116

def slice(offset,length)
  new_array = iterate_pairs(@pairs,offset,length,@@reference)
  # Return a MDZ instance with just the new alignment
  new_tag = reconstruct_tag(new_array)
  Bio::DB::Tag::MD.new(new_tag)
end

#substitutionsObject



68
69
70
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 68

def substitutions
  report(/s/)
end