Class: Bio::DB::Tag::MD

Inherits:
Object
  • Object
show all
Includes:
Alignment::IteratePairs
Defined in:
lib/bio-sam-mutation/bio/db/tag/md.rb

Constant Summary collapse

@@regexp =
/MD:Z:([\w^]+)/
@@format =
/[\w^]+/
@@splitter =
/(?<match>\d+)|(?<substitution>[GATCN]+)|\^(?<deletion>[GATCN]+)/
@@reference =

Operations that consume reference seqeunce:

/[msd]/

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(data) ⇒ MD

Returns a new instance of MD.



9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 9

def initialize(data)
		if data.is_a? String
			if data.match(@@regexp)
				@tag = $~[1]
			elsif data.match(@@format)
				#Assume tag given without MD:Z: leader
				@tag = data
			else
				raise "Tag not of expected format."
			end
		elsif data.is_a? Bio::DB::Tag
			@tag = data.value
			warn "Not an MD tag" if data.tag == "MD"
		else
			raise "Tag not of expected format."
		end

	# Splits the string into operations using the splitter regexp class variable, returns array of two-element arrays describing operations
	spl = @tag.scan(@@splitter)
	# Returns an array of matches [match,substition,deletion]
	# Although regexp captures are named, these don't get included automatically with scan as it doesn't return MatchData objects.
	spl.map! do |a|
		array = [["m", a[0]],["s", a[1]],["d", a[2]]]
		# Only one of these will be non-nil
		array.keep_if{|i| i[1]}
		array.map!{|i| if i[0] == "m" then i[1] = i[1].to_i end; i}
		array[0]
	end
	@pairs = spl

	@cumulative = []
	cumulative_length = 0
	read_length = 0
	@pairs.each do |q|
		p = q.dup
		case p[0]
			when "m"
				len = p[1]
				rlen = p[1]
			when "s"
				len = p[1].length
				rlen = p[1].length
			when "d"
				len = p[1].length
				# Deleted bases don't appear in the read, so don't count to the length
				rlen = 0
		end
		# third element in each array will be the total preceding length on the reference, i.e. the position of the operation.
		# fourth element is similar for the read.
		@cumulative << p.dup.push(cumulative_length).push(read_length)
		cumulative_length += len
		read_length += rlen
	end
end

Instance Attribute Details

#cumulativeObject

Returns the value of attribute cumulative.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def cumulative
  @cumulative
end

#pairsObject

Returns the value of attribute pairs.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def pairs
  @pairs
end

#tagObject

Returns the value of attribute tag.



3
4
5
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 3

def tag
  @tag
end

Instance Method Details

#deletionsObject



64
65
66
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 64

def deletions
	report(/d/)
end

#reconstruct_tag(array = @pairs) ⇒ Object

Reconstruct a MD:Z tag from the pairs array



84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 84

def reconstruct_tag(array=@pairs)
	new_tag = []
	array.each do |p|
		case p[0]
			when "m"
				string = p[1].to_s
			when "s"
				string = p[1]
			when "d"
				string = "^"+p[1]
		end
		new_tag << string
	end
	new_tag.join("")
end

#ref_lengthObject

Sums the total length of the reference sequence represented by the MD:Z tag (or part of)



103
104
105
106
107
108
109
110
111
112
113
114
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 103

def ref_length
	#Need the sum of all "movement" operations (i.e. numbers) as well as any substituted bases (count 1 each)
	if @tag =~ /^\d+$/
		@tag.to_i
	else
		temp_tag = @tag.dup
		temp_tag.gsub!(/\^/,"")  # Deletions need to be counted - sub the caret character out and count the remaining base characters
		movements = temp_tag.split(/[GATCN]+/).map(&:to_i).reduce(:+) # Sum numbers
		deletions = temp_tag.split(/\d+/).map(&:length).reduce(:+) # Sum number of base chars
		movements + deletions
	end
end

#report(regexp = /[sd]/) ⇒ Object

Report the positions of given events



73
74
75
76
77
78
79
80
81
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 73

def report(regexp=/[sd]/)
	to_return = []
	@cumulative.each do |p|
		if p[0] =~ regexp
			to_return << p
		end
	end
	to_return
end

#slice(offset, length) ⇒ Object

Given an offset in reference sequence and length, return an object corresponding to that subregion of the alignment



116
117
118
119
120
121
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 116

def slice(offset,length)
	new_array = iterate_pairs(@pairs,offset,length,@@reference)
	# Return a MDZ instance with just the new alignment
	new_tag = reconstruct_tag(new_array)
	Bio::DB::Tag::MD.new(new_tag)
end

#substitutionsObject



68
69
70
# File 'lib/bio-sam-mutation/bio/db/tag/md.rb', line 68

def substitutions
	report(/s/)
end