Class: RegexpRepetition

Inherits:
RegexpTree show all
Includes:
Comparable
Defined in:
app/models/regexp_parse.rb,
app/models/regexp_repetition.rb

Overview

Copyright (C) 2010-2012 by Greg Lawson

<[email protected]>

Copyright: See COPYING file that comes with this distribution

Defined Under Namespace

Classes: TestCases

Constant Summary

Constants inherited from RegexpTree

RegexpTree::Ascii_characters, RegexpTree::Binary_bytes

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods inherited from RegexpTree

#+, [], canonical_regexp, canonical_regexp_tree, #compare_repetitions?, #probability_space_regexp, #probability_space_size, promote, #to_a, #to_regexp, #to_s

Methods included from RegexpTree::ClassMethods

#promote

Methods inherited from NestedArray

#[], #map_branches, #map_recursive, #merge_single_element_arrays?, #promote, #reverse, #to_s

Methods included from NestedArray::Assertions::ClassMethods

#assert_post_conditions

Methods included from NestedArray::Assertions

#assert_post_conditions, #assert_pre_conditions

Constructor Details

#initialize(branch, min = nil, max = nil, probability_space_regexp = '[[:print:]]+', options = Default_options) ⇒ RegexpRepetition

Call forms: RegexpRepetition.new(RegexpTree); RegexpRepetition.new(RegexpTree, UnboundedRange); RegexpRepetition.new(RegexpTree, min, max). A nil third parameter is ambiguous: it could mean missing or infinity. The ambiguity is resolved by checking whether the second parameter is numeric or a Range.


16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'app/models/regexp_repetition.rb', line 16

# Build a repetition from a branch plus an optional length specification.
#
# Accepted call shapes:
#   new(branch)            - length is read from the branch
#   new(branch, range)     - length given directly as a Range
#   new(branch, min, max)  - explicit bounds wrapped in an UnboundedRange
#   new(branch, min)       - max stays nil and is handed to UnboundedRange
#                            (presumably meaning "no upper bound" - confirm)
#
# probability_space_regexp and options are accepted but not used here.
#
# Raises RuntimeError ("min must not be nil.") when max is given without min.
def initialize(branch, min=nil, max=nil, probability_space_regexp='[[:print:]]+', options=Default_options)
	if branch.instance_of?(RegexpParse) then
		@repeated_pattern=branch.repeated_pattern
		@repetition_length=branch.repetition_length
	else
		branch=RegexpRepetition.promote(branch)
		# Keep the promoted branch's pattern as well, so the reader is not left nil.
		@repeated_pattern=branch.repeated_pattern if branch.respond_to?(:repeated_pattern)
	end #if
	if !max.nil? then # all arguments provided
		# Validate first so the caller gets this message rather than a range error.
		raise "min must not be nil." if min.nil?
		@repetition_length=UnboundedRange.new(min, max)
	elsif min.nil? then # implicit length
		@repetition_length=branch.repetition_length
	elsif min.kind_of?(Range) then # only one length argument, already a range
		@repetition_length=min
	else # third parameter specified as nil/infinity
		@repetition_length=UnboundedRange.new(min, max)
	end #if
end

Instance Attribute Details

#repeated_pattern ⇒ Object (readonly)

Returns the value of attribute repeated_pattern


10
11
12
# File 'app/models/regexp_repetition.rb', line 10

# Read-only accessor: hands back whatever is stored in @repeated_pattern.
def repeated_pattern
  return @repeated_pattern
end

#repetition_length ⇒ Object (readonly)

Returns the value of attribute repetition_length


10
11
12
# File 'app/models/regexp_repetition.rb', line 10

# Read-only accessor: hands back whatever is stored in @repetition_length.
def repetition_length
  return @repetition_length
end

Instance Method Details

#&(rhs) ⇒ Object

intersection. If neither is a subset of the rhs return nil


55
56
57
58
59
60
61
# File 'app/models/regexp_repetition.rb', line 55

# Intersection of two repetitions.
#
# rhs is first passed through RegexpRepetition.promote; the result is a new
# RegexpRepetition built from the `&` of both repeated patterns and the `&`
# of both repetition lengths.
def &(rhs)
	other=RegexpRepetition.promote(rhs)
	RegexpRepetition.new(
		self.repeated_pattern & other.repeated_pattern,
		self.repetition_length & other.repetition_length
	)
end

#<=>(rhs) ⇒ Object

TestCases


48
49
50
51
52
53
# File 'app/models/regexp_repetition.rb', line 48

# Three-way comparison used by Comparable.
#
# Orders first by repeated_pattern and, only when those compare equal, by
# repetition_length.
#
# Returns the result of the deciding `<=>`, or nil when rhs does not offer
# both readers or when the repeated patterns cannot be compared.
def <=>(rhs)
	# An operand without the two readers is not comparable with a repetition.
	return nil unless rhs.respond_to?(:repeated_pattern) && rhs.respond_to?(:repetition_length)
	base_compare=repeated_pattern <=> rhs.repeated_pattern
	# nil has no nonzero?, so incomparable patterns are reported as nil here.
	return nil if base_compare.nil?
	return base_compare.nonzero? || (repetition_length <=> rhs.repetition_length)
end

#canonical_repetition_tree(min = self.repetition_length.begin, max = self.repetition_length.end) ⇒ Object

the useful inverse function of new. String to regexp


70
71
72
# File 'app/models/regexp_repetition.rb', line 70

# Build the canonical "{min,max}" repetition node as a RegexpTree.
#
# min, max default to the begin and end of this object's repetition_length;
# both are converted with to_s.
def canonical_repetition_tree(min=self.repetition_length.begin, max=self.repetition_length.end)
	bounds=[min.to_s, ',', max.to_s]
	RegexpTree.new(['{', bounds, '}'])
end

#concise_repetition_node(min = self.repetition_length.begin, max = self.repetition_length.end) ⇒ Object

Return a RegexpTree node for self. Concise means using abbreviations like '*', '+', '' rather than the canonical {n,m}. If there is no repetition, returns '', which is equivalent to {1,1}.


77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
# File 'app/models/regexp_repetition.rb', line 77

# Shortest regexp suffix expressing a repetition of min..max.
#
# min, max default to the begin and end of this object's repetition_length.
#
# Returns
#   '?'    for 0..1
#   '*'    for 0..UnboundedFixnum::Inf
#   ''     for 1..1 (no repetition)
#   '+'    for 1..UnboundedFixnum::Inf
#   a RegexpTree "{n}" when min==max (and min is neither 0 nor 1)
#   canonical_repetition_tree(min, max) in every other case.
def concise_repetition_node(min=self.repetition_length.begin, max=self.repetition_length.end)
	case min.to_i
	when 0
		return '?' if max.to_i==1
		return '*' if max==UnboundedFixnum::Inf
	when 1
		return '' if max==1
		return '+' if max==UnboundedFixnum::Inf
	else
		return RegexpTree.new(['{', [min.to_i.to_s], '}']) if min==max
	end #case
	# No abbreviation applies: fall back to the explicit {min,max} form.
	return canonical_repetition_tree(min, max)
end

#merge_to_repetition(branch = self) ⇒ Object

recursive merging of consecutive identical pairs


156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'app/models/regexp_repetition.rb', line 156

# Recursively merge consecutive elements of branch that share the same
# repeated pattern into a single repetition.
#
# branch - sequence to scan; a plain Array is wrapped in a RegexpTree first.
#          Defaults to self.
#
# Returns branch itself when it has fewer than two elements; otherwise the
# value of the recursive merge described below.
def merge_to_repetition(branch=self)
	if branch.instance_of?(Array) then
		branch=RegexpTree.new(branch)
	end #if
	if branch.size<2 then # terminate recursion
		return branch
	else
# puts "branch=#{branch}"
		first=branch[0]
		second=branch[1]
		# NOTE(review): repeated_pattern / repetition_length are called here with
		# an argument; presumably a definition outside this listing accepts a
		# node - confirm.
		if branch.repeated_pattern(first)==branch.repeated_pattern(second) then
			first_repetition=first.repetition_length
			second_repetition=branch.repetition_length(second)
			# The two lengths are added and the sum is asked for its concise node.
			merged_repetition=(first_repetition+second_repetition).concise_repetition_node
			# `+` binds tighter than `<<`: merged_repetition+branch[2..-1] is built
			# first, then appended to first.repeated_pattern, and the result is
			# merged again.
			merge_to_repetition(first.repeated_pattern << merged_repetition+branch[2..-1])
		else # couldn't merge first element
			[first]+merge_to_repetition(branch[1..-1])	# shorten string to ensure recursion termination
		end #if
	end #if
end

#probability_of_repetition(repetition, match_length = nil, branch = self) ⇒ Object

Probability for a single matched repetition of an alternative (single character). Here the probability distribution is assumed uniform across the probability space. Ranges from zero for an impossible match (usually avoided) to 1 for a certain match like /.*/ (actually RegexpRepetition::TestCases::Any is more accurate). Returns nil if indeterminate (e.g. nested repetitions); call probability_range or RegexpMatch#probability instead. match_length (of random characters) is useful in unanchored cases (match_length may be nil). Writing .p(n) for the probability at length n: if I == L & R, then L >= I && R >= I, and L.p(n) >= I.p(n) && R.p(n) >= I.p(n).


123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
# File 'app/models/regexp_repetition.rb', line 123

# Probability that `repetition` consecutive characters all match branch,
# assuming each character is drawn uniformly from the probability space.
#
# repetition   - number of repetitions; 0 gives 1.0, nil is treated as an
#                infinite repetition.
# match_length - accepted for interface compatibility; not used here.
# branch       - a String (counted as a single alternative) or an object
#                whose repeated_pattern is handed to alternatives?.
#
# Returns a Float, or nil when alternatives? returns nil for the branch.
# Raises RuntimeError when the computed probability exceeds 1.0.
def probability_of_repetition(repetition, match_length=nil, branch=self)
	if branch.instance_of?(String) then
		alternatives=1
	else
		alternative_list=alternatives?(branch.repeated_pattern) # kludge for now
		return nil if alternative_list.nil?
		alternatives=alternative_list.size
	end #if
	character_probability=alternatives.to_f/probability_space_size
	probability=if repetition==0 then
		1.0
	elsif repetition.nil? then # infinite repetition
		# Only a certain per-character match survives infinitely many repetitions.
		character_probability==1.0 ? 1.0 : 0.0
	else
		character_probability**repetition
	end #if
	raise "probability_space_regexp=#{probability_space_regexp} is probably too restrictive for branch=#{branch.inspect}" if probability>1.0
	return probability
end

#probability_range(node = self) ⇒ Object

Probability range depending on matched length


102
103
104
105
106
107
108
109
# File 'app/models/regexp_repetition.rb', line 102

# Range of match probabilities for node, from its shortest to its longest
# repetition.
#
# node - a String (its size is used as both bounds) or an object with a
#        repetition_length; defaults to self.
#
# Returns a Range built from probability_of_repetition evaluated for node at
# the begin and at the end of the length range.
def probability_range(node=self)
	range=node.instance_of?(String) ? (node.size..node.size) : node.repetition_length
	# Forward node as the branch so the probabilities describe node, not self.
	lowest=probability_of_repetition(range.begin, nil, node)
	highest=probability_of_repetition(range.end, nil, node)
	return lowest..highest
end

#|(rhs) ⇒ Object

Union. Unlike set union disjoint sets return a spanning set.


63
64
65
66
67
68
# File 'app/models/regexp_repetition.rb', line 63

# Union of two repetitions.
#
# Builds a new RegexpRepetition from the `|` of both repeated patterns and
# the `|` of both repetition lengths. rhs is used as given.
def |(rhs)
	RegexpRepetition.new(
		self.repeated_pattern | rhs.repeated_pattern,
		self.repetition_length | rhs.repetition_length
	)
end