Class: RegexpTree

Inherits:
NestedArray show all
Extended by:
ClassMethods
Includes:
Comparable
Defined in:
app/models/regexp_tree.rb,
app/models/regexp_parse.rb,
app/models/regexp_alternative.rb

Overview

Copyright (C) 2010-2013 by Greg Lawson

<[email protected]>

Copyright: See COPYING file that comes with this distribution

The parse tree's internal format is nested Arrays. Postfix operators and brackets end embedded arrays.

Defined Under Namespace

Modules: ClassMethods

Constant Summary collapse

# The 128 one-character Strings produced by Integer#chr for 0..127.
Ascii_characters = (0..127).map(&:chr)
# The 256 one-character Strings produced by Integer#chr for 0..255.
Binary_bytes = (0..255).map(&:chr)

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ClassMethods

promote

Methods inherited from NestedArray

#[], #map_branches, #map_recursive, #merge_single_element_arrays?, #promote, #reverse

Methods included from NestedArray::Assertions::ClassMethods

#assert_post_conditions

Methods included from NestedArray::Assertions

#assert_post_conditions, #assert_pre_conditions

Constructor Details

#initialize(regexp = [], probability_space_regexp = '[[:print:]]+', options = RegexpParse::Default_options) ⇒ RegexpTree

Returns a new instance of RegexpTree.


37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
# File 'app/models/regexp_tree.rb', line 37

# Builds a RegexpTree from any of the supported regexp representations.
#
# regexp:: an Array (nested parse tree, passed to the superclass as is),
#          a String (regexp source, parsed with RegexpParse),
#          a RegexpParse (its to_a is used), or
#          a Regexp (its +source+ is parsed with RegexpParse).
# probability_space_regexp:: stored unchanged in @probability_space_regexp.
# options:: forwarded to RegexpParse.regexp_error.
#
# Raises RuntimeError when +regexp+ is none of the kinds listed above.
def initialize(regexp=[], probability_space_regexp='[[:print:]]+', options=RegexpParse::Default_options)
	nested_array=
		if regexp.kind_of?(Array) then # already nested Arrays
			regexp
		elsif regexp.instance_of?(String) then
			RegexpParse.new(regexp).to_a
		elsif regexp.instance_of?(RegexpParse) then
			regexp.to_a
		elsif regexp.instance_of?(Regexp) then
			RegexpParse.new(regexp.source).to_a
		else
			raise "unexpected regexp=#{regexp.inspect}"
		end #if
	# The superclass constructor runs exactly once, whatever the input kind;
	# the Regexp case used to invoke it twice (empty, then with the parse).
	super(nested_array)
	@probability_space_regexp=probability_space_regexp
	# @errors is a one-element Array holding RegexpParse.regexp_error's result.
	@errors=[RegexpParse.regexp_error(regexp.to_s, options)]
#	@anchor=Anchoring.new(self) infinite recursion
end

Class Method Details

.[](*regexp_array) ⇒ Object

ClassMethods


14
15
16
17
18
19
20
# File 'app/models/regexp_parse.rb', line 14

# Bracket-style class method.
# A single argument is unwrapped and handed to RegexpParse.typed? as is;
# zero or several arguments are handed over together as one Array.
# Returns whatever RegexpParse.typed? returns.
def self.[](*regexp_array)
	candidate=(regexp_array.size==1 ? regexp_array.first : regexp_array)
	RegexpParse.typed?(candidate)
end

.canonical_regexp(regexp) ⇒ Object

initialize


55
56
57
58
59
60
61
62
63
64
65
66
# File 'app/models/regexp_tree.rb', line 55

# Converts the accepted regexp representations to a Regexp.
#
# String:: compiled through RegexpParse.regexp_rescued.
# Array, RegexpTree, RegexpMatch (exact classes):: converted with to_s,
#   then compiled through RegexpParse.regexp_rescued.
# nil:: the empty Regexp //.
# Regexp:: returned unchanged.
#
# Raises RuntimeError for any other class.
def self.canonical_regexp(regexp)
	if regexp.instance_of?(String) then
		RegexpParse.regexp_rescued(regexp)
	elsif regexp.instance_of?(Array) || regexp.instance_of?(RegexpTree) || regexp.instance_of?(RegexpMatch) then
		RegexpParse.regexp_rescued(regexp.to_s)
	elsif regexp.nil? then
		//
	elsif regexp.instance_of?(Regexp) then
		regexp
	else
		raise "Unexpected regexp.class=#{regexp.class}."
	end #if
end

.canonical_regexp_tree(regexp) ⇒ Object

canonical_regexp


67
68
69
70
71
72
73
74
75
76
77
78
# File 'app/models/regexp_tree.rb', line 67

# Converts the accepted regexp representations towards a RegexpTree.
#
# String:: wrapped with RegexpTree.new.
# Array, RegexpTree, RegexpMatch (exact classes):: converted with to_s,
#   then wrapped with RegexpTree.new.
# nil:: the empty Regexp //.
# Regexp:: returned unchanged.
#
# NOTE(review): for nil and Regexp input the result is a Regexp, not a
# RegexpTree - confirm whether callers rely on that before changing it.
#
# Raises RuntimeError for any other class.
def self.canonical_regexp_tree(regexp)
	if regexp.instance_of?(String) then
		RegexpTree.new(regexp)
	elsif regexp.instance_of?(Array) || regexp.instance_of?(RegexpTree) || regexp.instance_of?(RegexpMatch) then
		RegexpTree.new(regexp.to_s)
	elsif regexp.nil? then
		//
	elsif regexp.instance_of?(Regexp) then
		regexp
	else
		raise "Unexpected regexp.class=#{regexp.class.inspect}."
	end #if
end

.promote(node) ⇒ Object

initialize


75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
# File 'app/models/regexp_sequence.rb', line 75

# Promotes +node+ to a regexp tree object.
#
# A RegexpTree (or subclass instance) is returned untouched. Anything else
# is first reduced to a nested Array and then wrapped in a RegexpSequence:
# Array:: used as is.
# String:: parsed with RegexpParse.
# RegexpParse:: its to_a.
# Regexp:: its +source+ parsed with RegexpParse.
#
# Raises RuntimeError for any other kind of node.
def RegexpTree.promote(node)
	return node if node.kind_of?(RegexpTree)
	nested_array=
		if node.kind_of?(Array) then # nested Arrays
			node
		elsif node.instance_of?(String) then
			RegexpParse.new(node).to_a
		elsif node.instance_of?(RegexpParse) then
			node.to_a
		elsif node.instance_of?(Regexp) then
			RegexpParse.new(node.source).to_a
		else
			raise "unexpected node=#{node.inspect}"
		end #if
	RegexpSequence.new(nested_array)
end

Instance Method Details

#&(rhs) ⇒ Object

Intersection should be interpreted as the intersection of the languages (sets of possible matches) that can be matched by each regexp. If I == L & R, then L >= I && R >= I. If there is no intersection, I == [], and then L >= [] and R >= [].


117
118
119
120
121
# File 'app/models/regexp_tree.rb', line 117

# Intersection of two trees.
# Combines the two repetition lengths with their own &, asks the result for
# its concise repetition node, intersects the repeated patterns with
# sequence_intersect, and wraps [pattern, repetition node] in a new RegexpTree.
def &(rhs)
	shared_length=self.repetition_length & rhs.repetition_length
	length_node=shared_length.concise_repetition_node(shared_length.begin, shared_length.end)
	shared_pattern=self.repeated_pattern.sequence_intersect(rhs.repeated_pattern)
	RegexpTree.new([shared_pattern, length_node])
end

#+(other) ⇒ Object

<=>


144
145
146
# File 'app/models/regexp_tree.rb', line 144

# Concatenation: a new RegexpTree built from this tree's to_a followed by
# +other+'s to_a.
def +(other)
	joined=self.to_a+other.to_a
	RegexpTree.new(joined)
end

#<=>(other) ⇒ Object

intersection


122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'app/models/regexp_tree.rb', line 122

# Three-way comparison.
# Returns 0 when both sides have the same to_s; otherwise the first non-nil
# answer from compare_character_class?, compare_repetitions? and
# compare_sequence? (tried in that order), or nil when none of them decides.
def <=>(other)
	# NOTE(review): the anchor comparison is evaluated but its result is not
	# used by any branch below.
	compare_anchors?(other)
	return 0 if self.to_s==other.to_s # avoid recursion
	verdict=compare_character_class?(other)
	return verdict unless verdict.nil?
	verdict=compare_repetitions?(other)
	return verdict unless verdict.nil?
	compare_sequence?(other)
end

#compare_repetitions?(other) ⇒ Boolean

probability_space_size

Returns:

  • (Boolean)

86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
# File 'app/models/regexp_tree.rb', line 86

# Compares two trees that repeat the same pattern by their repetition lengths.
#
# Returns:
# nil:: +other+ is a String, the repeated patterns differ, or neither
#       repetition length contains the other.
# 0::   the repetition lengths are equal.
# 1::   this tree's repetition length contains +other+'s.
# -1::  +other+'s repetition length contains this tree's.
#
# A nil +end+ is treated as "no upper bound" (as with an endless Range).
# NOTE(review): confirm that this matches repetition_length's convention.
def compare_repetitions?(other)
	return nil if other.instance_of?(String)
	return nil if self.repeated_pattern!=other.repeated_pattern
	mine=self.repetition_length
	theirs=other.repetition_length
	return 0 if mine==theirs
	# Upper bounds are only compared with >= when both are present, so a nil
	# end on either side can no longer raise.
	mine_reaches_theirs=mine.end.nil? || (!theirs.end.nil? && mine.end>=theirs.end)
	theirs_reaches_mine=theirs.end.nil? || (!mine.end.nil? && theirs.end>=mine.end)
	if mine.begin<=theirs.begin && mine_reaches_theirs then
		1
	elsif mine.begin>=theirs.begin && theirs_reaches_mine then
		-1
	else
		nil
	end #if
end

#probability_space_regexp ⇒ Object

include Inline_Assertions


80
81
82
# File 'app/models/regexp_tree.rb', line 80

# Wraps the value stored in @probability_space_regexp in a new RegexpTree
# on every call.
def probability_space_regexp
	stored_space=@probability_space_regexp
	RegexpTree.new(stored_space)
end

#probability_space_size ⇒ Object

probability_space_regexp


83
84
85
# File 'app/models/regexp_tree.rb', line 83

# Size of string_of_matching_chars for the repeated pattern of
# probability_space_regexp.
def probability_space_size
	space_pattern=probability_space_regexp.repeated_pattern
	space_pattern.string_of_matching_chars.size
end

#to_a ⇒ Object

+


147
148
149
# File 'app/models/regexp_tree.rb', line 147

# Returns a new NestedArray constructed from this tree.
def to_a
	NestedArray.new(self)
end

#to_regexp(options = RegexpParse::Default_options) ⇒ Object

to_s


155
156
157
158
159
160
# File 'app/models/regexp_tree.rb', line 155

# Converts this tree to a Regexp: its to_s text is handed, together with
# +options+, to RegexpParse.regexp_rescued, whose result is returned.
def to_regexp(options=RegexpParse::Default_options)
	RegexpParse.regexp_rescued(to_s, options)
end

#to_s ⇒ Object

File name glob (suitable for Dir[]) most like the regexp. Often matches more filenames than the regexp (see pathnames).


152
153
154
# File 'app/models/regexp_tree.rb', line 152

# String form of the tree: the elements of to_a joined together.
def to_s
	elements=to_a
	elements.join
end