Top Level Namespace
Defined Under Namespace
Modules: Interscript Classes: String
Instance Method Summary
- #h(str) ⇒ Object
- #process(node) ⇒ Object
- #process_root(node) ⇒ Object
- #stringify(node) ⇒ Object
- #stringify_root(root, indent: 0) ⇒ Object
Instance Method Details
#h(str) ⇒ Object
5 6 7 |
# File 'lib/interscript/visualize.rb', line 5
#
# Escapes the four HTML-significant characters (& < > ") in a value so it can
# be embedded in HTML text or in a double-quoted attribute.
#
# @param str [#to_s] value to escape; it is converted with +to_s+ first, so
#   +nil+ yields an empty string
# @return [String] a new string with & < > " replaced by their entities
def h(str)
  str.to_s
     .gsub("&", "&amp;") # ampersand first, so the entities emitted below are not escaped again
     .gsub("<", "&lt;")
     .gsub(">", "&gt;")
     .gsub('"', "&quot;")
end
#process(node) ⇒ Object
4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
# File 'lib/interscript/utils/regexp_converter.rb', line 4
#
# Recursively converts a parsed regular-expression node into a nested token
# structure made of Symbols (markers such as :capture_start), Strings (the
# node's literal text) and Arrays (children).
#
# Node types without a dedicated branch are emitted as
# [:missing, node.class, ...] so they can be spotted later.
#
# @param node [Object] an expression node; it is matched against the
#   Regexp::Expression::* classes and may respond to +expressions+,
#   +quantifier+, +text+ and +number+
# @return [Array, Symbol, String, nil] the token structure for +node+ (nil only
#   for a Root whose +expressions+ is nil)
def process(node)
  # Convert the sub-expressions first; nodes without any leave this nil.
  children =
    if node.respond_to?(:expressions) && node.expressions
      node.expressions.map { |expr| process(expr) }
    end
  quantifier = node.quantifier if node.respond_to?(:quantifier)

  out =
    case node
    when Regexp::Expression::Root
      children
    when Regexp::Expression::Assertion::Lookbehind
      [:lookbehind_start, children, :lookbehind_stop]
    when Regexp::Expression::Assertion::NegativeLookbehind
      [:negative_lookbehind_start, children, :negative_lookbehind_stop]
    when Regexp::Expression::Assertion::Lookahead
      [:lookahead_start, children, :lookahead_stop]
    when Regexp::Expression::Assertion::NegativeLookahead
      [:negative_lookahead_start, children, :negative_lookahead_stop]
    when Regexp::Expression::Group::Capture
      [:capture_start, children, :capture_stop]
    when Regexp::Expression::CharacterSet
      # A set that contains at least one range is wrapped in array markers.
      if children.flatten.include?(:range_start)
        [:characterset_start, :array_start, children, :array_stop, :characterset_stop]
      else
        [:characterset_start, children, :characterset_stop]
      end
    when Regexp::Expression::Alternation
      [:alternation_start, children, :alternation_stop]
    when Regexp::Expression::Alternative
      [:alternative_start, children, :alternative_stop]
    when Regexp::Expression::CharacterSet::Range
      # Only the raw text of the two endpoints is used, not their processed form.
      [:range_start, node.expressions[0].text, :range_mid, node.expressions[1].text, :range_stop]
    when Regexp::Expression::Anchor::WordBoundary
      :boundary
    when Regexp::Expression::Anchor::NonWordBoundary
      :non_word_boundary
    when Regexp::Expression::EscapeSequence::Backspace
      :boundary # most probably boundary
    when Regexp::Expression::CharacterType::Space
      :space
    when Regexp::Expression::Anchor::BeginningOfLine
      :line_start
    when Regexp::Expression::Anchor::EndOfLine
      :line_end
    when Regexp::Expression::CharacterType::Any
      :any_character
    when Regexp::Expression::Literal,
         Regexp::Expression::EscapeSequence::Literal,
         Regexp::Expression::EscapeSequence::Codepoint
      node.text
    when Regexp::Expression::PosixClass
      "[#{node.text}]"
    when Regexp::Expression::UnicodeProperty::Script
      node.text
    when Regexp::Expression::Backreference::Number
      # NOTE(review): node.number is emitted as-is. If it is not a String
      # (presumably an Integer - confirm), stringify renders it through
      # inspect padded with spaces, which would explain output like "ref( 1 )".
      [:backref_num_start, node.number, :backref_num_stop]
    else
      missing = [:missing, node.class]
      missing << children if node.respond_to?(:expressions)
      # TODO: add quantifier support (missing << process(node.quantifier)).
      # Until then the unsupported quantified node is dumped for inspection.
      pp node if quantifier
      missing
    end

  return out unless quantifier

  token = quantifier.token.to_s
  if token == "interval" && quantifier.max == quantifier.min
    # A fixed repetition count {n} is unrolled into n copies of the operand.
    [out] * quantifier.max
  else
    [:"#{token}_start", [out], :"#{token}_stop"]
  end
end
#process_root(node) ⇒ Object
83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 |
# File 'lib/interscript/utils/regexp_converter.rb', line 83
#
# Splits the top-level token list into its look-around conditions and the
# remaining pattern.
#
# For each look-around kind, every top-level element whose first item is the
# matching start marker is removed from the pattern and stored under its key:
#
#   :before     <- :lookbehind_start
#   :not_before <- :negative_lookbehind_start
#   :after      <- :lookahead_start
#   :not_after  <- :negative_lookahead_start
#
# A single match stores its children (element [1]) directly. Several matches
# are combined into an alternation token structure. A key is absent when
# nothing matched. +node+ itself is not modified.
#
# @param node [Array] top-level token list; each element must respond to +[]+
# @return [Hash] the look-around keys that were found, plus :from holding the
#   remaining elements
def process_root(node)
  lookarounds = {
    before: :lookbehind_start,
    not_before: :negative_lookbehind_start,
    after: :lookahead_start,
    not_after: :negative_lookahead_start
  }

  remaining = node.dup
  root = {}

  lookarounds.each do |key, start_marker|
    found = node.select { |element| element[0] == start_marker }
    next if found.empty?

    root[key] =
      if found.size == 1
        found[0][1]
      else
        # The nesting here (a list of alternatives and a one-element stop
        # array) is kept as-is; stringify flattens its input anyway.
        [
          :alternation_start,
          found.map { |element| [:alternative_start, element[1], :alternative_stop] },
          [:alternation_stop]
        ]
      end

    found.each { |element| remaining.delete(element) }
  end

  root[:from] = remaining
  root
end
#stringify(node) ⇒ Object
163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 |
# File 'lib/interscript/utils/regexp_converter.rb', line 163
#
# Renders a token structure as a single expression string.
#
# The input is flattened, then each token is rendered in turn:
# * Symbols listed in the substitution table become their textual form,
#   e.g. :capture_start becomes "capture(".
# * Strings are wrapped in double quotes; consecutive Strings are merged into
#   one quoted literal.
# * Anything else is rendered with +inspect+, padded with one space on each
#   side.
#
# " + " is inserted between two adjacent operands and ", " between two
# adjacent alternatives.
#
# @param node [Array] (possibly nested) list of tokens
# @return [String] the rendered expression
def stringify(node)
  subs = {
    characterset_start: 'any(',
    characterset_stop: ')',
    array_start: '[',
    array_stop: ']',
    capture_start: 'capture(',
    capture_stop: ')',
    zero_or_one_start: 'maybe(',
    zero_or_one_stop: ')',
    zero_or_more_start: 'maybe_some(',
    zero_or_more_stop: ')',
    one_or_more_start: 'some(',
    one_or_more_stop: ')',
    alternation_start: 'any([',
    alternation_stop: '])',
    alternative_start: '',
    alternative_stop: '',
    boundary: 'boundary',
    non_word_boundary: 'non_word_boundary',
    space: 'space',
    line_start: 'line_start',
    line_end: 'line_end',
    any_character: 'any_character',
    range_start: 'any(',
    range_mid: '..',
    range_stop: ')',
    backref_num_start: 'ref(',
    backref_num_stop: ')'
  }
  # Tokens that end an operand (a following operand needs " + " before it).
  left_side = %i[characterset_stop capture_stop zero_or_one_stop zero_or_more_stop one_or_more_stop
                 boundary non_word_boundary line_start any_character range_stop space backref_num_stop]
  # Tokens that begin an operand (a preceding operand needs " + " after it).
  right_side = %i[characterset_start capture_start zero_or_one_start zero_or_more_start one_or_more_start
                  boundary non_word_boundary line_end any_character range_start space backref_num_start]

  pieces = []
  prev = nil # the token before the current one; nil for the first token

  node.flatten.each do |token|
    prev_is_string = prev.instance_of?(String)
    token_is_string = token.instance_of?(String)
    prev_closes = left_side.include?(prev)
    token_opens = right_side.include?(token)

    # Two adjacent Strings are deliberately excluded: they are merged below.
    if (prev_is_string && token_opens) || (prev_closes && (token_is_string || token_opens))
      pieces << ' + '
    end
    pieces << ', ' if prev == :alternative_stop && token == :alternative_start

    if subs.key?(token)
      pieces << subs[token]
    elsif token_is_string
      if prev_is_string
        # Drop the previous literal's closing quote and append this text to it.
        pieces[-1] = "#{pieces[-1][0..-2]}#{token}\""
      else
        pieces << "\"#{token}\""
      end
    else
      pieces << " #{token.inspect} "
    end

    prev = token
  end

  # Collapse a doubled backslash in front of "u" into a single one.
  pieces.join.gsub('\\\\u', '\\u')
end
#stringify_root(root, indent: 0) ⇒ Object
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 |
# File 'lib/interscript/utils/regexp_converter.rb', line 236
#
# Renders a root hash (keys :from, :to and the optional look-around keys
# :before, :not_before, :after, :not_after) as one "sub ..." line.
#
# If the rendered line contains a construct that is flagged below, the whole
# line is turned into a comment with a trailing warning. The checks run in
# order and each one wraps the previous result, so warnings can stack.
#
# Side effects: an empty root[:from] is replaced in place with [""], and a
# line containing 'zero_or_one' also prints a notice and dumps +root+ to
# standard output.
#
# @param root [Hash] the rule parts; :from and the look-around values are
#   passed to stringify, :to is interpolated as-is
# @param indent [Integer] number of spaces placed in front of the line (and
#   again in front of every comment wrapper)
# @return [String] the rendered, possibly commented-out, line
def stringify_root(root, indent: 0)
  pad = " " * indent
  comment_out = ->(line, note) { pad + "# #{line} # warning: #{note}" }

  root[:from] = [""] if root[:from] == []
  str = pad + "sub #{stringify(root[:from])}, #{root[:to]}"

  %i[before not_before after not_after].each do |look|
    next unless root[look]

    # An empty condition is written as an empty string literal.
    str_look = root[look] == [] ? "\"\"" : stringify(root[look])
    str << ", #{look}: #{str_look}"
  end

  # A colon that is not preceded by "[" and not followed by a space or "]"
  # is flagged with the "warning: :" note.
  str = comment_out.call(str, ":") if str =~ /[^\[]:[^ \]]/
  str = comment_out.call(str, ":missing unimplemented") if str.include?(':missing')
  str = comment_out.call(str, ":interval unimplemented") if str.include?(':interval')
  str = comment_out.call(str, ":adding_ranges unimplemented") if str.include?(':adding_ranges')
  if str.include?('zero_or_one')
    str = comment_out.call(str, "zero_or_one")
    puts "str.includes 'zero_or_one'"
    pp root
  end
  str = comment_out.call(str, ":lookahead_start") if str.include?(':lookahead_start')
  str
end