Class: OedipusLex
- Inherits:
-
Object
- Object
- OedipusLex
- Defined in:
- lib/oedipus_lex.rb,
lib/oedipus_lex.rex.rb
Overview
The generated lexer OedipusLex
Defined Under Namespace
Classes: Group, LexerError, Rule, ScanError
Constant Summary collapse
- VERSION =
:nodoc:
"2.6.2"
- DEFAULTS =
:nodoc:
{ # :nodoc: :debug => false, :do_parse => false, :lineno => false, :column => false, :stub => false, }
- TEMPLATE =
:stopdoc:
<<-'REX'.gsub(/^ {6}/, '') # frozen_string_literal: true <%= encoding %> #-- # This file is automatically generated. Do not modify it. # Generated by: oedipus_lex version <%= VERSION %>. % if filename then # Source: <%= filename %> % end #++ % unless header.empty? then % header.each do |s| <%= s %> % end % end ## # The generated lexer <%= class_name %> class <%= class_name %> require 'strscan' % unless macros.empty? then # :stopdoc: % max = macros.map { |(k,_)| k.size }.max % macros.each do |(k,v)| <%= "%-#{max}s = %s" % [k, v] %> % end # :startdoc: % end # :stopdoc: class LexerError < StandardError ; end class ScanError < LexerError ; end # :startdoc: % if option[:lineno] then ## # The current line number. attr_accessor :lineno % end ## # The file name / path attr_accessor :filename ## # The StringScanner for this lexer. attr_accessor :ss ## # The current lexical state. attr_accessor :state alias :match :ss ## # The match groups for the current scan. def matches m = (1..9).map { |i| ss[i] } m.pop until m[-1] or m.empty? m end ## # Yields on the current action. def action yield end % if option[:column] then ## # The previous position. Only available if the :column option is on. attr_accessor :old_pos ## # The position of the start of the current line. Only available if the # :column option is on. attr_accessor :start_of_current_line_pos ## # The current column, starting at 0. Only available if the # :column option is on. def column old_pos - start_of_current_line_pos end % end % if option[:do_parse] then ## # Parse the file by getting all tokens and calling lex_+type+ on them. def do_parse while token = next_token do type, *vals = token send "lex_#{type}", *vals end end % end ## # The current scanner class. Must be overridden in subclasses. def scanner_class StringScanner end unless instance_methods(false).map(&:to_s).include?("scanner_class") ## # Parse the given string. 
def parse str self.ss = scanner_class.new str % if option[:lineno] then self.lineno = 1 % end % if option[:column] then self.start_of_current_line_pos = 0 % end self.state ||= nil do_parse end ## # Read in and parse the file at +path+. def parse_file path self.filename = path open path do |f| parse f.read end end ## # The current location in the parse. def location [ (filename || "<input>"), % if option[:lineno] then lineno, % elsif option[:column] then "?", % end % if option[:column] then column, % end ].compact.join(":") end ## # Lex the next token. def next_token % starts.each do |s| <%= s %> % end token = nil until ss.eos? or token do % if option[:lineno] then if ss.check(/\n/) then self.lineno += 1 % if option[:column] then # line starts 1 position after the newline self.start_of_current_line_pos = ss.pos + 1 % end end % end % if option[:column] then self.old_pos = ss.pos % end token = case state % all_states.each do |the_states| % exclusive = the_states.first != nil % the_states, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" } when <%= the_states.map { |s| s || "nil" }.join ", " %> then case % the_states.each do |state| % lines = rules.map { |r| r.to_ruby state, predicates, exclusive }.compact <%= lines.join("\n").gsub(/^/, " " * 10) %> % end # the_states.each else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end % end # all_states else raise ScanError, "undefined state at #{location}: '#{state}'" end # token = case state next unless token # allow functions to trigger redo w/ nil end # while raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless token.nil? || (Array === token && token.size >= 2) # auto-switch state self.state = token.last if token && token.first == :state % if option[:debug] then p [state, token] % end token end # def next_token % inners.each do |s| <%= s %> % end end # class % unless ends.empty? 
then % ends.each do |s| <%= s %> % end % end % if option[:stub] then if __FILE__ == $0 ARGV.each do |path| rex = <%= class_name %>.new def rex.do_parse while token = self.next_token p token end end begin rex.parse_file path rescue lineno = rex.respond_to?(:lineno) ? rex.lineno : -1 $stderr.printf "%s:%d:%s\n", rex.filename, lineno, $!.message exit 1 end end end % end REX
- ST =
:stopdoc:
/(?:(:\S+|\w+\??))/
- RE =
/(\/(?:\\.|[^\/])*\/[ion]?)/
- ACT =
/(\{.*|:?\w+)/
Instance Attribute Summary collapse
-
#class_name ⇒ Object
The class name to generate.
-
#ends ⇒ Object
An array of lines to have after the lexer class.
-
#filename ⇒ Object
The file name / path.
-
#group ⇒ Object
The group currently being built while processing rules (nil when no group is open).
-
#header ⇒ Object
An array of header lines to have before the lexer class.
-
#inners ⇒ Object
An array of lines to have inside (but at the bottom of) the lexer class.
-
#lineno ⇒ Object
The current line number.
-
#macros ⇒ Object
An array of name/regexp pairs to generate constants inside the lexer class.
-
#old_pos ⇒ Object
The previous position.
-
#option ⇒ Object
A hash of options for the code generator.
-
#rules ⇒ Object
The rules for the lexer.
-
#ss ⇒ Object
(also: #match)
The StringScanner for this lexer.
-
#start_of_current_line_pos ⇒ Object
The position of the start of the current line.
-
#starts ⇒ Object
An array of lines of code to generate into the top of the lexer (next_token) loop.
-
#state ⇒ Object
The current lexical state.
Class Method Summary collapse
-
.[](name, *rules) ⇒ Object
A convenience method to create a new lexer with a
name
and given rules
.
Instance Method Summary collapse
-
#==(o) ⇒ Object
:nodoc:.
-
#action ⇒ Object
Yields on the current action.
-
#column ⇒ Object
The current column, starting at 0.
-
#do_parse ⇒ Object
Parse the file by getting all tokens and calling lex_
type
on them. -
#end_group ⇒ Object
End a group.
-
#generate ⇒ Object
Generate the lexer.
-
#initialize(opts = {}) ⇒ OedipusLex
constructor
:nodoc:.
-
#lex_class(prefix, name) ⇒ Object
Process a
class
lexeme. -
#lex_comment(line) ⇒ Object
Process a
comment
lexeme. -
#lex_end(line) ⇒ Object
Process an
end
lexeme. -
#lex_group(start_state, regexp, action = nil) ⇒ Object
Process a
group
lexeme. -
#lex_groupend(start_state, regexp, action = nil) ⇒ Object
Process the end of a
group
lexeme. -
#lex_grouphead(re) ⇒ Object
Process a group head lexeme.
-
#lex_inner(line) ⇒ Object
Process an
inner
lexeme. -
#lex_macro(name, value) ⇒ Object
Process a
macro
lexeme. -
#lex_option(option) ⇒ Object
Process an
option
lexeme. -
#lex_rule(start_state, regexp, action = nil) ⇒ Object
Process a
rule
lexeme. -
#lex_start(line) ⇒ Object
Process a
start
lexeme. -
#lex_state(_new_state) ⇒ Object
Process a
state
lexeme. -
#location ⇒ Object
The current location in the parse.
-
#matches ⇒ Object
The match groups for the current scan.
-
#next_token ⇒ Object
Lex the next token.
-
#parse(str) ⇒ Object
Parse the given string.
-
#parse_file(path) ⇒ Object
Read in and parse the file at
path
. -
#pretty_print(pp) ⇒ Object
:nodoc:.
-
#scanner_class ⇒ Object
The current scanner class.
Constructor Details
#initialize(opts = {}) ⇒ OedipusLex
:nodoc:
234 235 236 237 238 239 240 241 242 243 244 245 |
# File 'lib/oedipus_lex.rb', line 234 def initialize opts = {} # :nodoc: self.option = DEFAULTS.merge opts self.class_name = nil self.header = [] self.ends = [] self.inners = [] self.macros = [] self.rules = [] self.starts = [] self.group = nil end |
Instance Attribute Details
#class_name ⇒ Object
The class name to generate.
38 39 40 |
# File 'lib/oedipus_lex.rb', line 38 def class_name @class_name end |
#ends ⇒ Object
An array of lines to have after the lexer class.
48 49 50 |
# File 'lib/oedipus_lex.rb', line 48 def ends @ends end |
#filename ⇒ Object
The file name / path.
33 34 35 |
# File 'lib/oedipus_lex.rex.rb', line 33 def filename @filename end |
#group ⇒ Object
The group currently being built while processing rules (nil when no group is open).
82 83 84 |
# File 'lib/oedipus_lex.rb', line 82 def group @group end |
#header ⇒ Object
An array of header lines to have before the lexer class.
43 44 45 |
# File 'lib/oedipus_lex.rb', line 43 def header @header end |
#inners ⇒ Object
An array of lines to have inside (but at the bottom of) the lexer class.
54 55 56 |
# File 'lib/oedipus_lex.rb', line 54 def inners @inners end |
#lineno ⇒ Object
The current line number.
29 30 31 |
# File 'lib/oedipus_lex.rex.rb', line 29 def lineno @lineno end |
#macros ⇒ Object
An array of name/regexp pairs to generate constants inside the lexer class.
60 61 62 |
# File 'lib/oedipus_lex.rb', line 60 def macros @macros end |
#old_pos ⇒ Object
The previous position. Only available if the :column option is on.
66 67 68 |
# File 'lib/oedipus_lex.rex.rb', line 66 def old_pos @old_pos end |
#option ⇒ Object
A hash of options for the code generator. See README.rdoc for supported options.
66 67 68 |
# File 'lib/oedipus_lex.rb', line 66 def option @option end |
#rules ⇒ Object
The rules for the lexer.
71 72 73 |
# File 'lib/oedipus_lex.rb', line 71 def rules @rules end |
#ss ⇒ Object Also known as: match
The StringScanner for this lexer.
38 39 40 |
# File 'lib/oedipus_lex.rex.rb', line 38 def ss @ss end |
#start_of_current_line_pos ⇒ Object
The position of the start of the current line. Only available if the :column option is on.
72 73 74 |
# File 'lib/oedipus_lex.rex.rb', line 72 def start_of_current_line_pos @start_of_current_line_pos end |
#starts ⇒ Object
An array of lines of code to generate into the top of the lexer (next_token) loop.
77 78 79 |
# File 'lib/oedipus_lex.rb', line 77 def starts @starts end |
#state ⇒ Object
The current lexical state.
43 44 45 |
# File 'lib/oedipus_lex.rex.rb', line 43 def state @state end |
Class Method Details
.[](name, *rules) ⇒ Object
A convenience method to create a new lexer with a name
and given rules
.
227 228 229 230 231 232 |
# File 'lib/oedipus_lex.rb', line 227 def self.[](name, *rules) r = new r.class_name = name r.rules.concat rules r end |
Instance Method Details
#==(o) ⇒ Object
:nodoc:
247 248 249 250 251 252 253 254 255 256 |
# File 'lib/oedipus_lex.rb', line 247 def == o # :nodoc: (o.class == self.class and o.class_name == self.class_name and o.header == self.header and o.ends == self.ends and o.inners == self.inners and o.macros == self.macros and o.rules == self.rules and o.starts == self.starts) end |
#action ⇒ Object
Yields on the current action.
59 60 61 |
# File 'lib/oedipus_lex.rex.rb', line 59 def action yield end |
#column ⇒ Object
The current column, starting at 0. Only available if the :column option is on.
77 78 79 |
# File 'lib/oedipus_lex.rex.rb', line 77 def column old_pos - start_of_current_line_pos end |
#do_parse ⇒ Object
Parse the file by getting all tokens and calling lex_type
on them.
84 85 86 87 88 89 90 |
# File 'lib/oedipus_lex.rex.rb', line 84 def do_parse while token = next_token do type, *vals = token send "lex_#{type}", *vals end end |
#end_group ⇒ Object
End a group.
345 346 347 348 349 |
# File 'lib/oedipus_lex.rb', line 345 def end_group rules << group self.group = nil self.state = :rule end |
#generate ⇒ Object
Generate the lexer.
370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 |
# File 'lib/oedipus_lex.rb', line 370 def generate filter = lambda { |r| Rule === r && r.start_state || nil } _mystates = rules.map(&filter).flatten.compact.uniq exclusives, inclusives = _mystates.partition { |s| s =~ /^:[A-Z]/ } # NOTE: doubling up assignment to remove unused var warnings in # ERB binding. all_states = all_states = [[nil, *inclusives], # nil+incls # eg [[nil, :a], *exclusives.map { |s| [s] }] # [excls] # [:A], [:B]] encoding = header.shift if /encoding:/.match?(header.first) encoding ||= "# encoding: UTF-8" erb = if RUBY_VERSION >= "2.6.0" then ERB.new(TEMPLATE, trim_mode:"%") else ERB.new(TEMPLATE, nil, "%") end erb.result binding end |
#lex_class(prefix, name) ⇒ Object
Process a class
lexeme.
270 271 272 273 |
# File 'lib/oedipus_lex.rb', line 270 def lex_class prefix, name header.concat prefix.split(/\n/) self.class_name = name end |
#lex_comment(line) ⇒ Object
Process a comment
lexeme.
278 279 280 |
# File 'lib/oedipus_lex.rb', line 278 def lex_comment line # do nothing end |
#lex_end(line) ⇒ Object
Process an end
lexeme.
285 286 287 |
# File 'lib/oedipus_lex.rb', line 285 def lex_end line ends << line end |
#lex_group(start_state, regexp, action = nil) ⇒ Object
Process a group
lexeme.
336 337 338 339 340 |
# File 'lib/oedipus_lex.rb', line 336 def lex_group start_state, regexp, action = nil rule = Rule.new(start_state, regexp, action) rule.group = group self.group << rule end |
#lex_groupend(start_state, regexp, action = nil) ⇒ Object
Process the end of a group
lexeme.
354 355 356 357 |
# File 'lib/oedipus_lex.rb', line 354 def lex_groupend start_state, regexp, action = nil end_group lex_rule start_state, regexp, action end |
#lex_grouphead(re) ⇒ Object
Process a group head lexeme.
327 328 329 330 331 |
# File 'lib/oedipus_lex.rb', line 327 def lex_grouphead re end_group if group self.state = :group self.group = Group.new re end |
#lex_inner(line) ⇒ Object
Process an inner
lexeme.
292 293 294 |
# File 'lib/oedipus_lex.rb', line 292 def lex_inner line inners << line end |
#lex_macro(name, value) ⇒ Object
Process a macro
lexeme.
306 307 308 |
# File 'lib/oedipus_lex.rb', line 306 def lex_macro name, value macros << [name, value] end |
#lex_option(option) ⇒ Object
Process an option
lexeme.
313 314 315 |
# File 'lib/oedipus_lex.rb', line 313 def lex_option option self.option[option.to_sym] = true end |
#lex_rule(start_state, regexp, action = nil) ⇒ Object
Process a rule
lexeme.
320 321 322 |
# File 'lib/oedipus_lex.rb', line 320 def lex_rule start_state, regexp, action = nil rules << Rule.new(start_state, regexp, action) end |
#lex_start(line) ⇒ Object
Process a start
lexeme.
299 300 301 |
# File 'lib/oedipus_lex.rb', line 299 def lex_start line starts << line.strip end |
#lex_state(_new_state) ⇒ Object
Process a state
lexeme.
362 363 364 365 |
# File 'lib/oedipus_lex.rb', line 362 def lex_state _new_state end_group if group # do nothing -- lexer switches state for us end |
#location ⇒ Object
The current location in the parse.
124 125 126 127 128 129 130 |
# File 'lib/oedipus_lex.rex.rb', line 124 def location [ (filename || "<input>"), lineno, column, ].compact.join(":") end |
#matches ⇒ Object
The match groups for the current scan.
50 51 52 53 54 |
# File 'lib/oedipus_lex.rex.rb', line 50 def matches m = (1..9).map { |i| ss[i] } m.pop until m[-1] or m.empty? m end |
#next_token ⇒ Object
Lex the next token.
135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 |
# File 'lib/oedipus_lex.rex.rb', line 135 def next_token token = nil until ss.eos? or token do if ss.check(/\n/) then self.lineno += 1 # line starts 1 position after the newline self.start_of_current_line_pos = ss.pos + 1 end self.old_pos = ss.pos token = case state when nil, :option, :inner, :start, :macro, :rule, :group then case when ss.skip(/options?.*/) then [:state, :option] when ss.skip(/inner.*/) then [:state, :inner] when ss.skip(/macros?.*/) then [:state, :macro] when ss.skip(/rules?.*/) then [:state, :rule] when ss.skip(/start.*/) then [:state, :start] when ss.skip(/end/) then [:state, :END] when ss.skip(/\A((?:.|\n)*)class ([\w:]+.*)/) then action { [:class, *matches] } when ss.skip(/\n+/) then # do nothing when text = ss.scan(/\s*(\#.*)/) then action { [:comment, text] } when (state == :option) && (ss.skip(/\s+/)) then # do nothing when (state == :option) && (text = ss.scan(/stub/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/debug/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/do_parse/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/lineno/i)) then action { [:option, text] } when (state == :option) && (text = ss.scan(/column/i)) then action { [:option, text] } when (state == :inner) && (text = ss.scan(/.*/)) then action { [:inner, text] } when (state == :start) && (text = ss.scan(/.*/)) then action { [:start, text] } when (state == :macro) && (ss.skip(/\s+(\w+)\s+#{RE}/o)) then action { [:macro, *matches] } when (state == :rule) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:rule, *matches] } when (state == :rule) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then action { [:grouphead, *matches] } when (state == :group) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then action { [:grouphead, *matches] } when (state == :group) && (ss.skip(/\s*\|\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:group, *matches] } when (state == :group) && 
(ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then action { [:groupend, *matches] } else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end when :END then case when ss.skip(/\n+/) then # do nothing when text = ss.scan(/.*/) then action { [:end, text] } else text = ss.string[ss.pos .. -1] raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'" end else raise ScanError, "undefined state at #{location}: '#{state}'" end # token = case state next unless token # allow functions to trigger redo w/ nil end # while raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless token.nil? || (Array === token && token.size >= 2) # auto-switch state self.state = token.last if token && token.first == :state token end |
#parse(str) ⇒ Object
Parse the given string.
102 103 104 105 106 107 108 109 |
# File 'lib/oedipus_lex.rex.rb', line 102 def parse str self.ss = scanner_class.new str self.lineno = 1 self.start_of_current_line_pos = 0 self.state ||= nil do_parse end |
#parse_file(path) ⇒ Object
Read in and parse the file at path
.
114 115 116 117 118 119 |
# File 'lib/oedipus_lex.rex.rb', line 114 def parse_file path self.filename = path open path do |f| parse f.read end end |
#pretty_print(pp) ⇒ Object
:nodoc:
258 259 260 261 262 263 264 265 |
# File 'lib/oedipus_lex.rb', line 258 def pretty_print pp # :nodoc: commas = lambda { pp.comma_breakable } pp.text "Lexer" pp.group 2, "[", "]" do pp.seplist([class_name] + rules, commas, :each) { |v| pp.pp v } end end |
#scanner_class ⇒ Object
The current scanner class. Must be overridden in subclasses.
95 96 97 |
# File 'lib/oedipus_lex.rex.rb', line 95 def scanner_class StringScanner end |