Class: OedipusLex

Inherits:
Object
  • Object
show all
Defined in:
lib/oedipus_lex.rb,
lib/oedipus_lex.rex.rb

Overview

The generated lexer OedipusLex

Defined Under Namespace

Classes: Group, LexerError, Rule, ScanError

Constant Summary collapse

VERSION =

:nodoc:

"2.6.2"
DEFAULTS =

:nodoc:

{ # :nodoc:
  :debug    => false,
  :do_parse => false,
  :lineno   => false,
  :column   => false,
  :stub     => false,
}
TEMPLATE =

:stopdoc:

<<-'REX'.gsub(/^ {6}/, '')
    # frozen_string_literal: true
    <%= encoding %>
    #--
    # This file is automatically generated. Do not modify it.
    # Generated by: oedipus_lex version <%= VERSION %>.
% if filename then
    # Source: <%= filename %>
% end
    #++

% unless header.empty? then
%   header.each do |s|
    <%= s %>
%   end

% end

    ##
    # The generated lexer <%= class_name %>

    class <%= class_name %>
      require 'strscan'

% unless macros.empty? then
      # :stopdoc:
%   max = macros.map { |(k,_)| k.size }.max
%   macros.each do |(k,v)|
      <%= "%-#{max}s = %s" % [k, v] %>
%   end
      # :startdoc:
% end
      # :stopdoc:
      class LexerError < StandardError ; end
      class ScanError < LexerError ; end
      # :startdoc:

% if option[:lineno] then
      ##
      # The current line number.

      attr_accessor :lineno
% end
      ##
      # The file name / path

      attr_accessor :filename

      ##
      # The StringScanner for this lexer.

      attr_accessor :ss

      ##
      # The current lexical state.

      attr_accessor :state

      alias :match :ss

      ##
      # The match groups for the current scan.

      def matches
        m = (1..9).map { |i| ss[i] }
        m.pop until m[-1] or m.empty?
        m
      end

      ##
      # Yields on the current action.

      def action
        yield
      end

% if option[:column] then
      ##
      # The previous position. Only available if the :column option is on.

      attr_accessor :old_pos

      ##
      # The position of the start of the current line. Only available if the
      # :column option is on.

      attr_accessor :start_of_current_line_pos

      ##
      # The current column, starting at 0. Only available if the
      # :column option is on.
      def column
        old_pos - start_of_current_line_pos
      end

% end
% if option[:do_parse] then
      ##
      # Parse the file by getting all tokens and calling lex_+type+ on them.

      def do_parse
        while token = next_token do
          type, *vals = token

          send "lex_#{type}", *vals
        end
      end

% end
      ##
      # The current scanner class. Must be overridden in subclasses.

      def scanner_class
        StringScanner
      end unless instance_methods(false).map(&:to_s).include?("scanner_class")

      ##
      # Parse the given string.

      def parse str
        self.ss     = scanner_class.new str
% if option[:lineno] then
        self.lineno = 1
% end
% if option[:column] then
        self.start_of_current_line_pos = 0
% end
        self.state  ||= nil

        do_parse
      end

      ##
      # Read in and parse the file at +path+.

      def parse_file path
        self.filename = path
        open path do |f|
          parse f.read
        end
      end

      ##
      # The current location in the parse.

      def location
        [
          (filename || "<input>"),
% if option[:lineno] then
          lineno,
% elsif option[:column] then
          "?",
% end
% if option[:column] then
          column,
% end
        ].compact.join(":")
      end

      ##
      # Lex the next token.

      def next_token
% starts.each do |s|
        <%= s %>
% end

        token = nil

        until ss.eos? or token do
% if option[:lineno] then
          if ss.check(/\n/) then
            self.lineno += 1
% if option[:column] then
            # line starts 1 position after the newline
            self.start_of_current_line_pos = ss.pos + 1
% end
          end
% end
% if option[:column] then
          self.old_pos = ss.pos
% end
          token =
            case state
% all_states.each do |the_states|
%   exclusive = the_states.first != nil
%   the_states, predicates = the_states.partition { |s| s.nil? or s.start_with? ":" }
            when <%= the_states.map { |s| s || "nil" }.join ", " %> then
              case
%   the_states.each do |state|
%     lines = rules.map { |r| r.to_ruby state, predicates, exclusive }.compact
<%= lines.join("\n").gsub(/^/, " " * 10) %>
%   end # the_states.each
              else
                text = ss.string[ss.pos .. -1]
                raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
              end
% end # all_states
            else
              raise ScanError, "undefined state at #{location}: '#{state}'"
            end # token = case state

          next unless token # allow functions to trigger redo w/ nil
        end # while

        raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
          token.nil? || (Array === token && token.size >= 2)

        # auto-switch state
        self.state = token.last if token && token.first == :state

% if option[:debug] then
        p [state, token]
% end
        token
      end # def next_token
% inners.each do |s|
      <%= s %>
% end
    end # class
% unless ends.empty? then

%   ends.each do |s|
      <%= s %>
%   end
% end
% if option[:stub] then

    if __FILE__ == $0
      ARGV.each do |path|
        rex = <%= class_name %>.new

        def rex.do_parse
          while token = self.next_token
            p token
          end
        end

        begin
          rex.parse_file path
        rescue
          lineno = rex.respond_to?(:lineno) ? rex.lineno : -1
          $stderr.printf "%s:%d:%s\n", rex.filename, lineno, $!.message
          exit 1
        end
      end
    end
% end
REX
ST =

:stopdoc:

/(?:(:\S+|\w+\??))/
RE =
/(\/(?:\\.|[^\/])*\/[ion]?)/
ACT =
/(\{.*|:?\w+)/

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(opts = {}) ⇒ OedipusLex

:nodoc:



234
235
236
237
238
239
240
241
242
243
244
245
# File 'lib/oedipus_lex.rb', line 234

# Sets up an empty lexer description.
#
# opts:: option overrides; merged on top of DEFAULTS to form +option+.
#
# The class name and the current group start out unset, and every line/rule
# collection starts out as its own empty array.
def initialize(opts = {}) # :nodoc:
  self.option     = DEFAULTS.merge(opts)
  self.class_name = nil
  self.group      = nil

  # each collection gets a distinct array instance
  self.header, self.ends, self.inners = [], [], []
  self.macros, self.rules, self.starts = [], [], []
end

Instance Attribute Details

#class_nameObject

The class name to generate.



38
39
40
# File 'lib/oedipus_lex.rb', line 38

def class_name
  @class_name
end

#endsObject

An array of lines to have after the lexer class.



48
49
50
# File 'lib/oedipus_lex.rb', line 48

def ends
  @ends
end

#filenameObject

The file name / path



33
34
35
# File 'lib/oedipus_lex.rex.rb', line 33

def filename
  @filename
end

#groupObject

The group currently being built from the lexer rules, or nil when no group is open.



82
83
84
# File 'lib/oedipus_lex.rb', line 82

def group
  @group
end

#headerObject

An array of header lines to have before the lexer class.



43
44
45
# File 'lib/oedipus_lex.rb', line 43

def header
  @header
end

#innersObject

An array of lines to have inside (but at the bottom of) the lexer class.



54
55
56
# File 'lib/oedipus_lex.rb', line 54

def inners
  @inners
end

#linenoObject

The current line number.



29
30
31
# File 'lib/oedipus_lex.rex.rb', line 29

def lineno
  @lineno
end

#macrosObject

An array of name/regexp pairs to generate constants inside the lexer class.



60
61
62
# File 'lib/oedipus_lex.rb', line 60

def macros
  @macros
end

#old_posObject

The previous position. Only available if the :column option is on.



66
67
68
# File 'lib/oedipus_lex.rex.rb', line 66

def old_pos
  @old_pos
end

#optionObject

A hash of options for the code generator. See README.rdoc for supported options.



66
67
68
# File 'lib/oedipus_lex.rb', line 66

def option
  @option
end

#rulesObject

The rules for the lexer.



71
72
73
# File 'lib/oedipus_lex.rb', line 71

def rules
  @rules
end

#ssObject Also known as: match

The StringScanner for this lexer.



38
39
40
# File 'lib/oedipus_lex.rex.rb', line 38

def ss
  @ss
end

#start_of_current_line_posObject

The position of the start of the current line. Only available if the :column option is on.



72
73
74
# File 'lib/oedipus_lex.rex.rb', line 72

def start_of_current_line_pos
  @start_of_current_line_pos
end

#startsObject

An array of lines of code to generate into the top of the lexer (next_token) loop.



77
78
79
# File 'lib/oedipus_lex.rb', line 77

def starts
  @starts
end

#stateObject

The current lexical state.



43
44
45
# File 'lib/oedipus_lex.rex.rb', line 43

def state
  @state
end

Class Method Details

.[](name, *rules) ⇒ Object

A convenience method to create a new lexer with a name and given rules.



227
228
229
230
231
232
# File 'lib/oedipus_lex.rb', line 227

# Builds a lexer called +name+ that is pre-loaded with +rules+.
#
# name::  stored as the new lexer's class_name
# rules:: appended, in order, to the new lexer's rules
#
# Returns the newly created lexer.
def self.[](name, *rules)
  new.tap do |lexer|
    lexer.class_name = name
    lexer.rules.concat(rules)
  end
end

Instance Method Details

#==(o) ⇒ Object

:nodoc:



247
248
249
250
251
252
253
254
255
256
# File 'lib/oedipus_lex.rb', line 247

# Two lexers are equal when they are of the same class and agree on
# class_name, header, ends, inners, macros, rules and starts.
# Fields are compared in that order and comparison stops at the first
# mismatch.
def ==(o) # :nodoc:
  return false unless o.class == self.class

  %i[class_name header ends inners macros rules starts].all? do |field|
    o.public_send(field) == public_send(field)
  end
end

#actionObject

Yields on the current action.



59
60
61
# File 'lib/oedipus_lex.rex.rb', line 59

# Yields to the given block and returns whatever the block returns.
# next_token wraps each rule's action in a call to this method.
def action
  yield
end

#columnObject

The current column, starting at 0. Only available if the :column option is on.



77
78
79
# File 'lib/oedipus_lex.rex.rb', line 77

# Column of the current token: the distance between the position recorded
# before the last scan (old_pos) and the recorded start of the current line.
def column
  token_start = old_pos
  line_start  = start_of_current_line_pos
  token_start - line_start
end

#do_parseObject

Parse the file by getting all tokens and calling lex_type on them.



84
85
86
87
88
89
90
# File 'lib/oedipus_lex.rex.rb', line 84

# Drains next_token and dispatches every token to its handler: a token
# [type, *vals] results in a call to lex_<type>(*vals). Stops at the first
# nil/false token and returns nil.
def do_parse
  while (token = next_token)
    kind, *args = token
    handler = "lex_#{kind}"
    send(handler, *args)
  end
end

#end_groupObject

End a group.



345
346
347
348
349
# File 'lib/oedipus_lex.rb', line 345

# End a group: move the group being built onto rules, clear the current
# group and put the lexer back into the :rule state.
def end_group
  finished = group
  rules.push(finished)
  self.group = nil
  self.state = :rule
end

#generateObject

Generate the lexer.



370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
# File 'lib/oedipus_lex.rb', line 370

# Generate the lexer: render TEMPLATE through ERB and return the result.
#
# The template is evaluated against this method's binding, so the local
# variables below (all_states, encoding) are part of the template's inputs;
# renaming them would break the rendering.
#
# Side effect: when the first header line contains "encoding:", it is
# removed from +header+ and used as the encoding line instead.
def generate
  # Only entries that are Rule instances contribute a start state; anything
  # else (and rules without a start state) maps to nil and is dropped.
  filter = lambda { |r| Rule === r && r.start_state || nil }
  _mystates = rules.map(&filter).flatten.compact.uniq
  # States spelled with a leading ":" plus an uppercase letter are treated as
  # exclusive; every other state is inclusive.
  exclusives, inclusives = _mystates.partition { |s| s =~ /^:[A-Z]/ }

  # NOTE: doubling up assignment to remove unused var warnings in
  # ERB binding.

  all_states =
    all_states = [[nil, *inclusives],          # nil+incls # eg [[nil, :a],
                  *exclusives.map { |s| [s] }] # [excls]   #     [:A], [:B]]

  # Regexp#match? is false for a nil argument, so an empty header falls
  # through to the default below.
  encoding = header.shift if /encoding:/.match?(header.first)
  encoding ||= "# encoding: UTF-8"

  # "%" trim mode lets template lines that start with % be Ruby code.
  # The keyword form is used from Ruby 2.6 on, the positional form before.
  erb = if RUBY_VERSION >= "2.6.0" then
          ERB.new(TEMPLATE, trim_mode:"%")
        else
          ERB.new(TEMPLATE, nil, "%")
        end

  erb.result binding
end

#lex_class(prefix, name) ⇒ Object

Process a class lexeme.



270
271
272
273
# File 'lib/oedipus_lex.rb', line 270

# Process a class lexeme.
#
# prefix:: text that preceded the class line; each of its lines is appended
#          to header
# name::   stored as class_name
def lex_class(prefix, name)
  prefix.split(/\n/).each { |line| header << line }
  self.class_name = name
end

#lex_comment(line) ⇒ Object

Process a comment lexeme.



278
279
280
# File 'lib/oedipus_lex.rb', line 278

# Process a comment lexeme. Comments are ignored; always returns nil.
def lex_comment(line)
  nil # nothing to record for a comment
end

#lex_end(line) ⇒ Object

Process an end lexeme.



285
286
287
# File 'lib/oedipus_lex.rb', line 285

# Process an end lexeme: +line+ is appended to ends, the lines emitted after
# the lexer class. Returns ends.
def lex_end(line)
  ends.push(line)
end

#lex_group(start_state, regexp, action = nil) ⇒ Object

Process a group lexeme.



336
337
338
339
340
# File 'lib/oedipus_lex.rb', line 336

# Process a group lexeme: build a Rule from the arguments, point it at the
# group currently being built, and append it to that group.
#
# Returns the result of appending to the group.
def lex_group(start_state, regexp, action = nil)
  current = group
  member  = Rule.new(start_state, regexp, action).tap { |r| r.group = current }
  current << member
end

#lex_groupend(start_state, regexp, action = nil) ⇒ Object

Process the end of a group lexeme.



354
355
356
357
# File 'lib/oedipus_lex.rb', line 354

# Process the end of a group lexeme: close the group in progress, then
# handle the arguments as an ordinary rule via lex_rule.
def lex_groupend(start_state, regexp, action = nil)
  end_group # close the pending group before the rule is added
  lex_rule(start_state, regexp, action)
end

#lex_grouphead(re) ⇒ Object

Process a group head lexeme.



327
328
329
330
331
# File 'lib/oedipus_lex.rb', line 327

# Process a group head lexeme: close any group still in progress, switch
# to the :group state and start a new Group built from +re+.
#
# Returns the new Group.
def lex_grouphead(re)
  group && end_group

  self.state = :group
  self.group = Group.new(re)
end

#lex_inner(line) ⇒ Object

Process an inner lexeme.



292
293
294
# File 'lib/oedipus_lex.rb', line 292

# Process an inner lexeme: +line+ is appended to inners, the lines emitted
# at the bottom of the lexer class. Returns inners.
def lex_inner(line)
  inners.push(line)
end

#lex_macro(name, value) ⇒ Object

Process a macro lexeme.



306
307
308
# File 'lib/oedipus_lex.rb', line 306

# Process a macro lexeme: record the [name, value] pair in macros.
# Returns macros.
def lex_macro(name, value)
  pair = [name, value]
  macros.push(pair)
end

#lex_option(option) ⇒ Object

Process an option lexeme.



313
314
315
# File 'lib/oedipus_lex.rb', line 313

# Process an option lexeme: switch the named option on in the option hash.
#
# option:: option name; converted with to_sym before being stored
#
# The parameter shadows the +option+ reader, hence the explicit self.
def lex_option(option)
  key = option.to_sym
  self.option.store(key, true)
end

#lex_rule(start_state, regexp, action = nil) ⇒ Object

Process a rule lexeme.



320
321
322
# File 'lib/oedipus_lex.rb', line 320

# Process a rule lexeme: wrap the arguments in a Rule and append it to
# rules. Returns rules.
def lex_rule(start_state, regexp, action = nil)
  rule = Rule.new(start_state, regexp, action)
  rules.push(rule)
end

#lex_start(line) ⇒ Object

Process a start lexeme.



299
300
301
# File 'lib/oedipus_lex.rb', line 299

# Process a start lexeme: +line+, with surrounding whitespace stripped, is
# appended to starts. Returns starts.
def lex_start(line)
  code = line.strip
  starts.push(code)
end

#lex_state(_new_state) ⇒ Object

Process a state lexeme.



362
363
364
365
# File 'lib/oedipus_lex.rb', line 362

# Process a state lexeme.
#
# next_token has already switched the lexer to +_new_state+ by the time this
# handler runs, so the only work left is closing a group that is still open.
# end_group forces the state back to :rule, which would discard the section
# the lexer just entered (for example "inner" or "end" directly after a
# group), so the requested state is restored afterwards.
#
# _new_state:: the state carried by the [:state, new_state] token
def lex_state _new_state
  return unless group

  end_group
  self.state = _new_state
end

#locationObject

The current location in the parse.



124
125
126
127
128
129
130
# File 'lib/oedipus_lex.rex.rb', line 124

# The current location in the parse as "file:line:column".
# "<input>" stands in for a missing filename; nil parts are left out.
def location
  source = filename || "<input>"
  parts  = [source, lineno, column]
  parts.compact.join(":")
end

#matchesObject

The match groups for the current scan.



50
51
52
53
54
# File 'lib/oedipus_lex.rex.rb', line 50

# The match groups (1 through 9) of the current scan, with trailing nil
# groups removed. Groups that did not participate but are followed by one
# that did are kept as nil. Returns [] when there are no groups.
def matches
  captures = Array.new(9) { |idx| ss[idx + 1] }
  last_hit = captures.rindex { |capture| capture }
  last_hit ? captures[0..last_hit] : []
end

#next_tokenObject

Lex the next token.



135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
# File 'lib/oedipus_lex.rex.rb', line 135

# Lex the next token.
#
# Scans from the current position of +ss+ until a rule produces a token or
# the input is exhausted. Rules are tried top to bottom within the branch
# selected by +state+; the first one whose scan succeeds wins.
#
# Returns the token (an Array of at least two elements) or nil at end of
# input. A [:state, new_state] token also switches +state+ to new_state
# before it is returned.
#
# Raises ScanError when no rule matches or +state+ is not a known state, and
# LexerError when a rule yields something other than nil or an Array with at
# least two elements.
def next_token

  token = nil

  until ss.eos? or token do
    # check does not consume input: it only detects that the scanner is
    # sitting on a newline.
    # NOTE(review): lineno is bumped once per iteration that starts on a
    # newline, while the /\n+/ rule below may consume several newlines in one
    # go -- looks like blank lines are undercounted; confirm.
    if ss.check(/\n/) then
      self.lineno += 1
      # line starts 1 position after the newline
      self.start_of_current_line_pos = ss.pos + 1
    end
    # remember where this scan started; column is derived from it
    self.old_pos = ss.pos
    token =
      case state
      when nil, :option, :inner, :start, :macro, :rule, :group then
        case
        # section keywords: each one switches the lexer state
        when ss.skip(/options?.*/) then
          [:state, :option]
        when ss.skip(/inner.*/) then
          [:state, :inner]
        when ss.skip(/macros?.*/) then
          [:state, :macro]
        when ss.skip(/rules?.*/) then
          [:state, :rule]
        when ss.skip(/start.*/) then
          [:state, :start]
        when ss.skip(/end/) then
          [:state, :END]
        # everything up to the class line is captured as the first group
        when ss.skip(/\A((?:.|\n)*)class ([\w:]+.*)/) then
          action { [:class, *matches] }
        when ss.skip(/\n+/) then
          # do nothing
        when text = ss.scan(/\s*(\#.*)/) then
          action { [:comment, text] }
        # the remaining rules only apply in the state named by their guard
        when (state == :option) && (ss.skip(/\s+/)) then
          # do nothing
        when (state == :option) && (text = ss.scan(/stub/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/debug/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/do_parse/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/lineno/i)) then
          action { [:option, text] }
        when (state == :option) && (text = ss.scan(/column/i)) then
          action { [:option, text] }
        when (state == :inner) && (text = ss.scan(/.*/)) then
          action { [:inner, text] }
        when (state == :start) && (text = ss.scan(/.*/)) then
          action { [:start, text] }
        when (state == :macro) && (ss.skip(/\s+(\w+)\s+#{RE}/o)) then
          action { [:macro, *matches] }
        when (state == :rule) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
          action { [:rule, *matches] }
        when (state == :rule) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then
          action { [:grouphead, *matches] }
        when (state == :group) && (ss.skip(/\s*:[\ \t]*#{RE}/o)) then
          action { [:grouphead, *matches] }
        when (state == :group) && (ss.skip(/\s*\|\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
          action { [:group, *matches] }
        when (state == :group) && (ss.skip(/\s*#{ST}?[\ \t]*#{RE}[\ \t]*#{ACT}?/o)) then
          action { [:groupend, *matches] }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
        end
      # the :END state has its own, separate rule set
      when :END then
        case
        when ss.skip(/\n+/) then
          # do nothing
        when text = ss.scan(/.*/) then
          action { [:end, text] }
        else
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}) at #{location}: '#{text}'"
        end
      else
        raise ScanError, "undefined state at #{location}: '#{state}'"
      end # token = case state

    next unless token # allow functions to trigger redo w/ nil
  end # while

  # a token must be nil (end of input) or an Array of at least two elements
  raise LexerError, "bad lexical result at #{location}: #{token.inspect}" unless
    token.nil? || (Array === token && token.size >= 2)

  # auto-switch state
  self.state = token.last if token && token.first == :state

  token
end

#parse(str) ⇒ Object

Parse the given string.



102
103
104
105
106
107
108
109
# File 'lib/oedipus_lex.rex.rb', line 102

# Parse the given string.
#
# Wraps +str+ in a new scanner_class instance, resets the line and
# line-start bookkeeping, then hands over to do_parse and returns its result.
def parse(str)
  self.ss = scanner_class.new(str)

  # position bookkeeping restarts for every new input
  self.lineno = 1
  self.start_of_current_line_pos = 0

  # a state that is already set is kept; otherwise start in the nil state
  self.state = nil unless state

  do_parse
end

#parse_file(path) ⇒ Object

Read in and parse the file at path.



114
115
116
117
118
119
# File 'lib/oedipus_lex.rex.rb', line 114

# Read in and parse the file at +path+.
#
# Records +path+ as filename, then passes the file's contents to parse and
# returns the result of parse.
#
# File.open is used rather than Kernel#open so that +path+ is always treated
# as a file name: Kernel#open would run a path starting with "|" as a shell
# command.
def parse_file path
  self.filename = path
  File.open path do |f|
    parse f.read
  end
end

#pretty_print(pp) ⇒ Object

:nodoc:



258
259
260
261
262
263
264
265
# File 'lib/oedipus_lex.rb', line 258

# Pretty-prints the lexer as "Lexer[class_name, rule, rule, ...]" on the
# given printer, with breakable commas between the entries.
def pretty_print(pp) # :nodoc:
  separator = -> { pp.comma_breakable }

  pp.text("Lexer")
  pp.group(2, "[", "]") do
    entries = [class_name, *rules]
    pp.seplist(entries, separator, :each) { |entry| pp.pp(entry) }
  end
end

#scanner_classObject

The current scanner class (StringScanner by default). Can be overridden in subclasses to supply a different scanner.



95
96
97
# File 'lib/oedipus_lex.rex.rb', line 95

# The class that parse instantiates (via scanner_class.new) to scan the
# input. Returns StringScanner.
def scanner_class
  StringScanner
end