Class: RubyLexer

Inherits:
Object
  • Object
show all
Defined in:
lib/ruby_lexer.rex.rb,
lib/ruby_lexer.rb,
lib/ruby_lexer.rb

Overview

encoding: UTF-8 TODO: this needs to be put on the first line

new_ruby_parser.rex lexical scanner definition for ruby

Defined Under Namespace

Classes: ScanError

Constant Summary collapse

RUBY19 =

:stopdoc:

"".respond_to? :encoding
IDENT_CHAR =
if RUBY19 then
  /[\w\u0080-\u{10ffff}]/u
else
  /[\w\x80-\xFF]/n
end
EOF =
:eof_haha!
STR_FUNC_BORING =

ruby constants for strings (should this be moved somewhere else?)

0x00
STR_FUNC_ESCAPE =

TODO: remove and replace with REGEXP

0x01
STR_FUNC_EXPAND =
0x02
STR_FUNC_REGEXP =
0x04
STR_FUNC_QWORDS =
0x08
STR_FUNC_SYMBOL =
0x10
STR_FUNC_INDENT =

<<-HEREDOC

0x20
STR_SQUOTE =
STR_FUNC_BORING
STR_DQUOTE =
STR_FUNC_BORING | STR_FUNC_EXPAND
STR_XQUOTE =
STR_FUNC_BORING | STR_FUNC_EXPAND
STR_REGEXP =
STR_FUNC_REGEXP | STR_FUNC_ESCAPE | STR_FUNC_EXPAND
STR_SSYM =
STR_FUNC_SYMBOL
STR_DSYM =
STR_FUNC_SYMBOL | STR_FUNC_EXPAND
ESCAPES =
{
  "a"    => "\007",
  "b"    => "\010",
  "e"    => "\033",
  "f"    => "\f",
  "n"    => "\n",
  "r"    => "\r",
  "s"    => " ",
  "t"    => "\t",
  "v"    => "\13",
  "\\"   => '\\',
  "\n"   => "",
  "C-\?" => 127.chr,
  "c\?"  => 127.chr,
}
TOKENS =
{
  "!"   => :tBANG,
  "!="  => :tNEQ,
  # "!@"  => :tUBANG,
  "!~"  => :tNMATCH,
  ","   => :tCOMMA,
  ".."  => :tDOT2,
  "..." => :tDOT3,
  "="   => :tEQL,
  "=="  => :tEQ,
  "===" => :tEQQ,
  "=>"  => :tASSOC,
  "=~"  => :tMATCH,
  "->"  => :tLAMBDA,
}
IDENT =
/^#{IDENT_CHAR}+/o
ESC =
/\\((?>[0-7]{1,3}|x[0-9a-fA-F]{1,2}|M-[^\\]|(C-|c)[^\\]|u[0-9a-fA-F]+|u\{[0-9a-fA-F]+\}|[^0-7xMCc]))/
SIMPLE_STRING =
/(#{ESC}|\#(#{ESC}|[^\{\#\@\$\"\\])|[^\"\\\#])*/o
SSTRING =
/(\\.|[^\'])*/
INT_DEC =
/[+]?(?:(?:[1-9][\d_]*|0)(?!\.\d)\b|0d[0-9_]+)/i
INT_HEX =
/[+]?0x[a-f0-9_]+/i
INT_BIN =
/[+]?0b[01_]+/i
INT_OCT =
/[+]?0o?[0-7_]+|0o/i
FLOAT =
/[+]?\d[\d_]*\.[\d_]+(e[+-]?[\d_]+)?\b|[+]?[\d_]+e[+-]?[\d_]+\b/i
INT_DEC2 =
/[+]?\d[0-9_]*(?![e])/i
NUM_BAD =
/[+]?0[xbd]\b/i
INT_OCT_BAD =
/[+]?0o?[0-7_]*[89]/i
FLOAT_BAD =
/[+]?\d[\d_]*_(e|\.)/i
@@regexp_cache =
Hash.new { |h,k| h[k] = Regexp.new(Regexp.escape(k)) }

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(v = 18) ⇒ RubyLexer

Returns a new instance of RubyLexer.



102
103
104
105
106
# File 'lib/ruby_lexer.rb', line 102

# Creates a lexer for the given ruby version +v+ (18 when omitted):
# stores the version through the +version=+ writer and then calls
# +reset+.
def initialize(v = 18)
  self.version = v
  reset
end

Instance Attribute Details

#brace_nestObject

:startdoc:



70
71
72
# File 'lib/ruby_lexer.rb', line 70

# Returns the value of attribute brace_nest.
def brace_nest
  return @brace_nest
end

#cmdargObject

Returns the value of attribute cmdarg.



71
72
73
# File 'lib/ruby_lexer.rb', line 71

# Returns the value of attribute cmdarg.
def cmdarg
  return @cmdarg
end

#command_startObject

Returns the value of attribute command_start.



72
73
74
# File 'lib/ruby_lexer.rb', line 72

# Returns the value of attribute command_start.
def command_start
  return @command_start
end

#command_stateObject

Returns the value of attribute command_state.



73
74
75
# File 'lib/ruby_lexer.rb', line 73

# Returns the value of attribute command_state.
def command_state
  return @command_state
end

#commentsObject

TODO: remove this… maybe comment_string + attr_accessor



125
126
127
128
129
# File 'lib/ruby_lexer.rb', line 125

# Returns every buffered comment fragment joined into one string and
# empties the buffer as a side effect.
#
# TODO: remove this... maybe comment_string + attr_accessor
def comments
  @comments.join.tap { @comments.clear }
end

#condObject

Returns the value of attribute cond.



75
76
77
# File 'lib/ruby_lexer.rb', line 75

# Returns the value of attribute cond.
def cond
  return @cond
end

#filenameObject

Returns the value of attribute filename.



33
34
35
# File 'lib/ruby_lexer.rex.rb', line 33

# Returns the value of attribute filename.
def filename
  return @filename
end

#last_stateObject

Returns the value of attribute last_state.



74
75
76
# File 'lib/ruby_lexer.rb', line 74

# Returns the value of attribute last_state.
def last_state
  return @last_state
end

#lex_stateObject

Additional context surrounding tokens that both the lexer and grammar use.



81
82
83
# File 'lib/ruby_lexer.rb', line 81

# Additional context surrounding tokens that both the lexer and
# grammar use. Returns the value of attribute lex_state.
def lex_state
  return @lex_state
end

#lex_strtermObject

Returns the value of attribute lex_strterm.



83
84
85
# File 'lib/ruby_lexer.rb', line 83

# Returns the value of attribute lex_strterm.
def lex_strterm
  return @lex_strterm
end

#linenoObject

Returns the value of attribute lineno.



32
33
34
# File 'lib/ruby_lexer.rex.rb', line 32

# Returns the value of attribute lineno.
def lineno
  return @lineno
end

#lpar_begObject

Returns the value of attribute lpar_beg.



84
85
86
# File 'lib/ruby_lexer.rb', line 84

# Returns the value of attribute lpar_beg.
def lpar_beg
  return @lpar_beg
end

#paren_nestObject

Returns the value of attribute paren_nest.



85
86
87
# File 'lib/ruby_lexer.rb', line 85

# Returns the value of attribute paren_nest.
def paren_nest
  return @paren_nest
end

#parserObject

HACK for very end of lexer… sigh



86
87
88
# File 'lib/ruby_lexer.rb', line 86

# HACK for very end of lexer... sigh
# Returns the value of attribute parser.
def parser
  return @parser
end

#space_seenObject

Returns the value of attribute space_seen.



87
88
89
# File 'lib/ruby_lexer.rb', line 87

# Returns the value of attribute space_seen.
def space_seen
  return @space_seen
end

#ssObject Also known as: match

Returns the value of attribute ss.



34
35
36
# File 'lib/ruby_lexer.rex.rb', line 34

# Returns the value of attribute ss.
def ss
  return @ss
end

#stateObject

Returns the value of attribute state.



35
36
37
# File 'lib/ruby_lexer.rex.rb', line 35

# Returns the value of attribute state.
def state
  return @state
end

#string_bufferObject

Returns the value of attribute string_buffer.



88
89
90
# File 'lib/ruby_lexer.rb', line 88

# Returns the value of attribute string_buffer.
def string_buffer
  return @string_buffer
end

#string_nestObject

Returns the value of attribute string_nest.



89
90
91
# File 'lib/ruby_lexer.rb', line 89

# Returns the value of attribute string_nest.
def string_nest
  return @string_nest
end

#tokenObject

Last token read via next_token.



92
93
94
# File 'lib/ruby_lexer.rb', line 92

# Last token read via next_token.
def token
  return @token
end

#versionObject

What version of ruby to parse. 18 and 19 are the only valid values currently supported.



98
99
100
# File 'lib/ruby_lexer.rb', line 98

# What version of ruby to parse. 18 and 19 are the only valid values
# currently supported.
def version
  return @version
end

Instance Method Details

#actionObject



45
46
47
# File 'lib/ruby_lexer.rex.rb', line 45

# Runs the block given by the caller and returns whatever it
# evaluates to.
def action
  return yield
end

#arg_ambiguousObject



108
109
110
# File 'lib/ruby_lexer.rb', line 108

# Emits the "ambiguous first argument" message through +warning+.
def arg_ambiguous
  message = "Ambiguous first argument. make sure."
  self.warning(message)
end

#arg_stateObject



112
113
114
# File 'lib/ruby_lexer.rb', line 112

# Picks the lex state to use after an operator: :expr_arg when
# +in_arg_state?+ holds, :expr_beg otherwise.
def arg_state
  if in_arg_state? then
    :expr_arg
  else
    :expr_beg
  end
end

#beginning_of_line?Boolean Also known as: bol?

Returns:

  • (Boolean)


116
117
118
# File 'lib/ruby_lexer.rb', line 116

# Asks the scanner (+ss+) whether it is positioned at the beginning
# of a line.
def beginning_of_line?
  scanner = ss
  scanner.bol?
end

#check(re) ⇒ Object



121
122
123
# File 'lib/ruby_lexer.rb', line 121

# Forwards +re+ to the scanner's +check+ and returns its result.
def check(re)
  scanner = ss
  scanner.check(re)
end

#d(o) ⇒ Object



1179
1180
1181
# File 'lib/ruby_lexer.rb', line 1179

# Debug helper: writes the +inspect+ form of +o+ to $stderr.
def d(o)
  dump = o.inspect
  $stderr.puts dump
end

#end_of_stream?Boolean

Returns:

  • (Boolean)


131
132
133
# File 'lib/ruby_lexer.rb', line 131

# Asks the scanner (+ss+) whether all input has been consumed.
def end_of_stream?
  scanner = ss
  scanner.eos?
end

#expr_result(token, text) ⇒ Object



135
136
137
138
139
# File 'lib/ruby_lexer.rb', line 135

# Pushes +false+ onto both the +cond+ and +cmdarg+ stacks, then
# returns +result+ for +token+/+text+ with the :expr_beg state.
def expr_result(token, text)
  cond.push(false)
  cmdarg.push(false)

  result(:expr_beg, token, text)
end

#heredoc(here) ⇒ Object

TODO: rewrite / remove



141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# File 'lib/ruby_lexer.rb', line 141

# Lexes the next piece of a heredoc body.
#
# +here+ is a four-element array; its first element is ignored and the
# rest are the terminator text (+eos+), the STR_FUNC_* flag bits
# (+func+) and +last_line+, which is pushed back onto the scanner once
# the terminator is found.
#
# Returns a two-element token: :tSTRING_END with +eos+ when the
# terminator line is matched, :tSTRING_DVAR or :tSTRING_DBEG when an
# interpolation opener is found in an expanding heredoc, or
# :tSTRING_CONTENT with the collected text (carriage returns removed).
# Calls rb_compile_error when the input ends before the terminator.
def heredoc here # TODO: rewrite / remove
  _, eos, func, last_line = here

  # Leading blanks before the terminator are only tolerated when the
  # INDENT flag is set.
  indent  = (func & STR_FUNC_INDENT) != 0 ? "[ \t]*" : nil
  expand  = (func & STR_FUNC_EXPAND) != 0
  eos_re  = /#{indent}#{Regexp.escape eos}(\r*\n|\z)/
  err_msg = "can't match #{eos_re.inspect} anywhere in "

  rb_compile_error err_msg if end_of_stream?

  # Terminator line reached: count it and push back the saved line.
  if beginning_of_line? && scan(eos_re) then
    self.lineno += 1
    ss.unread_many last_line # TODO: figure out how to remove this
    return :tSTRING_END, eos
  end

  self.string_buffer = []

  if expand then
    case
    when scan(/#[$@]/) then
      # Step back one char so the sigil stays in the input.
      ss.pos -= 1 # FIX omg stupid
      return :tSTRING_DVAR, matched
    when scan(/#[{]/) then
      return :tSTRING_DBEG, matched
    when scan(/#/) then
      string_buffer << '#'
    end

    # Accumulate content line by line until the terminator is next.
    begin
      c = tokadd_string func, "\n", nil

      rb_compile_error err_msg if
        c == RubyLexer::EOF

      if c != "\n" then
        return :tSTRING_CONTENT, string_buffer.join.delete("\r")
      else
        self.lineno += 1
        string_buffer << scan(/\n/)
      end

      rb_compile_error err_msg if end_of_stream?
    end until check(eos_re)
  else
    # Non-expanding heredoc: take whole lines verbatim.
    until check(eos_re) do
      string_buffer << scan(/.*(\n|\z)/)
      rb_compile_error err_msg if end_of_stream?
    end
  end

  # Re-arm the string terminator so the next call sees the end marker.
  self.lex_strterm = [:heredoc, eos, func, last_line]

  return :tSTRING_CONTENT, string_buffer.join.delete("\r")
end

#heredoc_identifierObject

TODO: remove / rewrite



197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
# File 'lib/ruby_lexer.rb', line 197

# Tries to read a heredoc identifier at the current scanner position.
#
# Three forms are tried in order: a quoted identifier (single, double
# or backtick quotes, optionally preceded by "-"), an opening quote
# that is rejected as unterminated, and a bare identifier (optionally
# preceded by "-"). Returns nil when none of them matches.
#
# On success the remainder of the current line is consumed and stored,
# +lex_strterm+ is set to [:heredoc, identifier, func, line], and the
# value of +result+ for :tXSTRING_BEG (backtick form) or :tSTRING_BEG
# is returned.
def heredoc_identifier # TODO: remove / rewrite
  term, func = nil, STR_FUNC_BORING
  self.string_buffer = []

  case
  when scan(/(-?)([\'\"\`])(.*?)\2/) then
    term = ss[2]
    # A leading "-" turns on the INDENT flag.
    func |= STR_FUNC_INDENT unless ss[1].empty?
    func |= case term
            when "\'" then
              STR_SQUOTE
            when '"' then
              STR_DQUOTE
            else
              STR_XQUOTE
            end
    string_buffer << ss[3]
  when scan(/-?([\'\"\`])(?!\1*\Z)/) then
    rb_compile_error "unterminated here document identifier"
  when scan(/(-?)(#{IDENT_CHAR}+)/) then
    # Bare identifiers use the double-quote flags.
    term = '"'
    func |= STR_DQUOTE
    unless ss[1].empty? then
      func |= STR_FUNC_INDENT
    end
    string_buffer << ss[2]
  else
    return nil
  end

  # Save whatever follows the identifier on this line (nil if none).
  if scan(/.*\n/) then
    # TODO: think about storing off the char range instead
    line = matched
  else
    line = nil
  end

  self.lex_strterm = [:heredoc, string_buffer.join, func, line]

  if term == '`' then
    result nil, :tXSTRING_BEG, "`"
  else
    result nil, :tSTRING_BEG, "\""
  end
end

#in_arg_state?Boolean

TODO: rename is_after_operator?

Returns:

  • (Boolean)


247
248
249
# File 'lib/ruby_lexer.rb', line 247

# True when the current lex state is :expr_fname or :expr_dot.
#
# TODO: rename is_after_operator?
def in_arg_state?
  operator_states = [:expr_fname, :expr_dot]
  in_lex_state?(*operator_states)
end

#in_fname?Boolean

Returns:

  • (Boolean)


243
244
245
# File 'lib/ruby_lexer.rb', line 243

# True when the current lex state is :expr_fname.
def in_fname?
  in_lex_state?(:expr_fname)
end

#in_lex_state?(*states) ⇒ Boolean

Returns:

  • (Boolean)


251
252
253
# File 'lib/ruby_lexer.rb', line 251

# True when the current +lex_state+ equals any of the given +states+.
def in_lex_state?(*states)
  current = lex_state
  states.any? { |candidate| candidate == current }
end

#int_with_base(base) ⇒ Object



255
256
257
258
# File 'lib/ruby_lexer.rb', line 255

# Converts the most recently matched text to an Integer in the given
# +base+ and returns it as a :tINTEGER result in the :expr_end state.
# Reports "Invalid numeric format" via rb_compile_error when the text
# contains two consecutive underscores.
def int_with_base(base)
  if matched =~ /__/ then
    rb_compile_error "Invalid numeric format"
  end

  result :expr_end, :tINTEGER, matched.to_i(base)
end

#is_arg?Boolean

Returns:

  • (Boolean)


260
261
262
# File 'lib/ruby_lexer.rb', line 260

# True when the current lex state is :expr_arg or :expr_cmdarg.
def is_arg?
  arg_states = [:expr_arg, :expr_cmdarg]
  in_lex_state?(*arg_states)
end

#is_beg?Boolean

Returns:

  • (Boolean)


264
265
266
# File 'lib/ruby_lexer.rb', line 264

# True when the current lex state is one of :expr_beg, :expr_value,
# :expr_mid or :expr_class.
def is_beg?
  beg_states = [:expr_beg, :expr_value, :expr_mid, :expr_class]
  in_lex_state?(*beg_states)
end

#is_end?Boolean

Returns:

  • (Boolean)


268
269
270
# File 'lib/ruby_lexer.rb', line 268

# True when the current lex state is one of :expr_end, :expr_endarg
# or :expr_endfn.
def is_end?
  end_states = [:expr_end, :expr_endarg, :expr_endfn]
  in_lex_state?(*end_states)
end

#is_label_possible?Boolean

Returns:

  • (Boolean)


272
273
274
# File 'lib/ruby_lexer.rb', line 272

# True when the lex state is :expr_beg or :expr_endfn and
# +command_state+ is not set; otherwise falls back to +is_arg?+.
def is_label_possible?
  return true if in_lex_state?(:expr_beg, :expr_endfn) && !command_state

  is_arg?
end

#is_space_arg?(c = "x") ⇒ Boolean

Returns:

  • (Boolean)


276
277
278
# File 'lib/ruby_lexer.rb', line 276

# Truthy when +is_arg?+ holds, a space has been seen, and +c+
# contains no whitespace character. +c+ defaults to "x".
def is_space_arg?(c = "x")
  is_arg? && space_seen && (c !~ /\s/)
end

#matchedObject



280
281
282
# File 'lib/ruby_lexer.rb', line 280

# Returns the scanner's +matched+ value.
def matched
  scanner = ss
  scanner.matched
end

#matchesObject



39
40
41
42
43
# File 'lib/ruby_lexer.rex.rb', line 39

# Collects scanner capture groups 1 through 9 into an array and drops
# the trailing empty (nil) entries.
def matches
  groups = (1..9).collect { |index| ss[index] }
  groups.pop while !groups.empty? && !groups.last
  groups
end

#next_tokenObject



68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/ruby_lexer.rex.rb', line 68

# Scans the input and returns the next token as an Array of at least
# two elements, or nil when the input runs out without producing one.
#
# While a string terminator is pending (+lex_strterm+) the work is
# handed to +process_string+. Otherwise the per-token flags are reset
# and the patterns below are tried in order against the scanner; the
# first one that matches decides the action. An action that yields nil
# (for example whitespace or a line continuation) makes the loop scan
# again.
#
# Raises ScanError when nothing matches or +state+ is not nil, and a
# RuntimeError when an action returns something that is neither nil
# nor an Array with two or more elements.
def next_token
  return process_string if lex_strterm
  self.command_state = self.command_start
  self.command_start = false
  self.space_seen    = false
  self.last_state    = lex_state

  token = nil

  until ss.eos? or token do
    token =
      case state
      when nil then
        case
        when text = ss.scan(/[\ \t\r\f\v]/) then
          action { self.space_seen = true; next }
        when text = ss.scan(/\n|\#/) then
          process_newline_or_comment text
        when text = ss.scan(/[\]\)\}]/) then
          process_bracing text
        when text = ss.scan(/\!/) then
          process_bang text
        when text = ss.scan(/\.\.\.?|,|![=~]?/) then
          action { result :expr_beg, TOKENS[text], text }
        when text = ss.scan(/\.\d/) then
          action { rb_compile_error "no .<digit> floating literal anymore put 0 before dot" }
        when text = ss.scan(/\./) then
          action { result :expr_dot, :tDOT, "." }
        when text = ss.scan(/\(/) then
          process_paren text
        when text = ss.scan(/\=\=\=|\=\=|\=~|\=>|\=(?!begin\b)/) then
          action { result arg_state, TOKENS[text], text }
        when bol? && (text = ss.scan(/\=begin(?=\s)/)) then
          process_begin text
        when text = ss.scan(/\=(?=begin\b)/) then
          action { result arg_state, TOKENS[text], text }
        when text = ss.scan(/\"(#{SIMPLE_STRING})\"/o) then
          action { result :expr_end, :tSTRING, text[1..-2].gsub(ESC) { unescape $1 } }
        when text = ss.scan(/\"/) then
          action { string STR_DQUOTE; result nil, :tSTRING_BEG, text }
        when text = ss.scan(/\@\@?\d/) then
          action { rb_compile_error "`#{text}` is not allowed as a variable name" }
        when text = ss.scan(/\@\@?#{IDENT_CHAR}+/o) then
          process_ivar text
        when not_end? && (text = ss.scan(/:([a-zA-Z_]#{IDENT_CHAR}*(?:[?]|[!](?!=)|=(?==>)|=(?![=>]))?)/o)) then
          process_symbol text
        when not_end? && (text = ss.scan(/\:\"(#{SIMPLE_STRING})\"/o)) then
          process_symbol text
        when not_end? && (text = ss.scan(/\:\'(#{SSTRING})\'/o)) then
          process_symbol text
        when text = ss.scan(/\:\:/) then
          process_colon2 text
        when text = ss.scan(/\:/) then
          process_colon1 text
        when text = ss.scan(/->/) then
          action { result :expr_endfn, :tLAMBDA, nil }
        when text = ss.scan(/[+-]/) then
          process_plus_minus text
        when text = ss.scan(/#{NUM_BAD}/o) then
          action { rb_compile_error "Invalid numeric format"  }
        when text = ss.scan(/#{INT_DEC}/o) then
          action { int_with_base 10                           }
        when text = ss.scan(/#{INT_HEX}/o) then
          action { int_with_base 16                           }
        when text = ss.scan(/#{INT_BIN}/o) then
          action { int_with_base 2                            }
        when text = ss.scan(/#{INT_OCT_BAD}/o) then
          action { rb_compile_error "Illegal octal digit."    }
        when text = ss.scan(/#{INT_OCT}/o) then
          action { int_with_base 8                            }
        when text = ss.scan(/#{FLOAT_BAD}/o) then
          action { rb_compile_error "Trailing '_' in number." }
        when text = ss.scan(/#{FLOAT}/o) then
          process_float text
        when text = ss.scan(/#{INT_DEC2}/o) then
          action { int_with_base 10                           }
        when text = ss.scan(/[0-9]/) then
          action { rb_compile_error "Bad number format" }
        when text = ss.scan(/\[/) then
          process_square_bracket text
        when text = ss.scan(/\'#{SSTRING}\'/o) then
          action { result :expr_end, :tSTRING, matched[1..-2].gsub(/\\\\/, "\\").gsub(/\\'/, "'") } # " stupid emacs
        when text = ss.scan(/\|\|\=/) then
          action { result :expr_beg, :tOP_ASGN, "||" }
        when text = ss.scan(/\|\|/) then
          action { result :expr_beg, :tOROP,    "||" }
        when text = ss.scan(/\|\=/) then
          action { result :expr_beg, :tOP_ASGN, "|" }
        when text = ss.scan(/\|/) then
          action { result :arg_state, :tPIPE,    "|" }
        when text = ss.scan(/\{/) then
          process_curly_brace text
        when text = ss.scan(/\*\*=/) then
          action { result :expr_beg, :tOP_ASGN, "**" }
        when text = ss.scan(/\*\*/) then
          action { result(:arg_state, space_vs_beginning(:tDSTAR, :tDSTAR, :tPOW), "**") }
        when text = ss.scan(/\*\=/) then
          action { result(:expr_beg, :tOP_ASGN, "*") }
        when text = ss.scan(/\*/) then
          action { result(:arg_state, space_vs_beginning(:tSTAR, :tSTAR, :tSTAR2), "*") }
        when text = ss.scan(/\<\=\>/) then
          action { result :arg_state, :tCMP, "<=>"    }
        when text = ss.scan(/\<\=/) then
          action { result :arg_state, :tLEQ, "<="     }
        when text = ss.scan(/\<\<\=/) then
          action { result :arg_state, :tOP_ASGN, "<<" }
        when text = ss.scan(/\<\</) then
          process_lchevron text
        when text = ss.scan(/\</) then
          action { result :arg_state, :tLT, "<"       }
        when text = ss.scan(/\>\=/) then
          action { result :arg_state, :tGEQ, ">="     }
        when text = ss.scan(/\>\>=/) then
          action { result :arg_state, :tOP_ASGN, ">>" }
        when text = ss.scan(/\>\>/) then
          action { result :arg_state, :tRSHFT, ">>"   }
        when text = ss.scan(/\>/) then
          action { result :arg_state, :tGT, ">"       }
        when text = ss.scan(/\`/) then
          process_backtick text
        when text = ss.scan(/\?/) then
          process_questionmark text
        when text = ss.scan(/\&\&\=/) then
          action { result(:expr_beg, :tOP_ASGN, "&&") }
        when text = ss.scan(/\&\&/) then
          action { result(:expr_beg, :tANDOP,   "&&") }
        when text = ss.scan(/\&\=/) then
          action { result(:expr_beg, :tOP_ASGN, "&" ) }
        when text = ss.scan(/\&/) then
          process_amper text
        when text = ss.scan(/\//) then
          process_slash text
        when text = ss.scan(/\^=/) then
          action { result(:expr_beg, :tOP_ASGN, "^") }
        when text = ss.scan(/\^/) then
          action { result(:arg_state, :tCARET, "^") }
        when text = ss.scan(/\;/) then
          action { self.command_start = true; result(:expr_beg, :tSEMI, ";") }
        when in_arg_state? && (text = ss.scan(/\~@/)) then
          action { result(:arg_state, :tTILDE, "~") }
        when text = ss.scan(/\~/) then
          action { result(:arg_state, :tTILDE, "~") }
        when text = ss.scan(/\\\r?\n/) then
          action { self.lineno += 1; self.space_seen = true; next }
        when text = ss.scan(/\\/) then
          action { rb_compile_error "bare backslash only allowed before newline" }
        when text = ss.scan(/\%/) then
          process_percent text
        when text = ss.scan(/\$_\w+/) then
          process_gvar text
        when text = ss.scan(/\$_/) then
          process_gvar text
        when text = ss.scan(/\$[~*$?!@\/\\;,.=:<>\"]|\$-\w?/) then
          process_gvar text
        when in_fname? && (text = ss.scan(/\$([\&\`\'\+])/)) then
          process_gvar text
        when text = ss.scan(/\$([\&\`\'\+])/) then
          process_backref text
        when in_fname? && (text = ss.scan(/\$([1-9]\d*)/)) then
          process_gvar text
        when text = ss.scan(/\$([1-9]\d*)/) then
          process_nthref text
        when text = ss.scan(/\$0/) then
          process_gvar text
        when text = ss.scan(/\$\W|\$\z/) then
          process_gvar_oddity text
        when text = ss.scan(/\$\w+/) then
          process_gvar text
        when text = ss.scan(/\_/) then
          process_underscore text
        when text = ss.scan(/#{IDENT}/o) then
          process_token text
        when text = ss.scan(/\004|\032|\000|\Z/) then
          action { [RubyLexer::EOF, RubyLexer::EOF] }
        when text = ss.scan(/./) then
          action { rb_compile_error "Invalid char #{text.inspect} in expression" }
        else
          # Nothing matched at all: report the unconsumed remainder.
          text = ss.string[ss.pos .. -1]
          raise ScanError, "can not match (#{state.inspect}): '#{text}'"
        end
      else
        raise ScanError, "undefined state: '#{state}'"
      end # token = case state

    next unless token # allow functions to trigger redo w/ nil
  end # while

  # Sanity check on whatever the action produced.
  raise "bad lexical result: #{token.inspect}" unless
    token.nil? || (Array === token && token.size >= 2)

  # auto-switch state
  self.state = token.last if token && token.first == :state

  token
end

#not_end?Boolean

Returns:

  • (Boolean)


284
285
286
# File 'lib/ruby_lexer.rb', line 284

# Logical negation of +is_end?+.
def not_end?
  !is_end?
end

#old_lineno=Object



1177
# File 'lib/ruby_lexer.rb', line 1177

# Makes the lineno= writer also available under the name old_lineno=.
alias :old_lineno= :lineno=

#parse(str) ⇒ Object



53
54
55
56
57
58
59
# File 'lib/ruby_lexer.rex.rb', line 53

# Prepares the lexer for +str+ and runs +do_parse+, returning its
# result. A fresh scanner (from +scanner_class+) is built over +str+,
# the line counter restarts at 1, and a falsy +state+ is set to nil.
def parse(str)
  self.ss = scanner_class.new(str)
  self.lineno = 1
  self.state = nil unless self.state

  do_parse
end

#parse_file(path) ⇒ Object



61
62
63
64
65
66
# File 'lib/ruby_lexer.rex.rb', line 61

# Records +path+ as the current filename, reads the whole file and
# passes its contents to +parse+, returning what +parse+ returns.
#
# The file is opened with File.open rather than Kernel#open so that
# +path+ is always treated as a plain filesystem path: Kernel#open
# treats a leading "|" as a request to run a subprocess.
def parse_file path
  self.filename = path

  File.open path do |f|
    parse f.read
  end
end

#parse_quoteObject

TODO: remove / rewrite



1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
# File 'lib/ruby_lexer.rb', line 1064

# Lexes the opener of a %-literal; the scanner is positioned just
# after the "%".
#
# Reads the optional type letter and the opening delimiter, works out
# the closing delimiter, maps the type letter to a token type plus
# STR_* flags, and registers the pending string via +string+.
# Returns the token type and the literal's opening text.
#
# Calls rb_compile_error for a two-character type, for input that
# ends inside the opener, and for an unknown type letter.
def parse_quote # TODO: remove / rewrite
  beg, nnd, short_hand, c = nil, nil, false, nil

  if scan(/[a-z0-9]{1,2}/i) then # Long-hand (e.g. %Q{}).
    rb_compile_error "unknown type of %string" if ss.matched_size == 2
    c, beg, short_hand = matched, ss.getch, false
  else                               # Short-hand (e.g. %{, %., %!, etc)
    c, beg, short_hand = 'Q', ss.getch, true
  end

  if end_of_stream? or c == RubyLexer::EOF or beg == RubyLexer::EOF then
    rb_compile_error "unterminated quoted string meets end of file"
  end

  # Figure nnd-char.  "\0" is special to indicate beg=nnd and that no nesting?
  nnd = { "(" => ")", "[" => "]", "{" => "}", "<" => ">" }[beg]
  # Not a bracket pair: the opener also closes, and beg becomes "\0".
  nnd, beg = beg, "\0" if nnd.nil?

  token_type, text = nil, "%#{c}#{beg}"
  token_type, string_type = case c
                            when 'Q' then
                              ch = short_hand ? nnd : c + beg
                              text = "%#{ch}"
                              [:tSTRING_BEG,   STR_DQUOTE]
                            when 'q' then
                              [:tSTRING_BEG,   STR_SQUOTE]
                            when 'W' then
                              scan(/\s*/)
                              [:tWORDS_BEG,    STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'w' then
                              scan(/\s*/)
                              [:tQWORDS_BEG,   STR_SQUOTE | STR_FUNC_QWORDS]
                            when 'x' then
                              [:tXSTRING_BEG,  STR_XQUOTE]
                            when 'r' then
                              [:tREGEXP_BEG,   STR_REGEXP]
                            when 's' then
                              self.lex_state  = :expr_fname
                              [:tSYMBEG,       STR_SSYM]
                            when 'I' then
                              scan(/\s*/)
                              [:tSYMBOLS_BEG, STR_DQUOTE | STR_FUNC_QWORDS]
                            when 'i' then
                              scan(/\s*/)
                              [:tQSYMBOLS_BEG, STR_SQUOTE | STR_FUNC_QWORDS]
                            end

  # No branch above matched, so both values are nil here.
  rb_compile_error "Bad %string type. Expected [QqWwIixrs], found '#{c}'." if
    token_type.nil?

  raise "huh" unless string_type

  string string_type, nnd, beg

  return token_type, text
end

#parse_string(quote) ⇒ Object

TODO: rewrite / remove



1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
# File 'lib/ruby_lexer.rb', line 1121

# Lexes the next piece of a quoted string described by +quote+, a
# four-element array whose first element is ignored and whose rest are
# the STR_* flag bits, the terminator and the opening delimiter.
#
# Returns a two-element token:
# * :tSTRING_END / :tREGEXP_END when the terminator is reached,
# * :tSPACE between the words of a word-list literal,
# * :tSTRING_DVAR / :tSTRING_DBEG at an interpolation opener,
# * :tSTRING_CONTENT with the text collected by +tokadd_string+.
#
# Calls rb_compile_error when the input ends inside the string.
def parse_string quote # TODO: rewrite / remove
  _, string_type, term, open = quote

  space = false # FIX: remove these
  func = string_type
  paren = open
  term_re = @@regexp_cache[term]

  qwords = (func & STR_FUNC_QWORDS) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  expand = (func & STR_FUNC_EXPAND) != 0

  # quote[1] is set to nil further down once a word list has ended;
  # the following call lands here and closes the literal.
  unless func then # nil'ed from qwords below. *sigh*
    return :tSTRING_END, nil
  end

  space = true if qwords and scan(/\s+/)

  if self.string_nest == 0 && scan(/#{term_re}/) then
    if qwords then
      quote[1] = nil
      return :tSPACE, nil
    elsif regexp then
      return :tREGEXP_END, self.regx_options
    else
      return :tSTRING_END, term
    end
  end

  return :tSPACE, nil if space

  self.string_buffer = []

  if expand
    case
    when scan(/#(?=[$@])/) then
      return :tSTRING_DVAR, nil
    when scan(/#[{]/) then
      return :tSTRING_DBEG, nil
    when scan(/#/) then
      string_buffer << '#'
    end
  end

  if tokadd_string(func, term, paren) == RubyLexer::EOF then
    rb_compile_error "unterminated string meets end of file"
  end

  return :tSTRING_CONTENT, string_buffer.join
end

#process_amper(text) ⇒ Object



288
289
290
291
292
293
294
295
296
297
298
299
# File 'lib/ruby_lexer.rb', line 288

# Handles a lone "&". It is lexed as :tAMPER (with a warning) when it
# follows a space in an argument state and is not itself followed by
# whitespace, as :tAMPER in the :expr_beg / :expr_mid states, and as
# :tAMPER2 otherwise.
def process_amper text
  prefix = is_arg? && space_seen && !check(/\s/)
  warning("`&' interpreted as argument prefix") if prefix

  kind = if prefix || in_lex_state?(:expr_beg, :expr_mid) then
           :tAMPER
         else
           :tAMPER2
         end

  result :arg_state, kind, "&"
end

#process_backref(text) ⇒ Object



301
302
303
304
305
# File 'lib/ruby_lexer.rb', line 301

# Produces a :tBACK_REF result whose value is the scanner's first
# capture group converted to a symbol.
def process_backref text
  # TODO: can't do lineno hack w/ symbol
  result(:expr_end, :tBACK_REF, ss[1].to_sym)
end

#process_backtick(text) ⇒ Object



307
308
309
310
311
312
313
314
315
316
317
# File 'lib/ruby_lexer.rb', line 307

# Handles a backtick. In the :expr_fname and :expr_dot states it is a
# :tBACK_REF2 token; in any other state it opens an XQUOTE string and
# yields :tXSTRING_BEG.
def process_backtick text
  current = lex_state

  return result(:expr_end, :tBACK_REF2, "`") if :expr_fname == current

  if :expr_dot == current then
    following = command_state ? :expr_cmdarg : :expr_arg
    return result(following, :tBACK_REF2, "`")
  end

  string STR_XQUOTE
  result nil, :tXSTRING_BEG, "`"
end

#process_bang(text) ⇒ Object



319
320
321
322
323
324
325
326
327
# File 'lib/ruby_lexer.rb', line 319

# Handles "!" and the operators that start with it. After an operator
# state a following "@" gives :tUBANG ("!@"); otherwise an optional
# "=" or "~" is consumed and the matching entry of TOKENS is returned.
def process_bang text
  return result(:expr_arg, :tUBANG, "!@") if in_arg_state? && scan(/@/)

  text = if scan(/[=~]/) then
           "!#{matched}"
         else
           "!"
         end

  result arg_state, TOKENS[text], text
end

#process_begin(text) ⇒ Object



329
330
331
332
333
334
335
336
337
338
339
340
# File 'lib/ruby_lexer.rb', line 329

# Consumes an embedded document (=begin ... =end) and appends it to
# the comment buffer. The buffer is emptied and rb_compile_error is
# called when no closing "=end" line is found. Always returns nil.
def process_begin text
  @comments << matched

  closed = scan(/.*?\n=end( |\t|\f)*[^\n]*(\n|\z)/m)

  if !closed then
    @comments.clear
    rb_compile_error("embedded document meets end of file")
  end

  @comments << matched

  nil # TODO
end

#process_bracing(text) ⇒ Object



342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
# File 'lib/ruby_lexer.rb', line 342

# Handles a closing "}", "]" or ")". Both the +cond+ and +cmdarg+
# stacks are lexpop'ed, the matching nesting counter is decremented,
# the lex state is updated, and the token type is returned together
# with the matched text. Raises for any other matched text.
def process_bracing text
  cond.lexpop
  cmdarg.lexpop

  closer = matched

  if "}" == closer then
    self.brace_nest -= 1
    self.lex_state = :expr_endarg
    return :tRCURLY, matched
  elsif "]" == closer then
    self.paren_nest -= 1
    self.lex_state = :expr_endarg
    return :tRBRACK, matched
  elsif ")" == closer then
    self.paren_nest -= 1
    self.lex_state = :expr_endfn
    return :tRPAREN, matched
  end

  raise "Unknown bracing: #{matched.inspect}"
end

#process_colon1(text) ⇒ Object



364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
# File 'lib/ruby_lexer.rb', line 364

# Handles a single ":". At the end of an expression, or when followed
# by whitespace, it is a plain :tCOLON. Otherwise it starts a symbol:
# an immediately following quote opens the matching symbol string, and
# :tSYMBEG is returned.
def process_colon1 text
  # ?: / then / when
  return result(:expr_beg, :tCOLON, text) if is_end? || check(/\s/)

  if scan(/\'/) then
    string STR_SSYM
  elsif scan(/\"/) then
    string STR_DSYM
  end

  result :expr_fname, :tSYMBEG, text
end

#process_colon2(text) ⇒ Object



380
381
382
383
384
385
386
# File 'lib/ruby_lexer.rb', line 380

# Handles "::". It is :tCOLON3 (state :expr_beg) when it begins an
# expression, in the :expr_class state, or after a space in an
# argument state; otherwise it is :tCOLON2 (state :expr_dot).
def process_colon2 text
  leading = is_beg? || in_lex_state?(:expr_class) || is_space_arg?

  return result(:expr_beg, :tCOLON3, text) if leading

  result :expr_dot, :tCOLON2, text
end

#process_curly_brace(text) ⇒ Object



388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
# File 'lib/ruby_lexer.rb', line 388

# Handles "{". The brace nesting counter is always incremented.
# When +lpar_beg+ is set and equals the paren nesting level, the
# brace opens a lambda body (:tLAMBEG). Otherwise the lex state
# decides between a primary block (:tLCURLY), an expression block
# (:tLBRACE_ARG) and a hash literal (:tLBRACE).
def process_curly_brace text
  self.brace_nest += 1

  if lpar_beg && lpar_beg == paren_nest then
    self.lpar_beg = nil
    self.paren_nest -= 1

    return expr_result(:tLAMBEG, "{")
  end

  brace = case
          when is_arg? || in_lex_state?(:expr_end, :expr_endfn) then
            :tLCURLY      #  block (primary)
          when in_lex_state?(:expr_endarg) then
            :tLBRACE_ARG  #  block (expr)
          else
            :tLBRACE      #  hash
          end

  # Anything but a hash literal starts a new command.
  self.command_start = true if brace != :tLBRACE

  expr_result brace, "{"
end

#process_float(text) ⇒ Object



410
411
412
413
# File 'lib/ruby_lexer.rb', line 410

# Converts +text+ with +to_f+ and returns it as a :tFLOAT result in
# the :expr_end state. Reports "Invalid numeric format" via
# rb_compile_error when +text+ contains two consecutive underscores.
def process_float(text)
  if text =~ /__/ then
    rb_compile_error "Invalid numeric format"
  end

  result :expr_end, :tFLOAT, text.to_f
end

#process_gvar(text) ⇒ Object



415
416
417
418
# File 'lib/ruby_lexer.rb', line 415

# Stamps +text+ with the current line number and returns it as a
# :tGVAR result in the :expr_end state.
def process_gvar(text)
  current_line = self.lineno
  text.lineno = current_line

  result :expr_end, :tGVAR, text
end

#process_gvar_oddity(text) ⇒ Object



420
421
422
# File 'lib/ruby_lexer.rb', line 420

# Returns a result in the :expr_end state whose token type and value
# are both the string "$"; +text+ is not used.
# TODO: wtf is this?
def process_gvar_oddity(text)
  result(:expr_end, "$", "$")
end

#process_ivar(text) ⇒ Object



424
425
426
427
428
# File 'lib/ruby_lexer.rb', line 424

# Stamps +text+ with the current line number and returns it as a
# :tCVAR result when it starts with "@@", or :tIVAR otherwise, in the
# :expr_end state.
def process_ivar(text)
  kind = if text =~ /^@@/ then
           :tCVAR
         else
           :tIVAR
         end

  text.lineno = self.lineno

  result :expr_end, kind, text
end

#process_lchevron(text) ⇒ Object



430
431
432
433
434
435
436
437
438
439
# File 'lib/ruby_lexer.rb', line 430

# Handles "<<". A heredoc identifier is attempted unless the lex state
# is :expr_dot / :expr_class, an end state, or an argument state with
# no preceding space. When no heredoc is recognised the token is the
# left-shift operator :tLSHFT.
def process_lchevron text
  unless in_lex_state?(:expr_dot, :expr_class) ||
         is_end? ||
         (is_arg? && !space_seen) then
    here_token = self.heredoc_identifier
    return here_token if here_token
  end

  result :arg_state, :tLSHFT, "\<\<"
end

#process_newline_or_comment(text) ⇒ Object



441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
# File 'lib/ruby_lexer.rb', line 441

# Handles a newline or the start of a "#" comment.
#
# Consecutive comment lines are consumed and appended to the comment
# buffer, and the line counter is advanced for every line consumed.
# Returns nil when the newline is not significant (end of input after
# a comment, certain lex states, or a following line that starts with
# a single "."); otherwise marks the start of a command and returns
# the :tNL result.
def process_newline_or_comment text
  c = matched
  hit = false

  if c == '#' then
    # Put the "#" back so the comment pattern below can see it.
    ss.pos -= 1

    while scan(/\s*\#.*(\n+|\z)/) do
      hit = true
      self.lineno += matched.lines.to_a.size
      # Strip the indentation in front of "#" and blank out
      # space-only lines before storing.
      @comments << matched.gsub(/^ +#/, '#').gsub(/^ +$/, '')
    end

    return nil if end_of_stream?
  end

  # Comment lines were already counted inside the loop above.
  self.lineno += 1 unless hit

  # Replace a string of newlines with a single one
  self.lineno += matched.lines.to_a.size if scan(/\n+/)

  return if in_lex_state?(:expr_beg, :expr_value, :expr_class,
                          :expr_fname, :expr_dot)

  if scan(/([\ \t\r\f\v]*)\./) then
    self.space_seen = true unless ss[1].empty?

    # Un-consume the "." itself; only ".." lets the newline stand.
    ss.pos -= 1
    return unless check(/\.\./)
  end

  self.command_start = true

  return result(:expr_beg, :tNL, nil)
end

#process_nthref(text) ⇒ Object



477
478
479
480
# File 'lib/ruby_lexer.rb', line 477

# Produces a :tNTH_REF result whose value is the scanner's first
# capture group converted to an Integer.
def process_nthref(text)
  # TODO: can't do lineno hack w/ number
  result(:expr_end, :tNTH_REF, ss[1].to_i)
end

#process_paren(text) ⇒ Object



482
483
484
485
486
487
488
489
490
491
492
# File 'lib/ruby_lexer.rb', line 482

# Handles "(": picks the token type with the 1.8 or 1.9 rules (depending on
# +ruby18+), bumps paren_nest and returns expr_result(token, "(").
def process_paren(text)
  paren_token = ruby18 ? process_paren18 : process_paren19

  self.paren_nest = paren_nest + 1

  expr_result paren_token, "("
end

#process_paren18 ⇒ Object



494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
# File 'lib/ruby_lexer.rb', line 494

# Chooses the token type for "(" under the Ruby 1.8 rules. Always sets
# command_start.
#
# Returns :tLPAREN in :expr_beg/:expr_mid, :tLPAREN_ARG when a space was seen
# in :expr_cmdarg, and :tLPAREN2 otherwise (warning first when a space was
# seen in :expr_arg).
def process_paren18
  self.command_start = true

  return :tLPAREN if in_lex_state?(:expr_beg, :expr_mid)
  return :tLPAREN2 unless space_seen # not a ternary -- do nothing?
  return :tLPAREN_ARG if in_lex_state?(:expr_cmdarg)

  if in_lex_state?(:expr_arg) then
    warning "don't put space before argument parentheses"
  end

  :tLPAREN2
end

#process_paren19 ⇒ Object



513
514
515
516
517
518
519
520
521
# File 'lib/ruby_lexer.rb', line 513

# Chooses the token type for "(" under the Ruby 1.9 rules: :tLPAREN at the
# beginning of an expression, :tLPAREN_ARG for a spaced argument, and
# :tLPAREN2 in every other case.
def process_paren19
  return :tLPAREN     if is_beg?
  return :tLPAREN_ARG if is_space_arg?

  :tLPAREN2 # plain '(' in parse.y
end

#process_percent(text) ⇒ Object



523
524
525
526
527
528
529
530
531
# File 'lib/ruby_lexer.rb', line 523

# Handles "%": a percent-literal opener (delegated to +parse_quote+), the
# "%=" assignment operator, or the modulo operator.
def process_percent(text)
  if is_beg? then
    parse_quote
  elsif scan(/\=/) then
    result :expr_beg, :tOP_ASGN, "%"
  elsif is_arg? && space_seen && !check(/\s/) then
    # "foo %w[...]": spaced argument that is not followed by whitespace
    parse_quote
  else
    result :arg_state, :tPERCENT, "%"
  end
end

#process_plus_minus(text) ⇒ Object



533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
# File 'lib/ruby_lexer.rb', line 533

# Handles "+" and "-". The sign is taken from the scanner's last match
# (+text+ is not read).
#
# Possible outcomes:
# * in an argument state: the unary-method name "+@"/"-@" or the binary token
# * "+=" / "-=": :tOP_ASGN
# * unary position: nil for "+<digit>", :tUMINUS_NUM for "-<digit>",
#   otherwise :tUPLUS/:tUMINUS
# * anything else: :tPLUS/:tMINUS
def process_plus_minus(text)
  sign       = matched
  is_plus    = sign == "+"
  unary_tok  = is_plus ? :tUPLUS : :tUMINUS
  binary_tok = is_plus ? :tPLUS  : :tMINUS

  if in_arg_state? then
    return result(:expr_arg, unary_tok, "#{sign}@") if scan(/@/)
    return result(:expr_arg, binary_tok, sign)
  end

  return result(:expr_beg, :tOP_ASGN, sign) if scan(/\=/)

  unary_position = is_beg? || (is_arg? && space_seen && !check(/\s/))
  return result(:expr_beg, binary_tok, sign) unless unary_position

  arg_ambiguous if is_arg?

  return result(:expr_beg, unary_tok, sign) unless check(/\d/)

  # a digit follows the sign
  return nil if unary_tok == :tUPLUS

  result :expr_beg, :tUMINUS_NUM, sign
end

#process_questionmark(text) ⇒ Object



565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
# File 'lib/ruby_lexer.rb', line 565

# Handles "?": either the ternary operator (:tEH) or a character literal.
#
# After a finished expression, before whitespace, or before two word
# characters it is the ternary "?". Otherwise the following character (or
# backslash escape) is read and returned as :tINTEGER when version is 18,
# else as :tSTRING.
#
# Reports a compile error if the input ends right after the "?".
def process_questionmark(text)
  if is_end? then
    next_state = ruby18 ? :expr_beg : :expr_value # HACK?
    return result(next_state, :tEH, "?")
  end

  rb_compile_error "incomplete character syntax: parsed #{text.inspect}" if
    end_of_stream?

  if check(/\s|\v/) then
    unless is_arg? then
      # suggest the escaped spelling of the whitespace character
      escape_letter = case matched
                      when " "  then 's'
                      when "\n" then 'n'
                      when "\t" then 't'
                      when "\v" then 'v'
                      when "\r" then 'r'
                      when "\f" then 'f'
                      end

      warning("invalid character syntax; use ?\\#{escape_letter}") if escape_letter
    end

    # ternary
    next_state = ruby18 ? :expr_beg : :expr_value # HACK?
    return result(next_state, :tEH, "?")
  end

  return result(:expr_beg, :tEH, "?") if check(/\w(?=\w)/) # ternary, also

  char = scan(/\\/) ? read_escape : ss.getch

  return result(:expr_end, :tINTEGER, char[0].ord & 0xff) if version == 18

  result :expr_end, :tSTRING, char
end

#process_slash(text) ⇒ Object



609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
# File 'lib/ruby_lexer.rb', line 609

# Handles "/": the opener of a regexp literal, the "/=" assignment operator,
# or the division operator.
#
# When a regexp starts, a STR_REGEXP string terminator is installed through
# +string+ and :tREGEXP_BEG is returned without changing the lex state.
def process_slash(text)
  if is_beg? then
    string STR_REGEXP
    return result(nil, :tREGEXP_BEG, "/")
  end

  return result(:expr_beg, :tOP_ASGN, "/") if scan(/\=/)

  # spaced argument: "foo /re/" is a regexp unless whitespace follows the "/"
  if is_arg? && space_seen && !scan(/\s/) then
    arg_ambiguous
    string STR_REGEXP, "/"
    return result(nil, :tREGEXP_BEG, "/")
  end

  result :arg_state, :tDIVIDE, "/"
end

#process_square_bracket(text) ⇒ Object



631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
# File 'lib/ruby_lexer.rb', line 631

# Handles "[".
#
# In an argument state only the method names "[]" and "[]=" are accepted
# (anything else is a compile error). Otherwise the token is :tLBRACK at the
# beginning of an expression or for a spaced argument, and :tLBRACK2 in all
# other positions. paren_nest is incremented, except for "[]" / "[]=".
def process_square_bracket(text)
  self.paren_nest += 1

  bracket =
    if in_arg_state? then
      if scan(/\]\=/) then
        self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
        return result(:expr_arg, :tASET, "[]=")
      elsif scan(/\]/) then
        self.paren_nest -= 1 # HACK? I dunno, or bug in MRI
        return result(:expr_arg, :tAREF, "[]")
      end

      rb_compile_error "unexpected '['"
      nil
    elsif is_beg? || (is_arg? && space_seen) then
      :tLBRACK
    else
      :tLBRACK2
    end

  expr_result bracket, "["
end

#process_string ⇒ Object

TODO: rewrite / remove



1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
# File 'lib/ruby_lexer.rb', line 1047

# Continues lexing the string-like literal described by lex_strterm, using
# +heredoc+ for heredocs and +parse_string+ for everything else.
#
# When the returned token is :tSTRING_END or :tREGEXP_END the literal is
# finished: lex_strterm is cleared and the lexer moves to :expr_end.
def process_string # TODO: rewrite / remove
  strterm = lex_strterm
  tok = strterm[0] == :heredoc ? heredoc(strterm) : parse_string(strterm)

  kind, _rest = tok

  case kind
  when :tSTRING_END, :tREGEXP_END then
    self.lex_strterm = nil
    self.lex_state   = :expr_end
  end

  tok
end

#process_symbol(text) ⇒ Object



658
659
660
661
662
663
664
665
# File 'lib/ruby_lexer.rb', line 658

# Builds a :tSYMBOL token from capture group 1 of +match+, with every
# backslash escape (as recognised by ESC) replaced through +unescape+.
#
# Under Ruby 1.8 rules a symbol containing a NUL byte is a compile error.
def process_symbol(text)
  raw_name = match[1]
  name     = raw_name.gsub(ESC) { unescape $1 }

  if ruby18 && name =~ /\0/ then
    rb_compile_error "symbol cannot contain '\\0'"
  end

  result :expr_end, :tSYMBOL, name
end

#process_token(text) ⇒ Object



667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
# File 'lib/ruby_lexer.rb', line 667

# Lexes an identifier-like word whose leading part has already been matched
# and passed in as +text+.
#
# The word is classified as :tFID (ends in "!" or "?"), :tCONSTANT (starts
# with an upper-case ASCII letter) or :tIDENTIFIER, and may instead become a
# :tLABEL (non-1.8 only) or a keyword token.
#
# NOTE: +text+ is modified in place -- a trailing "!"/"?" (or "=" while in
# :expr_fname) is appended to it and its +lineno+ is set.
#
# Returns the value of +result+ (or of +process_token_keyword+ for reserved
# words).
def process_token text
  # TODO: make this always return [token, lineno]
  token = self.token = text
  # absorb a trailing "!" or "?" unless it is followed by "="
  token << matched if scan(/[\!\?](?!=)/)

  tok_id =
    case
    when token =~ /[!?]$/ then
      :tFID
    when in_lex_state?(:expr_fname) && scan(/=(?:(?![~>=])|(?==>))/) then
      # ident=, not =~ => == or followed by =>
      # TODO test lexing of a=>b vs a==>b
      token << matched
      :tIDENTIFIER
    when token =~ /^[A-Z]/ then
      :tCONSTANT
    else
      :tIDENTIFIER
    end

  # "name:" (but not "name::") is a label when labels are possible here
  if !ruby18 and is_label_possible? and scan(/:(?!:)/) then
    return result(:expr_beg, :tLABEL, [token, self.lineno])
  end

  unless in_lex_state? :expr_dot then
    # See if it is a reserved word.
    keyword = if ruby18 then # REFACTOR need 18/19 lexer subclasses
                RubyParserStuff::Keyword.keyword18 token
              else
                RubyParserStuff::Keyword.keyword19 token
              end

    return process_token_keyword keyword if keyword
  end # unless in_lex_state? :expr_dot

  # TODO:
  # if (mb == ENC_CODERANGE_7BIT && lex_state != EXPR_DOT) {

  # pick the lex state that follows a plain identifier
  state = if is_beg? or is_arg? or in_lex_state? :expr_dot then
            command_state ? :expr_cmdarg : :expr_arg
          elsif not ruby18 and in_lex_state? :expr_fname then
            :expr_endfn
          else
            :expr_end
          end

  # a name the parser environment records as a local variable ends the
  # expression, unless it follows a "." or a method-name position
  if not [:expr_dot, :expr_fname].include? last_state and
      self.parser.env[token.to_sym] == :lvar then
    state = :expr_end
  end

  token.lineno = self.lineno # yes, on a string. I know... I know...

  return result(state, tok_id, token)
end

#process_token_keyword(keyword) ⇒ Object



723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
# File 'lib/ruby_lexer.rb', line 723

# Turns a reserved word (+keyword+, an object responding to state, id0, id1
# and name) into its token.
#
# * In :expr_fname the keyword is being used as a method name: id0 is emitted
#   with the keyword's name as value.
# * "do" is refined into :kDO_LAMBDA, :kDO_COND, :kDO_BLOCK or :kDO.
# * Otherwise id0 is used in :expr_beg/:expr_value; elsewhere id1 is used,
#   forcing :expr_beg when id1 differs from id0.
#
# Apart from the :expr_fname case the token value is [token, lineno].
def process_token_keyword(keyword)
  kw_state = keyword.state
  kw_value = [token, lineno]
  in_fname = lex_state == :expr_fname

  self.command_start = true if kw_state == :expr_beg && !in_fname

  return result(kw_state, keyword.id0, keyword.name) if in_fname

  if keyword.id0 == :kDO then
    do_token =
      if lpar_beg && lpar_beg == paren_nest then
        self.lpar_beg = nil
        self.paren_nest -= 1
        :kDO_LAMBDA
      elsif cond.is_in_state then
        :kDO_COND
      elsif cmdarg.is_in_state && lex_state != :expr_cmdarg then
        :kDO_BLOCK
      elsif in_lex_state?(:expr_beg, :expr_endarg) then
        :kDO_BLOCK
      else
        :kDO
      end

    return result(kw_state, do_token, kw_value)
  end

  if in_lex_state?(:expr_beg, :expr_value) then
    result kw_state, keyword.id0, kw_value
  elsif keyword.id0 != keyword.id1 then
    result :expr_beg, keyword.id1, kw_value
  else
    result kw_state, keyword.id1, kw_value
  end
end

#process_underscore(text) ⇒ Object



757
758
759
760
761
762
763
764
765
# File 'lib/ruby_lexer.rb', line 757

# Handles a token starting with "_". The "_" is pushed back first; then
# "__END__" at the beginning of a line (followed by a newline or the end of
# input) yields the EOF pair, and any other "_word" is handed to
# +process_token+.
def process_underscore(text)
  ss.unscan # put back "_"

  if beginning_of_line? && scan(/\__END__(\r?\n|\Z)/) then
    return [RubyLexer::EOF, RubyLexer::EOF]
  end

  return process_token(matched) if scan(/\_\w*/)

  nil
end

#rb_compile_error(msg) ⇒ Object



767
768
769
770
# File 'lib/ruby_lexer.rb', line 767

# Raises RubyParser::SyntaxError with +msg+ followed by the current line
# number and the inspected remainder of the current input line.
def rb_compile_error(msg)
  location = ". near line #{lineno}: #{ss.rest[/^.*/].inspect}"

  raise RubyParser::SyntaxError, msg + location
end

#read_escape ⇒ Object

TODO: remove / rewrite



772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
# File 'lib/ruby_lexer.rb', line 772

# Reads what follows the backslash of an escape sequence and returns the
# string it stands for. The visible callers (process_questionmark,
# tokadd_string) consume the backslash before calling.
#
# The +when+ branches are tried in order, so the specific forms (named
# escapes, octal, hex, meta/control prefixes, unicode) must stay ahead of the
# catch-all at the bottom, which returns the next character unchanged.
#
# Reports a compile error for an incomplete M/C/c/x/digit form and when the
# input ends.
def read_escape # TODO: remove / rewrite
  case
  when scan(/\\/) then                  # Backslash
    '\\'
  when scan(/n/) then                   # newline
    "\n"
  when scan(/t/) then                   # horizontal tab
    "\t"
  when scan(/r/) then                   # carriage-return
    "\r"
  when scan(/f/) then                   # form-feed
    "\f"
  when scan(/v/) then                   # vertical tab
    "\13"
  when scan(/a/) then                   # alarm(bell)
    "\007"
  when scan(/e/) then                   # escape
    "\033"
  when scan(/b/) then                   # backspace
    "\010"
  when scan(/s/) then                   # space
    " "
  when scan(/[0-7]{1,3}/) then          # octal constant
    (matched.to_i(8) & 0xFF).chr
  when scan(/x([0-9a-fA-F]{1,2})/) then # hex constant
    ss[1].to_i(16).chr
  when check(/M-\\[\\MCc]/) then        # meta prefix before a nested escape
    scan(/M-\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord | 0x80).chr        # set the high bit of the first char
    c
  when scan(/M-(.)/) then               # meta prefix before a plain char
    c = ss[1]
    c[0] = (c[0].ord | 0x80).chr
    c
  when check(/(C-|c)\\[\\MCc]/) then    # control prefix before a nested escape
    scan(/(C-|c)\\/) # eat it
    c = self.read_escape
    c[0] = (c[0].ord & 0x9f).chr        # mask the first char with 0x9f
    c
  when scan(/C-\?|c\?/) then            # control-? is 127
    127.chr
  when scan(/(C-|c)(.)/) then           # control prefix before a plain char
    c = ss[2]
    c[0] = (c[0].ord & 0x9f).chr
    c
  when scan(/^[89]/i) then # bad octal or hex... MRI ignores them :(
    matched
  when scan(/u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/) then
    # unicode code point, bare or in braces, packed as UTF-8
    [ss[1].delete("{}").to_i(16)].pack("U")
  when scan(/[McCx0-9]/) || end_of_stream? then
    rb_compile_error("Invalid escape character syntax")
  else
    ss.getch
  end
end

#regx_options ⇒ Object

TODO: rewrite / remove



829
830
831
832
833
834
835
836
837
838
839
840
841
842
# File 'lib/ruby_lexer.rb', line 829

# Scans the run of lower-case letters that follows a regexp literal and
# returns the recognised option letters (i x m o n e s u) joined into one
# string. Any other letter is a compile error naming the offending letters.
# Returns "" when no letters follow.
def regx_options # TODO: rewrite / remove
  letters = scan(/[a-z]+/) ? matched.split(//) : []

  known, unknown = letters.partition { |letter| letter =~ /^[ixmonesu]$/ }

  unless unknown.empty? then
    plural = unknown.size > 1 ? "s" : ""
    rb_compile_error("unknown regexp option%s - %s" %
                     [plural, unknown.join.inspect])
  end

  known.join
end

#reset ⇒ Object



844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
# File 'lib/ruby_lexer.rb', line 844

# Puts the lexer back into its initial state: nesting counters at zero, no
# pending string terminator or token, line 1, an empty comment list, and
# fresh :cmdarg / :cond state stacks.
def reset
  self.brace_nest = 0
  self.command_start = true # the first token starts a command
  self.comments = []
  self.lex_state = nil
  self.lex_strterm = nil
  self.lineno = 1
  self.lpar_beg = nil
  self.paren_nest = 0
  self.space_seen = false
  self.string_nest = 0
  self.token = nil

  stack_class = RubyParserStuff::StackState
  self.cmdarg = stack_class.new(:cmdarg)
  self.cond = stack_class.new(:cond)
end

#result(lex_state, token, text) ⇒ Object

:nodoc:



861
862
863
864
865
# File 'lib/ruby_lexer.rb', line 861

# Records the next lex state and returns the [token, text] pair.
#
# The placeholder :arg_state is resolved through +arg_state+ first; a nil
# state leaves the current lex state untouched.
def result(lex_state, token, text) # :nodoc:
  next_state = lex_state == :arg_state ? arg_state : lex_state

  self.lex_state = next_state if next_state

  return [token, text]
end

#ruby18 ⇒ Object



867
868
869
# File 'lib/ruby_lexer.rb', line 867

# True when the attached parser is a Ruby18Parser (or a subclass of it).
def ruby18
  parser.kind_of? Ruby18Parser
end

#ruby19 ⇒ Object



871
872
873
# File 'lib/ruby_lexer.rb', line 871

# True when the attached parser is a Ruby19Parser (or a subclass of it).
def ruby19
  parser.kind_of? Ruby19Parser
end

#scan(re) ⇒ Object



875
876
877
# File 'lib/ruby_lexer.rb', line 875

# Delegates to the underlying scanner: returns whatever ss.scan(re) returns.
def scan(re)
  return ss.scan(re)
end

#scanner_class ⇒ Object

TODO: design this out of oedipus_lex. or something.



879
880
881
# File 'lib/ruby_lexer.rb', line 879

# The scanner class used by this lexer: RPStringScanner.
def scanner_class # TODO: design this out of oedipus_lex. or something.
  return RPStringScanner
end

#space_vs_beginning(space_type, beg_type, fallback) ⇒ Object



883
884
885
886
887
888
889
890
891
892
893
# File 'lib/ruby_lexer.rb', line 883

# Picks one of three token types for an operator that can also be a prefix:
# +space_type+ when is_space_arg? accepts the next character (a warning is
# issued first), +beg_type+ at the beginning of an expression, and +fallback+
# otherwise.
#
# NOTE(review): the warning text always names `**', whichever token types
# were passed in.
def space_vs_beginning(space_type, beg_type, fallback)
  if is_space_arg?(check(/./m)) then
    warning "`**' interpreted as argument prefix"
    return space_type
  end

  return beg_type if is_beg?

  # TODO: warn_balanced("**", "argument prefix");
  fallback
end

#string(type, beg = matched, nnd = "\0") ⇒ Object



895
896
897
# File 'lib/ruby_lexer.rb', line 895

# Installs a string terminator descriptor -- [:strterm, type, beg, nnd] -- as
# lex_strterm and returns it. +beg+ defaults to the scanner's last match and
# +nnd+ to "\0".
def string(type, beg = matched, nnd = "\0")
  strterm = [:strterm, type, beg, nnd]

  self.lex_strterm = strterm
end

#tokadd_escape(term) ⇒ Object

TODO: consider def src= src

raise "bad src: #{src.inspect}" unless String === src
@src = RPStringScanner.new(src)

end



905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
# File 'lib/ruby_lexer.rb', line 905

# Copies one backslash escape from the input to string_buffer without
# decoding it (backslash included).
#
# A backslash-newline is dropped; a meta/control prefix followed by another
# backslash recurses to copy the nested escape as well. Incomplete
# M/C/c/x forms and a missing escape are compile errors. +term+ is only
# passed along to the recursive call.
def tokadd_escape(term) # TODO: rewrite / remove
  if scan(/\\\n/) then
    # just ignore
  elsif scan(/\\([0-7]{1,3}|x[0-9a-fA-F]{1,2})/) then
    string_buffer << matched
  elsif scan(/\\([MC]-|c)(?=\\)/) then
    string_buffer << matched
    tokadd_escape term
  elsif scan(/\\([MC]-|c)(.)/) then
    string_buffer << matched
  elsif scan(/\\[McCx]/) then
    rb_compile_error "Invalid escape character syntax"
  elsif scan(/\\(.)/m) then
    string_buffer << matched
  else
    rb_compile_error "Invalid escape character syntax"
  end
end

#tokadd_string(func, term, paren) ⇒ Object

TODO: rewrite / remove



925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
# File 'lib/ruby_lexer.rb', line 925

# Accumulates the next stretch of string content into string_buffer.
#
# +func+ is a bitmask of the STR_FUNC_* flags, +term+ is the closing
# delimiter and +paren+ the opening one (when the delimiters nest).
#
# Scanning stops, with the scanner moved back one position, at the
# terminator (when string_nest is 0), at "#" followed by "$", "@" or "{" in
# expanding strings, and at whitespace in word lists.
#
# Returns the last piece read, or RubyLexer::EOF when the input is exhausted.
def tokadd_string(func, term, paren) # TODO: rewrite / remove
  # decode the behaviour flags
  qwords = (func & STR_FUNC_QWORDS) != 0
  escape = (func & STR_FUNC_ESCAPE) != 0
  expand = (func & STR_FUNC_EXPAND) != 0
  regexp = (func & STR_FUNC_REGEXP) != 0
  symbol = (func & STR_FUNC_SYMBOL) != 0

  # regexps for the delimiters, looked up in @@regexp_cache
  paren_re = @@regexp_cache[paren]
  term_re  = @@regexp_cache[term]

  until end_of_stream? do
    c = nil
    handled = true

    case
    when paren_re && scan(paren_re) then
      # nested opening delimiter
      self.string_nest += 1
    when scan(term_re) then
      if self.string_nest == 0 then
        # the real terminator: give it back and stop
        ss.pos -= 1
        break
      else
        self.string_nest -= 1
      end
    when expand && scan(/#(?=[\$\@\{])/) then
      # start of an interpolation: give the "#" back and stop
      ss.pos -= 1
      break
    when qwords && scan(/\s/) then
      # whitespace ends the current word of a word list
      ss.pos -= 1
      break
    when expand && scan(/#(?!\n)/) then
      # do nothing
    when check(/\\/) then
      case
      when qwords && scan(/\\\n/) then
        string_buffer << "\n"
        next
      when qwords && scan(/\\\s/) then
        c = ' '
      when expand && scan(/\\\n/) then
        next
      when regexp && check(/\\/) then
        # regexps keep their escapes undecoded
        self.tokadd_escape term
        next
      when expand && scan(/\\/) then
        c = self.read_escape
      when scan(/\\\n/) then
        # do nothing
      when scan(/\\\\/) then
        string_buffer << '\\' if escape
        c = '\\'
      when scan(/\\/) then
        # keep the backslash unless it escapes a delimiter
        unless scan(term_re) || paren.nil? || scan(paren_re) then
          string_buffer << "\\"
        end
      else
        handled = false
      end # inner /\\/ case
    else
      handled = false
    end # top case

    unless handled then
      # ordinary content: take a run of characters that are not special
      # here, or else any single character
      t = Regexp.escape term
      x = Regexp.escape(paren) if paren && paren != "\000"
      re = if qwords then
             if RUBY19 then
               /[^#{t}#{x}\#\0\\\s]+|./ # |. to pick up whatever
             else
               /[^#{t}#{x}\#\0\\\s\v]+|./ # argh. 1.8's \s doesn't pick up \v
             end
           else
             /[^#{t}#{x}\#\0\\]+|./
           end

      scan re
      c = matched

      rb_compile_error "symbol cannot contain '\\0'" if symbol && c =~ /\0/
    end # unless handled

    c ||= matched
    string_buffer << c
  end # until

  c ||= matched
  c = RubyLexer::EOF if end_of_stream?

  return c
end

#unescape(s) ⇒ Object



1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
# File 'lib/ruby_lexer.rb', line 1016

# Decodes the body of a backslash escape (the text after the backslash, as
# captured by ESC) and returns the string it stands for.
#
# Entries of ESCAPES are returned directly. Otherwise octal, hex, meta,
# control and unicode forms are decoded; a string starting with 8, 9 or a-f,
# and any string matching none of the forms, is returned unchanged.
# Reports a compile error for a leftover M, C, c, x or digit form.
#
# On RUBY19 the result is tagged as UTF-8. +s+ itself is never modified, so
# frozen strings are accepted.
def unescape s
  r = ESCAPES[s]

  return r if r

  x = case s
      when /^[0-7]{1,3}/ then
        ($&.to_i(8) & 0xFF).chr
      when /^x([0-9a-fA-F]{1,2})/ then
        $1.to_i(16).chr
      when /^M-(.)/ then
        ($1[0].ord | 0x80).chr
      when /^(C-|c)(.)/ then
        ($2[0].ord & 0x9f).chr
      when /^[89a-f]/i then # bad octal or hex... ignore? that's what MRI does :(
        s.dup # copy: force_encoding below must not touch the caller's string
      when /^[McCx0-9]/ then
        rb_compile_error("Invalid escape character syntax")
      when /^u([0-9a-fA-F]{2,4}|\{[0-9a-fA-F]{2,6}\})/ then
        [$1.delete("{}").to_i(16)].pack("U")
      else
        s.dup # copy: see above
      end
  x.force_encoding "UTF-8" if RUBY19
  x
end

#warning(s) ⇒ Object



1043
1044
1045
# File 'lib/ruby_lexer.rb', line 1043

# Placeholder for lexer warnings: +s+ is discarded and nil is returned.
def warning(s)
  # do nothing for now
  nil
end