Class: RDoc::RubyLex

Inherits:

Object

Object
RDoc::RubyLex

show all

Extended by:: Exception2MessageMapper

Includes:: IRB, RubyToken

Defined in:: lib/rdoc/ruby_lex.rb

Overview

Ruby lexer adapted from irb.

The internals are not documented because they are scary.

Defined Under Namespace

Classes: Error

Constant Summary collapse

ENINDENT_CLAUSE =

[
  "case", "class", "def", "do", "for", "if",
  "module", "unless", "until", "while", "begin" #, "when"
]

DEINDENT_CLAUSE = , “when”

["end" #, "when"
]

PERCENT_LTYPE =

{
  "q" => "\'",
  "Q" => "\"",
  "x" => "\`",
  "r" => "/",
  "w" => "]",
  "W" => "]",
  "s" => ":"
}

PERCENT_PAREN =

{
  "{" => "}",
  "[" => "]",
  "<" => ">",
  "(" => ")"
}

PERCENT_PAREN_REV =

PERCENT_PAREN.invert

Ltype2Token =

{
  "\'" => TkSTRING,
  "\"" => TkSTRING,
  "\`" => TkXSTRING,
  "/" => TkREGEXP,
  "]" => TkDSTRING,
  ":" => TkSYMBOL
}

DLtype2Token =

{
  "\"" => TkDSTRING,
  "\`" => TkDXSTRING,
  "/" => TkDREGEXP,
}

IDENT_RE =

if defined? Encoding then
  eval '/[\w\u{0080}-\u{FFFFF}]/u' # 1.8 can't parse \u{}
else
  /[\w\x80-\xFF]/
end

Constants included from RubyToken

RDoc::RubyToken::EXPR_ARG, RDoc::RubyToken::EXPR_BEG, RDoc::RubyToken::EXPR_CLASS, RDoc::RubyToken::EXPR_DOT, RDoc::RubyToken::EXPR_END, RDoc::RubyToken::EXPR_FNAME, RDoc::RubyToken::EXPR_MID, RDoc::RubyToken::NEWLINE_TOKEN, RDoc::RubyToken::Symbol, RDoc::RubyToken::TkReading2Token, RDoc::RubyToken::TkSymbol2Token, RDoc::RubyToken::TkToken2Reading, RDoc::RubyToken::TokenDefinitions

Class Attribute Summary collapse

.debug_level ⇒ Object

Returns the value of attribute debug_level.

Instance Attribute Summary collapse

#char_no ⇒ Object readonly

Returns the value of attribute char_no.
#continue ⇒ Object

Returns the value of attribute continue.
#exception_on_syntax_error ⇒ Object

Returns the value of attribute exception_on_syntax_error.
#indent ⇒ Object readonly

Returns the value of attribute indent.
#lex_state ⇒ Object

Returns the value of attribute lex_state.
#line_no ⇒ Object readonly

Returns the value of attribute line_no.
#readed_auto_clean_up ⇒ Object

Returns the value of attribute readed_auto_clean_up.
#reader ⇒ Object readonly

Returns the value of attribute reader.
#seek ⇒ Object readonly

Returns the value of attribute seek.
#skip_space ⇒ Object

Returns the value of attribute skip_space.

Class Method Summary collapse

.debug? ⇒ Boolean
.tokenize(ruby, options) ⇒ Object

Returns an Array of ruby tokens.

Instance Method Summary collapse

Methods included from RubyToken

#Token, def_token, #set_token_position

Constructor Details

#initialize(content, options) ⇒ `RubyLex`

Creates a new lexer for content. options is an RDoc::Options, only +tab_width is used.

# File 'lib/rdoc/ruby_lex.rb', line 82

def initialize(content, options)
  lex_init

  if /\t/ =~ content then
    tab_width = options.tab_width
    content = content.split(/\n/).map do |line|
      1 while line.gsub!(/\t+/) {
        ' ' * (tab_width*$&.length - $`.length % tab_width)
      }  && $~
      line
    end.join("\n")
  end

  content << "\n" unless content[-1, 1] == "\n"

  set_input StringIO.new content

  @base_char_no = 0
  @char_no = 0
  @exp_line_no = @line_no = 1
  @here_readed = []
  @readed = []
  @rests = []
  @seek = 0

  @here_header = false
  @indent = 0
  @indent_stack = []
  @lex_state = :EXPR_BEG
  @space_seen = false

  @continue = false
  @line = ""

  @skip_space = false
  @readed_auto_clean_up = false
  @exception_on_syntax_error = true

  @prompt = nil
  @prev_seek = nil
  @ltype = nil
end

Class Attribute Details

.debug_level ⇒ `Object`

Returns the value of attribute debug_level.



50
51
52

# File 'lib/rdoc/ruby_lex.rb', line 50

def debug_level
  @debug_level
end

Instance Attribute Details

#char_no ⇒ `Object` (readonly)

Returns the value of attribute char_no.



139
140
141

# File 'lib/rdoc/ruby_lex.rb', line 139

def char_no
  @char_no
end

#continue ⇒ `Object`

Returns the value of attribute continue.



45
46
47

# File 'lib/rdoc/ruby_lex.rb', line 45

def continue
  @continue
end

#exception_on_syntax_error ⇒ `Object`

Returns the value of attribute exception_on_syntax_error.



136
137
138

# File 'lib/rdoc/ruby_lex.rb', line 136

def exception_on_syntax_error
  @exception_on_syntax_error
end

#indent ⇒ `Object` (readonly)

Returns the value of attribute indent.



141
142
143

# File 'lib/rdoc/ruby_lex.rb', line 141

def indent
  @indent
end

#lex_state ⇒ `Object`

Returns the value of attribute lex_state.



46
47
48

# File 'lib/rdoc/ruby_lex.rb', line 46

def lex_state
  @lex_state
end

#line_no ⇒ `Object` (readonly)

Returns the value of attribute line_no.



140
141
142

# File 'lib/rdoc/ruby_lex.rb', line 140

def line_no
  @line_no
end

#readed_auto_clean_up ⇒ `Object`

Returns the value of attribute readed_auto_clean_up.



135
136
137

# File 'lib/rdoc/ruby_lex.rb', line 135

def readed_auto_clean_up
  @readed_auto_clean_up
end

#reader ⇒ `Object` (readonly)

Returns the value of attribute reader.



47
48
49

# File 'lib/rdoc/ruby_lex.rb', line 47

def reader
  @reader
end

#seek ⇒ `Object` (readonly)

Returns the value of attribute seek.



138
139
140

# File 'lib/rdoc/ruby_lex.rb', line 138

def seek
  @seek
end

#skip_space ⇒ `Object`

Returns the value of attribute skip_space.



134
135
136

# File 'lib/rdoc/ruby_lex.rb', line 134

def skip_space
  @skip_space
end

Class Method Details

.debug? ⇒ `Boolean`

Returns:

(Boolean)



53
54
55

# File 'lib/rdoc/ruby_lex.rb', line 53

def self.debug?
  @debug_level > 0
end

.tokenize(ruby, options) ⇒ `Object`

Returns an Array of ruby tokens. See ::new for a description of options.

# File 'lib/rdoc/ruby_lex.rb', line 65

def self.tokenize ruby, options
  tokens = []

  scanner = RDoc::RubyLex.new ruby, options
  scanner.exception_on_syntax_error = true

  while token = scanner.token do
    tokens << token
  end

  tokens
end

Instance Method Details

#each_top_level_statement ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 294

def each_top_level_statement
  initialize_input
  catch(:TERM_INPUT) do
    loop do
      begin
        @continue = false
        prompt
        unless l = lex
          throw :TERM_INPUT if @line == ''
        else
          #p l
          @line.concat l
          if @ltype or @continue or @indent > 0
            next
          end
        end
        if @line != "\n"
          yield @line, @exp_line_no
        end
        break unless l
        @line = ''
        @exp_line_no = @line_no

        @indent = 0
        @indent_stack = []
        prompt
      rescue TerminateLineInput
        initialize_input
        prompt
        get_readed
      end
    end
  end
end

#eof? ⇒ `Boolean`

Returns:

(Boolean)



199
200
201

# File 'lib/rdoc/ruby_lex.rb', line 199

def eof?
  @io.eof?
end

#get_readed ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 155

def get_readed
  if idx = @readed.rindex("\n")
    @base_char_no = @readed.size - (idx + 1)
  else
    @base_char_no += @readed.size
  end

  readed = @readed.join("")
  @readed = []
  readed
end

#getc ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 167

def getc
  while @rests.empty?
    #      return nil unless buf_input
    @rests.push nil unless buf_input
  end
  c = @rests.shift
  if @here_header
    @here_readed.push c
  else
    @readed.push c
  end
  @seek += 1
  if c == "\n"
    @line_no += 1
    @char_no = 0
  else
    @char_no += 1
  end

  c
end

#getc_of_rests ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 203

def getc_of_rests
  if @rests.empty?
    nil
  else
    getc
  end
end

#gets ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 189

def gets
  l = ""
  while c = getc
    l.concat(c)
    break if c == "\n"
  end
  return nil if l == "" and c.nil?
  l
end

#identify_comment ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1266

def identify_comment
  @ltype = "#"

  comment = '#'

  while ch = getc
    # if ch == "\\" #"
    #   read_escape
    # end
    if ch == "\n"
      @ltype = nil
      ungetc
      break
    end

    comment << ch
  end

  return Token(TkCOMMENT, comment)
end

#identify_gvar ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 834

def identify_gvar
  @lex_state = :EXPR_END

  case ch = getc
  when /[~_*$?!@\/\\;,=:<>".]/   #"
    Token(TkGVAR, "$" + ch)
  when "-"
    Token(TkGVAR, "$-" + getc)
  when "&", "`", "'", "+"
    Token(TkBACK_REF, "$"+ch)
  when /[1-9]/
    ref = ch
    while (ch = getc) =~ /[0-9]/ do ref << ch end
    ungetc
    Token(TkNTH_REF, "$#{ref}")
  when /\w/
    ungetc
    ungetc
    identify_identifier
  else
    ungetc
    Token("$")
  end
end

#identify_here_document ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 979

def identify_here_document
  ch = getc
  #    if lt = PERCENT_LTYPE[ch]
  if ch == "-"
    ch = getc
    indent = true
  end
  if /['"`]/ =~ ch
    user_quote = lt = ch
    quoted = ""
    while (c = getc) && c != lt
      quoted.concat c
    end
  else
    user_quote = nil
    lt = '"'
    quoted = ch.dup
    while (c = getc) && c =~ /\w/
      quoted.concat c
    end
    ungetc
  end

  ltback, @ltype = @ltype, lt
  reserve = []
  while ch = getc
    reserve.push ch
    if ch == "\\"
      reserve.push ch = getc
    elsif ch == "\n"
      break
    end
  end

  output_heredoc = reserve.join =~ /\A\r?\n\z/

  if output_heredoc then
    doc = '<<'
    doc << '-' if indent
    doc << "#{user_quote}#{quoted}#{user_quote}\n"
  else
    doc = '"'
  end

  @here_header = false
  while l = gets
    l = l.sub(/(:?\r)?\n\z/, "\n")
    if (indent ? l.strip : l.chomp) == quoted
      break
    end
    doc << l
  end

  if output_heredoc then
    raise Error, "Missing terminating #{quoted} for string" unless l

    doc << l.chomp
  else
    doc << '"'
  end

  @here_header = true
  @here_readed.concat reserve
  while ch = reserve.pop
    ungetc ch
  end

  token_class = output_heredoc ? RDoc::RubyLex::TkHEREDOC : Ltype2Token[lt]
  @ltype = ltback
  @lex_state = :EXPR_END
  Token(token_class, doc)
end

#identify_identifier ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 865

def identify_identifier
  token = ""
  if peek(0) =~ /[$@]/
    token.concat(c = getc)
    if c == "@" and peek(0) == "@"
      token.concat getc
    end
  end

  while (ch = getc) =~ IDENT_RE do
    print " :#{ch}: " if RDoc::RubyLex.debug?
    token.concat ch
  end

  ungetc

  if (ch == "!" || ch == "?") && token[0,1] =~ /\w/ && peek(0) != "="
    token.concat getc
  end

  # almost fix token

  case token
  when /^\$/
    return Token(TkGVAR, token)
  when /^\@\@/
    @lex_state = :EXPR_END
    # p Token(TkCVAR, token)
    return Token(TkCVAR, token)
  when /^\@/
    @lex_state = :EXPR_END
    return Token(TkIVAR, token)
  end

  if @lex_state != :EXPR_DOT
    print token, "\n" if RDoc::RubyLex.debug?

    token_c, *trans = TkReading2Token[token]
    if token_c
      # reserved word?

      if (@lex_state != :EXPR_BEG &&
          @lex_state != :EXPR_FNAME &&
          trans[1])
        # modifiers
        token_c = TkSymbol2Token[trans[1]]
        @lex_state = trans[0]
      else
        if @lex_state != :EXPR_FNAME
          if ENINDENT_CLAUSE.include?(token)
            valid = peek(0) != ':'

            # check for ``class = val'' etc.
            case token
            when "class"
              valid = false unless peek_match?(/^\s*(<<|\w|::)/)
            when "def"
              valid = false if peek_match?(/^\s*(([+-\/*&\|^]|<<|>>|\|\||\&\&)=|\&\&|\|\|)/)
            when "do"
              valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&)/)
            when *ENINDENT_CLAUSE
              valid = false if peek_match?(/^\s*([+-\/*]?=|\*|<|>|\&|\|)/)
            else
              # no nothing
            end if valid

            if valid
              if token == "do"
                if ![TkFOR, TkWHILE, TkUNTIL].include?(@indent_stack.last)
                  @indent += 1
                  @indent_stack.push token_c
                end
              else
                @indent += 1
                @indent_stack.push token_c
              end
            else
              token_c = TkIDENTIFIER
            end

          elsif DEINDENT_CLAUSE.include?(token)
            @indent -= 1
            @indent_stack.pop
          end
          @lex_state = trans[0]
        else
          @lex_state = :EXPR_END
        end
      end
      return Token(token_c, token)
    end
  end

  if @lex_state == :EXPR_FNAME
    @lex_state = :EXPR_END
    if peek(0) == '='
      token.concat getc
    end
  elsif @lex_state == :EXPR_BEG || @lex_state == :EXPR_DOT ||
        @lex_state == :EXPR_ARG
    @lex_state = :EXPR_ARG
  else
    @lex_state = :EXPR_END
  end

  if token[0, 1] =~ /[A-Z]/
    return Token(TkCONSTANT, token)
  elsif token[token.size - 1, 1] =~ /[!?]/
    return Token(TkFID, token)
  else
    return Token(TkIDENTIFIER, token)
  end
end

#identify_number(op = "") ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1071

def identify_number(op = "")
  @lex_state = :EXPR_END

  num = op

  if peek(0) == "0" && peek(1) !~ /[.eE]/
    num << getc

    case peek(0)
    when /[xX]/
      ch = getc
      match = /[0-9a-fA-F_]/
    when /[bB]/
      ch = getc
      match = /[01_]/
    when /[oO]/
      ch = getc
      match = /[0-7_]/
    when /[dD]/
      ch = getc
      match = /[0-9_]/
    when /[0-7]/
      match = /[0-7_]/
    when /[89]/
      raise Error, "Illegal octal digit"
    else
      return Token(TkINTEGER, num)
    end

    num << ch if ch

    len0 = true
    non_digit = false
    while ch = getc
      num << ch
      if match =~ ch
        if ch == "_"
          if non_digit
            raise Error, "trailing `#{ch}' in number"
          else
            non_digit = ch
          end
        else
          non_digit = false
          len0 = false
        end
      else
        ungetc
        num[-1, 1] = ''
        if len0
          raise Error, "numeric literal without digits"
        end
        if non_digit
          raise Error, "trailing `#{non_digit}' in number"
        end
        break
      end
    end
    return Token(TkINTEGER, num)
  end

  type = TkINTEGER
  allow_point = true
  allow_e = true
  non_digit = false
  while ch = getc
    num << ch
    case ch
    when /[0-9]/
      non_digit = false
    when "_"
      non_digit = ch
    when allow_point && "."
      if non_digit
        raise Error, "trailing `#{non_digit}' in number"
      end
      type = TkFLOAT
      if peek(0) !~ /[0-9]/
        type = TkINTEGER
        ungetc
        num[-1, 1] = ''
        break
      end
      allow_point = false
    when allow_e && "e", allow_e && "E"
      if non_digit
        raise Error, "trailing `#{non_digit}' in number"
      end
      type = TkFLOAT
      if peek(0) =~ /[+-]/
        num << getc
      end
      allow_e = false
      allow_point = false
      non_digit = ch
    else
      if non_digit
        raise Error, "trailing `#{non_digit}' in number"
      end
      ungetc
      num[-1, 1] = ''
      break
    end
  end

  Token(type, num)
end

#identify_quotation ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1052

def identify_quotation
  type = ch = getc
  if lt = PERCENT_LTYPE[type]
    ch = getc
  elsif type =~ /\W/
    type = nil
    lt = "\""
  else
    return Token(TkMOD, '%')
  end
  #     if ch !~ /\W/
  #       ungetc
  #       next
  #     end
  #@ltype = lt
  @quoted = ch unless @quoted = PERCENT_PAREN[ch]
  identify_string(lt, @quoted, type)
end

#identify_string(ltype, quoted = ltype, type = nil) ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1179

def identify_string(ltype, quoted = ltype, type = nil)
  close = PERCENT_PAREN.values.include?(quoted)
  @ltype = ltype
  @quoted = quoted

  str = if ltype == quoted and %w[" ' /].include? ltype then
          ltype.dup
        elsif RUBY_VERSION > '1.9' then
          "%#{type or PERCENT_LTYPE.key ltype}#{PERCENT_PAREN_REV[quoted]||quoted}"
        else
          "%#{type or PERCENT_LTYPE.index ltype}#{PERCENT_PAREN_REV[quoted]||quoted}"
        end

  subtype = nil
  begin
    nest = 0

    while ch = getc
      str << ch

      if @quoted == ch and nest <= 0
        break
      elsif @ltype != "'" && @ltype != "]" && @ltype != ":" and ch == "#"
        ch = getc
        subtype = true
        if ch == "{" then
          str << ch << skip_inner_expression
          next
        else
          ungetc
        end
      elsif ch == '\\'
        if %w[' /].include? @ltype then
          case ch = getc
          when "\\", "\n", "'"
          when @ltype
            str << ch
          else
            ungetc
          end
        else
          str << read_escape
        end
      end

      if close then
        if PERCENT_PAREN[ch] == @quoted
          nest += 1
        elsif ch == @quoted
          nest -= 1
        end
      end
    end

    if @ltype == "/"
      while peek(0) =~ /i|m|x|o|e|s|u|n/
        str << getc
      end
    end

    if subtype
      Token(DLtype2Token[ltype], str)
    else
      Token(Ltype2Token[ltype], str)
    end
  ensure
    @ltype = nil
    @quoted = nil
    @lex_state = :EXPR_END
  end
end

#initialize_input ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 278

def initialize_input
  @ltype = nil
  @quoted = nil
  @indent = 0
  @indent_stack = []
  @lex_state = :EXPR_BEG
  @space_seen = false
  @here_header = false

  @continue = false
  prompt

  @line = ""
  @exp_line_no = @line_no
end

#inspect ⇒ `Object`

:stopdoc:

# File 'lib/rdoc/ruby_lex.rb', line 127

def inspect # :nodoc:
  "#<%s:0x%x pos %d lex_state %p space_seen %p>" % [
    self.class, object_id,
    @io.pos, @lex_state, @space_seen,
  ]
end

#lex ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 329

def lex
  until (((tk = token).kind_of?(TkNL) || tk.kind_of?(TkEND_OF_SCRIPT)) &&
         !@continue or
    tk.nil?)
    #p tk
    #p @lex_state
    #p self
  end
  line = get_readed
  #      print self.inspect
  if line == "" and tk.kind_of?(TkEND_OF_SCRIPT) || tk.nil?
    nil
  else
    line
  end
end

#lex_init ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 412

def lex_init()
  @OP = IRB::SLex.new
  @OP.def_rules("\0", "\004", "\032") do |op, io|
    Token(TkEND_OF_SCRIPT, '')
  end

  @OP.def_rules(" ", "\t", "\f", "\r", "\13") do |op, io|
    @space_seen = true
    str = op
    while (ch = getc) =~ /[ \t\f\r\13]/ do
      str << ch
    end
    ungetc
    Token TkSPACE, str
  end

  @OP.def_rule("#") do |op, io|
    identify_comment
  end

  @OP.def_rule("=begin",
               proc{|op, io| @prev_char_no == 0 && peek(0) =~ /\s/}) do
    |op, io|
    @ltype = "="
    res = ''
    nil until getc == "\n"

    until ( peek_equal?("=end") && peek(4) =~ /\s/ ) do
      (ch = getc)
      res << ch
    end

    gets # consume =end

    @ltype = nil
    Token(TkRD_COMMENT, res)
  end

  @OP.def_rule("\n") do |op, io|
    print "\\n\n" if RDoc::RubyLex.debug?
    case @lex_state
    when :EXPR_BEG, :EXPR_FNAME, :EXPR_DOT
      @continue = true
    else
      @continue = false
      @lex_state = :EXPR_BEG
      until (@indent_stack.empty? ||
             [TkLPAREN, TkLBRACK, TkLBRACE,
               TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
        @indent_stack.pop
      end
    end
    @here_header = false
    @here_readed = []
    Token(TkNL)
  end

  @OP.def_rules("*", "**",
                "=", "==", "===",
                "=~", "<=>",
                "<", "<=",
                ">", ">=", ">>") do
    |op, io|
    case @lex_state
    when :EXPR_FNAME, :EXPR_DOT
      @lex_state = :EXPR_ARG
    else
      @lex_state = :EXPR_BEG
    end
    Token(op)
  end

  @OP.def_rules("!", "!=", "!~") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token(op)
  end

  @OP.def_rules("<<") do
    |op, io|
    tk = nil
    if @lex_state != :EXPR_END && @lex_state != :EXPR_CLASS &&
       (@lex_state != :EXPR_ARG || @space_seen)
      c = peek(0)
      if /\S/ =~ c && (/["'`]/ =~ c || /\w/ =~ c || c == "-")
        tk = identify_here_document
      end
    end
    unless tk
      tk = Token(op)
      case @lex_state
      when :EXPR_FNAME, :EXPR_DOT
        @lex_state = :EXPR_ARG
      else
        @lex_state = :EXPR_BEG
      end
    end
    tk
  end

  @OP.def_rules("'", '"') do
    |op, io|
    identify_string(op)
  end

  @OP.def_rules("`") do
    |op, io|
    if @lex_state == :EXPR_FNAME
      @lex_state = :EXPR_END
      Token(op)
    else
      identify_string(op)
    end
  end

  @OP.def_rules('?') do
    |op, io|
    if @lex_state == :EXPR_END
      @lex_state = :EXPR_BEG
      Token(TkQUESTION)
    else
      ch = getc
      if @lex_state == :EXPR_ARG && ch =~ /\s/
        ungetc
        @lex_state = :EXPR_BEG;
        Token(TkQUESTION)
      else
        @lex_state = :EXPR_END
        Token(TkCHAR, "?#{ch}")
      end
    end
  end

  @OP.def_rules("&", "&&", "|", "||") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token(op)
  end

  @OP.def_rules("+=", "-=", "*=", "**=",
                "&=", "|=", "^=", "<<=", ">>=", "||=", "&&=") do
    |op, io|
    @lex_state = :EXPR_BEG
    op =~ /^(.*)=$/
    Token(TkOPASGN, $1)
  end

  @OP.def_rule("+@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
    |op, io|
    @lex_state = :EXPR_ARG
    Token(op)
  end

  @OP.def_rule("-@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
    |op, io|
    @lex_state = :EXPR_ARG
    Token(op)
  end

  @OP.def_rules("+", "-") do
    |op, io|
    catch(:RET) do
      if @lex_state == :EXPR_ARG
        if @space_seen and peek(0) =~ /[0-9]/
          throw :RET, identify_number(op)
        else
          @lex_state = :EXPR_BEG
        end
      elsif @lex_state != :EXPR_END and peek(0) =~ /[0-9]/
        throw :RET, identify_number(op)
      else
        @lex_state = :EXPR_BEG
      end
      Token(op)
    end
  end

  @OP.def_rule(".") do
    |op, io|
    @lex_state = :EXPR_BEG
    if peek(0) =~ /[0-9]/
      ungetc
      identify_number
    else
      # for "obj.if" etc.
      @lex_state = :EXPR_DOT
      Token(TkDOT)
    end
  end

  @OP.def_rules("..", "...") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token(op)
  end

  lex_int2
end

#lex_int2 ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 611

def lex_int2
  @OP.def_rules("]", "}", ")") do
    |op, io|
    @lex_state = :EXPR_END
    @indent -= 1
    @indent_stack.pop
    Token(op)
  end

  @OP.def_rule(":") do
    |op, io|
    if @lex_state == :EXPR_END || peek(0) =~ /\s/
      @lex_state = :EXPR_BEG
      Token(TkCOLON)
    else
      @lex_state = :EXPR_FNAME;
      Token(TkSYMBEG)
    end
  end

  @OP.def_rule("::") do
    |op, io|
    #      p @lex_state.id2name, @space_seen
    if @lex_state == :EXPR_BEG or @lex_state == :EXPR_ARG && @space_seen
      @lex_state = :EXPR_BEG
      Token(TkCOLON3)
    else
      @lex_state = :EXPR_DOT
      Token(TkCOLON2)
    end
  end

  @OP.def_rule("/") do
    |op, io|
    if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
      identify_string(op)
    elsif peek(0) == '='
      getc
      @lex_state = :EXPR_BEG
      Token(TkOPASGN, "/") #/)
    elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_string(op)
    else
      @lex_state = :EXPR_BEG
      Token("/") #/)
    end
  end

  @OP.def_rules("^") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token("^")
  end

  #       @OP.def_rules("^=") do
  # 	@lex_state = :EXPR_BEG
  # 	Token(OP_ASGN, :^)
  #       end

  @OP.def_rules(",") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token(op)
  end

  @OP.def_rules(";") do
    |op, io|
    @lex_state = :EXPR_BEG
    until (@indent_stack.empty? ||
           [TkLPAREN, TkLBRACK, TkLBRACE,
             TkfLPAREN, TkfLBRACK, TkfLBRACE].include?(@indent_stack.last))
      @indent_stack.pop
    end
    Token(op)
  end

  @OP.def_rule("~") do
    |op, io|
    @lex_state = :EXPR_BEG
    Token("~")
  end

  @OP.def_rule("~@", proc{|op, io| @lex_state == :EXPR_FNAME}) do
    |op, io|
    @lex_state = :EXPR_BEG
    Token("~")
  end

  @OP.def_rule("(") do
    |op, io|
    @indent += 1
    if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
      @lex_state = :EXPR_BEG
      tk_c = TkfLPAREN
    else
      @lex_state = :EXPR_BEG
      tk_c = TkLPAREN
    end
    @indent_stack.push tk_c
    Token tk_c
  end

  @OP.def_rule("[]", proc{|op, io| @lex_state == :EXPR_FNAME}) do
    |op, io|
    @lex_state = :EXPR_ARG
    Token("[]")
  end

  @OP.def_rule("[]=", proc{|op, io| @lex_state == :EXPR_FNAME}) do
    |op, io|
    @lex_state = :EXPR_ARG
    Token("[]=")
  end

  @OP.def_rule("[") do
    |op, io|
    @indent += 1
    if @lex_state == :EXPR_FNAME
      tk_c = TkfLBRACK
    else
      if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
        tk_c = TkLBRACK
      elsif @lex_state == :EXPR_ARG && @space_seen
        tk_c = TkLBRACK
      else
        tk_c = TkfLBRACK
      end
      @lex_state = :EXPR_BEG
    end
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule("{") do
    |op, io|
    @indent += 1
    if @lex_state != :EXPR_END && @lex_state != :EXPR_ARG
      tk_c = TkLBRACE
    else
      tk_c = TkfLBRACE
    end
    @lex_state = :EXPR_BEG
    @indent_stack.push tk_c
    Token(tk_c)
  end

  @OP.def_rule('\\') do
    |op, io|
    if getc == "\n"
      @space_seen = true
      @continue = true
      Token(TkSPACE)
    else
      ungetc
      Token("\\")
    end
  end

  @OP.def_rule('%') do
    |op, io|
    if @lex_state == :EXPR_BEG || @lex_state == :EXPR_MID
      identify_quotation
    elsif peek(0) == '='
      getc
      Token(TkOPASGN, :%)
    elsif @lex_state == :EXPR_ARG and @space_seen and peek(0) !~ /\s/
      identify_quotation
    else
      @lex_state = :EXPR_BEG
      Token("%") #))
    end
  end

  @OP.def_rule('$') do
    |op, io|
    identify_gvar
  end

  @OP.def_rule('@') do
    |op, io|
    if peek(0) =~ /[\w@]/
      ungetc
      identify_identifier
    else
      Token("@")
    end
  end

  #       @OP.def_rule("def", proc{|op, io| /\s/ =~ io.peek(0)}) do
  # 	|op, io|
  # 	@indent += 1
  # 	@lex_state = :EXPR_FNAME
  # #	@lex_state = :EXPR_END
  # #	until @rests[0] == "\n" or @rests[0] == ";"
  # #	  rests.shift
  # #	end
  #       end

  @OP.def_rule("_") do
    if peek_match?(/_END__/) and @lex_state == :EXPR_BEG then
      6.times { getc }
      Token(TkEND_OF_SCRIPT, '__END__')
    else
      ungetc
      identify_identifier
    end
  end

  @OP.def_rule("") do
    |op, io|
    printf "MATCH: start %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    if peek(0) =~ /[0-9]/
      t = identify_number
    else
      t = identify_identifier
    end
    printf "MATCH: end %s: %s\n", op, io.inspect if RDoc::RubyLex.debug?
    t
  end

  p @OP if RDoc::RubyLex.debug?
end

#peek(i = 0) ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 247

def peek(i = 0)
  while @rests.size <= i
    return nil unless buf_input
  end
  @rests[i]
end

#peek_equal?(str) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/rdoc/ruby_lex.rb', line 232

def peek_equal?(str)
  chrs = str.split(//)
  until @rests.size >= chrs.size
    return false unless buf_input
  end
  @rests[0, chrs.size] == chrs
end

#peek_match?(regexp) ⇒ `Boolean`

Returns:

(Boolean)

# File 'lib/rdoc/ruby_lex.rb', line 240

def peek_match?(regexp)
  while @rests.empty?
    return false unless buf_input
  end
  regexp =~ @rests.join("")
end

#prompt ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 272

def prompt
  if @prompt
    @prompt.call(@ltype, @indent, @continue, @line_no)
  end
end

#read_escape ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1287

def read_escape
  escape = ''
  ch = getc

  case ch
  when "\n", "\r", "\f"
    escape << ch
  when "\\", "n", "t", "r", "f", "v", "a", "e", "b", "s" #"
    escape << ch
  when /[0-7]/
    ungetc ch
    3.times do
      ch = getc
      case ch
      when /[0-7]/
        escape << ch
      when nil
        break
      else
        ungetc
        break
      end
    end

  when "x"
    escape << ch

    2.times do
      ch = getc
      case ch
      when /[0-9a-fA-F]/
        escape << ch
      when nil
        break
      else
        ungetc
        break
      end
    end

  when "M"
    escape << ch

    ch = getc
    if ch != '-'
      ungetc
    else
      escape << ch

      ch = getc
      if ch == "\\" #"
        ungetc
        escape << read_escape
      else
        escape << ch
      end
    end

  when "C", "c" #, "^"
    escape << ch

    if ch == "C"
      ch = getc

      if ch == "-"
        escape << ch
        ch = getc
        escape << ch

        escape << read_escape if ch == "\\"
      else
        ungetc
      end
    elsif (ch = getc) == "\\" #"
      escape << ch << read_escape
    end
  else
    escape << ch

    # other characters
  end

  escape
end

#set_input(io, p = nil, &block) ⇒ `Object`

io functions

# File 'lib/rdoc/ruby_lex.rb', line 144

def set_input(io, p = nil, &block)
  @io = io
  if p.respond_to?(:call)
    @input = p
  elsif block_given?
    @input = block
  else
    @input = Proc.new{@io.gets}
  end
end

#set_prompt(p = nil, &block) ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 263

def set_prompt(p = nil, &block)
  p = block if block_given?
  if p.respond_to?(:call)
    @prompt = p
  else
    @prompt = Proc.new{print p}
  end
end

#skip_inner_expression ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 1251

def skip_inner_expression
  res = ""
  nest = 0
  while ch = getc
    res << ch
    if ch == '}'
      break if nest.zero?
      nest -= 1
    elsif ch == '{'
      nest += 1
    end
  end
  res
end

#token ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 346

def token
  #      require "tracer"
  #      Tracer.on
  @prev_seek = @seek
  @prev_line_no = @line_no
  @prev_char_no = @char_no
  begin
    begin
      tk = @OP.match(self)
      @space_seen = tk.kind_of?(TkSPACE)
    rescue SyntaxError => e
      raise Error, "syntax error: #{e.message}" if
        @exception_on_syntax_error

      tk = TkError.new(@seek, @line_no, @char_no)
    end
  end while @skip_space and tk.kind_of?(TkSPACE)

  if @readed_auto_clean_up
    get_readed
  end
  #      Tracer.off
  tk
end

#ungetc(c = nil) ⇒ `Object`

# File 'lib/rdoc/ruby_lex.rb', line 211

def ungetc(c = nil)
  if @here_readed.empty?
    c2 = @readed.pop
  else
    c2 = @here_readed.pop
  end
  c = c2 unless c
  @rests.unshift c #c =
  @seek -= 1
  if c == "\n"
    @line_no -= 1
    if idx = @readed.rindex("\n")
      @char_no = idx + 1
    else
      @char_no = @base_char_no + @readed.size
    end
  else
    @char_no -= 1
  end
end

Class: RDoc::RubyLex

Overview

Defined Under Namespace

Constant Summary collapse

Constants included from RubyToken

Class Attribute Summary collapse

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from RubyToken

Constructor Details

#initialize(content, options) ⇒ RubyLex

Class Attribute Details

.debug_level ⇒ Object

Instance Attribute Details

#char_no ⇒ Object (readonly)

#continue ⇒ Object

#exception_on_syntax_error ⇒ Object

#indent ⇒ Object (readonly)

#lex_state ⇒ Object

#line_no ⇒ Object (readonly)

#readed_auto_clean_up ⇒ Object

#reader ⇒ Object (readonly)

#seek ⇒ Object (readonly)

#skip_space ⇒ Object

Class Method Details

.debug? ⇒ Boolean

.tokenize(ruby, options) ⇒ Object

Instance Method Details

#each_top_level_statement ⇒ Object

#eof? ⇒ Boolean

#get_readed ⇒ Object

#getc ⇒ Object

#getc_of_rests ⇒ Object

#gets ⇒ Object

#identify_comment ⇒ Object

#identify_gvar ⇒ Object

#identify_here_document ⇒ Object

#identify_identifier ⇒ Object

#identify_number(op = "") ⇒ Object

#identify_quotation ⇒ Object

#identify_string(ltype, quoted = ltype, type = nil) ⇒ Object

#initialize_input ⇒ Object

#inspect ⇒ Object

#lex ⇒ Object

#lex_init ⇒ Object

#lex_int2 ⇒ Object

#peek(i = 0) ⇒ Object

#peek_equal?(str) ⇒ Boolean

#peek_match?(regexp) ⇒ Boolean

#prompt ⇒ Object

#read_escape ⇒ Object

#set_input(io, p = nil, &block) ⇒ Object

#set_prompt(p = nil, &block) ⇒ Object

#skip_inner_expression ⇒ Object

#token ⇒ Object

#ungetc(c = nil) ⇒ Object

#initialize(content, options) ⇒ `RubyLex`

.debug_level ⇒ `Object`

#char_no ⇒ `Object` (readonly)

#continue ⇒ `Object`

#exception_on_syntax_error ⇒ `Object`

#indent ⇒ `Object` (readonly)

#lex_state ⇒ `Object`

#line_no ⇒ `Object` (readonly)

#readed_auto_clean_up ⇒ `Object`

#reader ⇒ `Object` (readonly)

#seek ⇒ `Object` (readonly)

#skip_space ⇒ `Object`

.debug? ⇒ `Boolean`

.tokenize(ruby, options) ⇒ `Object`

#each_top_level_statement ⇒ `Object`

#eof? ⇒ `Boolean`

#get_readed ⇒ `Object`

#getc ⇒ `Object`

#getc_of_rests ⇒ `Object`

#gets ⇒ `Object`

#identify_comment ⇒ `Object`

#identify_gvar ⇒ `Object`

#identify_here_document ⇒ `Object`

#identify_identifier ⇒ `Object`

#identify_number(op = "") ⇒ `Object`

#identify_quotation ⇒ `Object`

#identify_string(ltype, quoted = ltype, type = nil) ⇒ `Object`

#initialize_input ⇒ `Object`

#inspect ⇒ `Object`

#lex ⇒ `Object`

#lex_init ⇒ `Object`

#lex_int2 ⇒ `Object`

#peek(i = 0) ⇒ `Object`

#peek_equal?(str) ⇒ `Boolean`

#peek_match?(regexp) ⇒ `Boolean`

#prompt ⇒ `Object`

#read_escape ⇒ `Object`

#set_input(io, p = nil, &block) ⇒ `Object`

#set_prompt(p = nil, &block) ⇒ `Object`

#skip_inner_expression ⇒ `Object`

#token ⇒ `Object`

#ungetc(c = nil) ⇒ `Object`