Class: Solidity::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/solidity/lexer.rb

Constant Summary collapse

DOUBLE_QUOTE =

from the solidity grammar

StringLiteralFragment

: 'unicode'? '"' DoubleQuotedStringCharacter* '"'
| 'unicode'? '\'' SingleQuotedStringCharacter* '\'' ;

fragment DoubleQuotedStringCharacter

: ~["\r\n\\] | ('\\' .) ;

fragment SingleQuotedStringCharacter

: ~['\r\n\\] | ('\\' .) ;
%r{"
 ( \\\\. | [^"\r\n\\] )*
"}x
SINGLE_QUOTE =
%r{'
  ( \\\\. | [^'\r\n\\] )*
'}x
NAME =

from the solidity grammar

> An identifier in solidity has to start with a letter,
>  a dollar-sign or an underscore and
>  may additionally contain numbers after the first symbol.

Identifier

 : IdentifierStart IdentifierPart* ;

fragment
  IdentifierStart
  : [a-zA-Z$_] ;

fragment
  IdentifierPart
 : [a-zA-Z0-9$_] ;
/[a-zA-Z$_][a-zA-Z0-9$_]*/
COMMENT =

from the solidity grammar

COMMENT

: '/*' .*? '*/'  ;

LINE_COMMENT

: '//' ~[\r\n]* ;
%r{/\*
.*?
\*/}x
LINE_COMMENT =
%r{//
[^\r\n]*}x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ Lexer

Returns a new instance of Lexer.



12
13
14
15
16
# File 'lib/solidity/lexer.rb', line 12

## Build a lexer over the given source text.
## The text is tokenized eagerly (see #tokenize) and the
## read cursor starts at the first token.
def initialize( txt )
  @txt = txt
  @pos = 0
  @tokens = tokenize
end

Class Method Details

.read(path) ⇒ Object



5
6
7
8
# File 'lib/solidity/lexer.rb', line 5

## Convenience constructor - reads the file at path
## (via the read_text helper) and wraps its contents in a Lexer.
def self.read( path )
  new( read_text( path ) )
end

Instance Method Details

#_norm_whitespace(str) ⇒ Object



177
178
179
180
181
182
# File 'lib/solidity/lexer.rb', line 177

## Collapse every run of spaces / tabs / newlines into a single
## space and trim leading & trailing whitespace.
## Note: deliberately matches only [ \t\n\r] (not \s) to keep
## behavior identical for \f / \v input.
def _norm_whitespace( str )
  str.gsub( /[ \t\n\r]+/, ' ' ).strip
end

#eos? ⇒ Boolean

Returns:

  • (Boolean)


154
# File 'lib/solidity/lexer.rb', line 154

## True once every token has been consumed (peek returns nil).
def eos?
  peek.nil?
end

#next ⇒ Object



146
147
148
149
150
151
152
153
# File 'lib/solidity/lexer.rb', line 146

## Consume the current token and return its lexeme (string content)
## - NOT the token struct (for now - why? why not?).
## [type, lexeme] tokens yield the lexeme; raw string chunks are
## returned as-is; returns nil at end-of-stream (without advancing).
def next
  token = @tokens[@pos]
  return nil if token.nil?

  @pos += 1
  token.is_a?( String ) ? token : token[1]
end

#peek ⇒ Object



140
141
142
143
144
145
# File 'lib/solidity/lexer.rb', line 140

## Look at the current token WITHOUT consuming it.
## Returns the token type for [type, lexeme] pairs (e.g. :string, :sp),
## the raw chunk for plain strings, or nil at end-of-stream.
def peek
  token = @tokens[@pos]
  return token if token.nil? || token.is_a?( String )

  token[0]
end

#pos ⇒ Object



139
# File 'lib/solidity/lexer.rb', line 139

## Current cursor position (index into the token stream).
def pos
  @pos
end

#reset ⇒ Object



138
# File 'lib/solidity/lexer.rb', line 138

## Rewind the cursor back to the first token.
def reset
  @pos = 0
end

#scan_until(tt, include: false) ⇒ Object

“higher-level” helpers



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/solidity/lexer.rb', line 161

## "Higher-level" helper - collect token lexemes until the token
## type tt is reached. Inline comments are replaced by a single
## space and the result is whitespace-normalized.
## With include: true the tt token itself is appended too
## (e.g. the terminating ';') - why? why not?
def scan_until( tt, include: false )
  code = String.new('')
  ## note: peek/next need an explicit self receiver -
  ##       bare next/peek parse as keyword / shadowed local
  until self.peek == tt
    if self.peek == :comment
      self.next        ## swallow the comment token ...
      code << ' '      ## ... and stand a single space in for it
    else
      code << self.next
    end
  end
  code << self.next    if include
  _norm_whitespace( code )
end

#tokenize ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/solidity/lexer.rb', line 88

## Split @txt into a flat token list.
## Tokens are [type, lexeme] pairs (e.g. [:ident, "foo"]); any text
## not matched by a tracked pattern gets slurped into plain String
## chunks (so nothing from the input is lost).
def tokenize
  ## names that get their own token type instead of :ident
  keywords = %w[pragma contract abstract library interface
                function struct enum event]
  tokens  = []
  scanner = StringScanner.new( @txt )

  until scanner.eos?    ## loop until hitting end-of-string (file)
    if scanner.scan( /[ \t]+/ )           ## one or more spaces
      ## note: (auto-)convert tab to space - why? why not?
      tokens << [:sp, scanner.matched.tr( "\t", ' ' )]
    elsif scanner.scan( /\r?\n/ )         ## incl. (windows) \r\n - why? why not?
      tokens << [:nl, "\n"]
    elsif scanner.scan( COMMENT ) || scanner.scan( LINE_COMMENT )
      tokens << [:comment, scanner.matched]
    elsif scanner.scan( DOUBLE_QUOTE ) || scanner.scan( SINGLE_QUOTE )
      tokens << [:string, scanner.matched]
    elsif scanner.scan( NAME )
      name = scanner.matched
      type = keywords.include?( name ) ? name.to_sym : :ident
      tokens << [type, name]
    elsif scanner.scan( /[;{}]/ )         ## tracked punctuation
      ch = scanner.matched
      tokens << [ch.to_sym, ch]
    else   ## slurp until hitting a "tracked" token again
      ch   = scanner.getch
      last = tokens[-1]
      if last.is_a?( String )
        last << ch         ## append char to last chunk
      else
        tokens << ch       ## start a new chunk
      end
    end
  end
  tokens
end