Class: Solidity::Lexer

Inherits:
Object
  • Object
show all
Defined in:
lib/solidity/lexer.rb

Constant Summary collapse

DOUBLE_QUOTE =

from the solidity grammar

StringLiteralFragment

: 'unicode'? '"' DoubleQuotedStringCharacter* '"'
| 'unicode'? '\'' SingleQuotedStringCharacter* '\'' ;

fragment DoubleQuotedStringCharacter

: ~["\r\n\\] | ('\\' .) ;

fragment SingleQuotedStringCharacter

: ~['\r\n\\] | ('\\' .) ;
%r{"
 ( \\\\. | [^"\r\n\\] )*
"}x
SINGLE_QUOTE =
%r{'
  ( \\\\. | [^'\r\n\\] )*
'}x
NAME =

from the solidity grammar

> An identifier in solidity has to start with a letter,
>  a dollar-sign or an underscore and
>  may additionally contain numbers after the first symbol.

Identifier

 : IdentifierStart IdentifierPart* ;

fragment
  IdentifierStart
  : [a-zA-Z$_] ;

fragment
  IdentifierPart
 : [a-zA-Z0-9$_] ;
/[a-zA-Z$_][a-zA-Z0-9$_]*/
COMMENT =

from the solidity grammar

COMMENT

: '/*' .*? '*/'  ;

LINE_COMMENT

: '//' ~[\r\n]* ;
%r{/\*
.*?
\*/}x
LINE_COMMENT =
%r{//
[^\r\n]*}x

Class Method Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(txt) ⇒ Lexer

Returns a new instance of Lexer.



12
13
14
15
16
# File 'lib/solidity/lexer.rb', line 12

## Build a lexer over the given source text.
## The text is tokenized eagerly (see #tokenize) and the
## read cursor starts at the first token.
def initialize( txt )
  @txt = txt
  @pos = 0
  @tokens = tokenize
end

Class Method Details

.read(path) ⇒ Object



5
6
7
8
# File 'lib/solidity/lexer.rb', line 5

## Convenience constructor - reads the file at path
## (via the read_text helper) and wraps its contents in a Lexer.
def self.read( path )
  new( read_text( path ) )
end

Instance Method Details

#_norm_whitespace(str) ⇒ Object



177
178
179
180
181
182
# File 'lib/solidity/lexer.rb', line 177

## Collapse every run of spaces / tabs / newlines into a single
## space and trim leading & trailing whitespace.
## Note: deliberately matches only [ \t\n\r] (not \s) to keep
## behavior identical for \f / \v input.
def _norm_whitespace( str )
  str.gsub( /[ \t\n\r]+/, ' ' ).strip
end

#eos? ⇒ Boolean

Returns:

  • (Boolean)


154
# File 'lib/solidity/lexer.rb', line 154

## True once every token has been consumed (peek returns nil).
def eos?
  peek.nil?
end

#next ⇒ Object



146
147
148
149
150
151
152
153
# File 'lib/solidity/lexer.rb', line 146

## Consume the current token and return its lexeme (string content)
## - NOT the token struct (for now - why? why not?).
## [type, lexeme] tokens yield the lexeme; raw string chunks are
## returned as-is; returns nil at end-of-stream (without advancing).
def next
  token = @tokens[@pos]
  return nil if token.nil?

  @pos += 1
  token.is_a?( String ) ? token : token[1]
end

#peek ⇒ Object



140
141
142
143
144
145
# File 'lib/solidity/lexer.rb', line 140

## Look at the current token WITHOUT consuming it.
## Returns the token type for [type, lexeme] pairs (e.g. :string, :sp),
## the raw chunk for plain strings, or nil at end-of-stream.
def peek
  token = @tokens[@pos]
  return token if token.nil? || token.is_a?( String )

  token[0]
end

#pos ⇒ Object



139
# File 'lib/solidity/lexer.rb', line 139

## Current cursor position (index into the token stream).
def pos
  @pos
end

#reset ⇒ Object



138
# File 'lib/solidity/lexer.rb', line 138

## Rewind the cursor back to the first token.
def reset
  @pos = 0
end

#scan_until(tt, include: false) ⇒ Object

“higher-level” helpers



161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# File 'lib/solidity/lexer.rb', line 161

## "Higher-level" helper - collect token lexemes until the token
## type tt is reached. Inline comments are replaced by a single
## space and the result is whitespace-normalized.
## With include: true the tt token itself is appended too
## (e.g. the terminating ';') - why? why not?
def scan_until( tt, include: false )
  code = String.new('')
  ## note: peek/next need an explicit self receiver -
  ##       bare next/peek parse as keyword / shadowed local
  until self.peek == tt
    if self.peek == :comment
      self.next        ## swallow the comment token ...
      code << ' '      ## ... and stand a single space in for it
    else
      code << self.next
    end
  end
  code << self.next    if include
  _norm_whitespace( code )
end

#tokenize ⇒ Object



88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/solidity/lexer.rb', line 88

## Split @txt into a flat token list.
## Tokens are [type, lexeme] pairs (e.g. [:ident, "foo"]); any text
## not matched by a tracked pattern gets slurped into plain String
## chunks (so nothing from the input is lost).
def tokenize
  ## names that get their own token type instead of :ident
  keywords = %w[pragma contract abstract library interface
                function struct enum event]
  tokens  = []
  scanner = StringScanner.new( @txt )

  until scanner.eos?    ## loop until hitting end-of-string (file)
    if scanner.scan( /[ \t]+/ )           ## one or more spaces
      ## note: (auto-)convert tab to space - why? why not?
      tokens << [:sp, scanner.matched.tr( "\t", ' ' )]
    elsif scanner.scan( /\r?\n/ )         ## incl. (windows) \r\n - why? why not?
      tokens << [:nl, "\n"]
    elsif scanner.scan( COMMENT ) || scanner.scan( LINE_COMMENT )
      tokens << [:comment, scanner.matched]
    elsif scanner.scan( DOUBLE_QUOTE ) || scanner.scan( SINGLE_QUOTE )
      tokens << [:string, scanner.matched]
    elsif scanner.scan( NAME )
      name = scanner.matched
      type = keywords.include?( name ) ? name.to_sym : :ident
      tokens << [type, name]
    elsif scanner.scan( /[;{}]/ )         ## tracked punctuation
      ch = scanner.matched
      tokens << [ch.to_sym, ch]
    else   ## slurp until hitting a "tracked" token again
      ch   = scanner.getch
      last = tokens[-1]
      if last.is_a?( String )
        last << ch         ## append char to last chunk
      else
        tokens << ch       ## start a new chunk
      end
    end
  end
  tokens
end