Class: RBS::Inline::AnnotationParser::Tokenizer

Inherits:
Object
  • Object
show all
Includes:
Tokens
Defined in:
lib/rbs/inline/annotation_parser/tokenizer.rb

Constant Summary collapse

# Maps the source text of each keyword to its token type constant.
#
# `KW_RE` below is built from these keys in insertion order, and regexp
# alternation prefers the leftmost alternative that matches. "module-self"
# therefore has to stay ahead of "module": `\b` matches between "module" and
# "-", so the shorter keyword would otherwise win.
KEYWORDS =
{
  "return" => K_RETURN,
  "inherits" => K_INHERITS,
  "as" => K_AS,
  "override" => K_OVERRIDE,
  "use" => K_USE,
  "module-self" => K_MODULE_SELF,
  "generic" => K_GENERIC,
  "in" => K_IN,
  "out" => K_OUT,
  "unchecked" => K_UNCHECKED,
  "self" => K_SELF,
  "skip" => K_SKIP,
  "yields" => K_YIELDS,
  "module" => K_MODULE,
  "class" => K_CLASS,
}
# Matches any keyword that ends at a word boundary, so an identifier such as
# "input" is not lexed as the keyword "in".
# NOTE(review): the `: Hash[String, Symbol]` line below looks like the type
# annotation of KEYWORDS picked up by the documentation generator -- confirm.
KW_RE =

: Hash[String, Symbol]

/#{Regexp.union(KEYWORDS.keys)}\b/
# Maps the source text of each punctuation token to its token type constant.
#
# `PUNCTS_RE` below is built from these keys in insertion order, and regexp
# alternation prefers the leftmost alternative that matches. Every
# multi-character token is therefore listed before the shorter token it starts
# with ("::" before ":", "**" before "*", "..." before ".").
PUNCTS =
{
  "::" => K_COLON2,
  ":" => K_COLON,
  "[" => K_LBRACKET,
  "]" => K_RBRACKET,
  "," => K_COMMA,
  "**" => K_STAR2,
  "*" => K_STAR,
  "--" => K_MINUS2,
  "<" => K_LT,
  "..." => K_DOT3,
  "." => K_DOT,
  "->" => K_ARROW,
  "{" => K_LBRACE,
  "(" => K_LPAREN,
  "&" => K_AMP,
  "?" => K_QUESTION,
  "|" => K_VBAR,
}
# Matches any one of the punctuation tokens above.
# NOTE(review): the `: Hash[String, Symbol]` line below looks like the type
# annotation of PUNCTS picked up by the documentation generator -- confirm.
PUNCTS_RE =

: Hash[String, Symbol]

Regexp.union(PUNCTS.keys)

Constants included from Tokens

RBS::Inline::AnnotationParser::Tokens::K_AMP, RBS::Inline::AnnotationParser::Tokens::K_ARROW, RBS::Inline::AnnotationParser::Tokens::K_AS, RBS::Inline::AnnotationParser::Tokens::K_CLASS, RBS::Inline::AnnotationParser::Tokens::K_COLON, RBS::Inline::AnnotationParser::Tokens::K_COLON2, RBS::Inline::AnnotationParser::Tokens::K_COMMA, RBS::Inline::AnnotationParser::Tokens::K_DOT, RBS::Inline::AnnotationParser::Tokens::K_DOT3, RBS::Inline::AnnotationParser::Tokens::K_EOF, RBS::Inline::AnnotationParser::Tokens::K_GENERIC, RBS::Inline::AnnotationParser::Tokens::K_IN, RBS::Inline::AnnotationParser::Tokens::K_INHERITS, RBS::Inline::AnnotationParser::Tokens::K_LBRACE, RBS::Inline::AnnotationParser::Tokens::K_LBRACKET, RBS::Inline::AnnotationParser::Tokens::K_LPAREN, RBS::Inline::AnnotationParser::Tokens::K_LT, RBS::Inline::AnnotationParser::Tokens::K_MINUS2, RBS::Inline::AnnotationParser::Tokens::K_MODULE, RBS::Inline::AnnotationParser::Tokens::K_MODULE_SELF, RBS::Inline::AnnotationParser::Tokens::K_OUT, RBS::Inline::AnnotationParser::Tokens::K_OVERRIDE, RBS::Inline::AnnotationParser::Tokens::K_QUESTION, RBS::Inline::AnnotationParser::Tokens::K_RBRACKET, RBS::Inline::AnnotationParser::Tokens::K_RBS, RBS::Inline::AnnotationParser::Tokens::K_RBSE, RBS::Inline::AnnotationParser::Tokens::K_RETURN, RBS::Inline::AnnotationParser::Tokens::K_SELF, RBS::Inline::AnnotationParser::Tokens::K_SKIP, RBS::Inline::AnnotationParser::Tokens::K_STAR, RBS::Inline::AnnotationParser::Tokens::K_STAR2, RBS::Inline::AnnotationParser::Tokens::K_UNCHECKED, RBS::Inline::AnnotationParser::Tokens::K_USE, RBS::Inline::AnnotationParser::Tokens::K_VBAR, RBS::Inline::AnnotationParser::Tokens::K_YIELDS, RBS::Inline::AnnotationParser::Tokens::T_ANNOTATION, RBS::Inline::AnnotationParser::Tokens::T_ATIDENT, RBS::Inline::AnnotationParser::Tokens::T_BLOCKSTR, RBS::Inline::AnnotationParser::Tokens::T_COMMENT, RBS::Inline::AnnotationParser::Tokens::T_ELVAR, RBS::Inline::AnnotationParser::Tokens::T_IFIDENT, 
RBS::Inline::AnnotationParser::Tokens::T_LVAR, RBS::Inline::AnnotationParser::Tokens::T_SOURCE, RBS::Inline::AnnotationParser::Tokens::T_UIDENT, RBS::Inline::AnnotationParser::Tokens::T_WHITESPACE

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(scanner) ⇒ Tokenizer

Returns a new instance of Tokenizer.



161
162
163
164
165
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 161

# Creates a tokenizer that reads from the given scanner.
#
# Nothing is scanned here: the lookahead window starts out empty, with no
# trivia and both lookahead tokens set to nil.
def initialize(scanner)
  @scanner = scanner

  # Four-tuple: [leading trivia, lookahead1, trivia in between, lookahead2]
  @lookahead_tokens = [[], nil, [], nil]
end

Instance Attribute Details

#lookahead_tokens ⇒ Object (readonly)

Tokens that come after the current position

This is a four tuple of tokens.

  1. The first array holds the trivia tokens before the current position

  2. The second token is the first lookahead token after the current position

  3. The third array holds the trivia tokens between the first lookahead and the second lookahead

  4. The fourth token is the second lookahead token



121
122
123
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 121

# Returns the lookahead window, a four-element array:
# [leading trivia tokens, lookahead1, trivia tokens in between, lookahead2].
# The array itself is returned, not a copy.
def lookahead_tokens
  @lookahead_tokens
end

#scanner ⇒ Object (readonly)

: StringScanner



110
111
112
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 110

# Returns the scanner object that was passed to the constructor.
def scanner
  @scanner
end

Instance Method Details

#advance(tree, eat: false) ⇒ Object

Advances the scanner



172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 172

# Advances the scanner by one token.
#
# The trivia in front of the current token is flushed (into `tree` when one is
# given), the current token is appended to `tree` when `eat` is true, and the
# lookahead window slides forward: lookahead2 becomes lookahead1 and a new
# lookahead2 is scanned from the input.
#
# Returns the token that was lookahead1 before the call (may be nil).
def advance(tree, eat: false)
  consume_trivias(tree)
  last = lookahead_tokens[1]
  tree << last if eat

  # Slide the window: the trivia that sat between the two lookaheads now
  # precedes the current token. `replace` copies the elements, so clearing
  # slot 2 afterwards does not empty slot 0.
  lookahead_tokens[0].replace(lookahead_tokens[2])
  lookahead_tokens[1] = lookahead_tokens[3]
  lookahead_tokens[2].clear

  # Whitespace in front of the next token is recorded as trivia.
  while s = scanner.scan(/\s+/)
    lookahead_tokens[2] << [T_WHITESPACE, s]
  end

  # Rules are tried from top to bottom and the first match wins, so the order
  # matters: `@rbs!` comes before `@rbs\b` (which would also match its first
  # four characters), both come before the generic `@\w+`, and keywords come
  # before `[a-z]\w*` so that they are not lexed as T_LVAR.
  # When no rule matches, the `case` evaluates to nil and lookahead2 becomes nil.
  lookahead =
    case
    when scanner.eos?
      [K_EOF, ""]
    when s = scanner.scan(/@rbs!/)
      [K_RBSE, s]
    when s = scanner.scan(/@rbs\b/)
      [K_RBS, s]
    when s = scanner.scan(PUNCTS_RE)
      [PUNCTS.fetch(s), s]
    when s = scanner.scan(KW_RE)
      [KEYWORDS.fetch(s), s]
    when s = scanner.scan(/[A-Z]\w*/)
      [T_UIDENT, s]
    when s = scanner.scan(/_[A-Z]\w*/)
      [T_IFIDENT, s]
    when s = scanner.scan(/[a-z]\w*/)
      [T_LVAR, s]
    when s = scanner.scan(/![a-z]\w*/)
      [T_ELVAR, s]
    when s = scanner.scan(/@\w+/)
      [T_ATIDENT, s]
    when s = scanner.scan(/%a\{[^}]+\}/)
      [T_ANNOTATION, s]
    when s = scanner.scan(/%a\[[^\]]+\]/)
      [T_ANNOTATION, s]
    when s = scanner.scan(/%a\([^)]+\)/)
      [T_ANNOTATION, s]
    end #: token?

  lookahead_tokens[3] = lookahead

  last
end

#consume_token(*types, tree:) ⇒ Object

Consumes the current token if it has one of the given types and inserts it into the tree; otherwise inserts `nil` into the tree



279
280
281
282
283
284
285
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 279

# Optionally consumes the current token.
#
# When the current token has one of the given `types`, it is appended to
# `tree`, the tokenizer advances, and the consumed token is returned.
# Otherwise a `nil` placeholder is appended to `tree` and the input is left
# untouched.
def consume_token(*types, tree:)
  return advance(tree, eat: true) if type?(*types)

  tree << nil
end

#consume_token!(*types, tree:) ⇒ Object

Consumes the current token if it has one of the given types and inserts it into the tree; raises otherwise



292
293
294
295
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 292

# Consumes the current token, which must have one of the given `types`.
#
# The token is appended to `tree` and the tokenizer advances. Returns whatever
# `advance` returns. The mismatch error comes from `type!`.
def consume_token!(*types, tree:)
  # Must run first: nothing is consumed when the token type does not match.
  type!(*types)
  advance(tree, eat: true)
end

#consume_trivias(tree) ⇒ Object



221
222
223
224
225
226
227
228
229
230
231
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 221

# Drains the trivia tokens that precede the current token.
#
# Each trivia token is appended to `tree` (skipped when `tree` is nil) and the
# leading-trivia slot of the lookahead window is emptied.
#
# Returns the concatenated source text of the drained tokens as a new String.
def consume_trivias(tree)
  trivias = lookahead_tokens[0]

  text = trivias.each_with_object(+"") do |token, acc|
    tree << token if tree
    acc << token[1]
  end

  trivias.clear
  text
end

#current_position ⇒ Object

Returns the current char position of the first lookahead token

```
__ foo ___ bar baz
^^                  Trivia tokens before lookahead1
   ^                #current_position
   ^^^              lookahead1
       ^^^          Trivia tokens between lookahead1 and lookahead2
           ^^^      lookahead2
              ^     <= scanner.charpos
```



147
148
149
150
151
152
153
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 147

# Character position at which the first lookahead token starts.
#
# The scanner has already read past lookahead1, the trivia that follows it,
# and lookahead2, so their combined length is subtracted from the scanner's
# character position.
def current_position #: Integer
  scanned_ahead = [lookahead1, *lookahead_tokens[2], lookahead2].compact
  scanner.charpos - scanned_ahead.sum { |_, text| text.size }
end

#lookahead1 ⇒ Object

Token that comes after the current position



125
126
127
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 125

# The token right after the current position (second slot of the lookahead
# window), or nil when that slot is empty.
def lookahead1 #: token?
  lookahead_tokens[1]
end

#lookahead2 ⇒ Object

Token that comes after `lookahead1`



131
132
133
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 131

# The token that follows `lookahead1` (fourth slot of the lookahead window),
# or nil when that slot is empty.
def lookahead2 #: token?
  lookahead_tokens[3]
end

#lookaheads ⇒ Object

: Array



155
156
157
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 155

# Token types of the two lookahead tokens, in order.
#
# An entry is nil when the corresponding lookahead token is missing.
def lookaheads #: Array[Symbol?]
  [lookahead1, lookahead2].map { |token| token && token[0] }
end

#reset(position, tree) ⇒ Object

Skips characters

This method ensures that `current_position` will be the given `position`.



245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 245

# Repositions the tokenizer so that `current_position` becomes `position`.
#
# `position` is a character offset from the beginning of the input. The
# lookahead window is discarded and refilled from the new position by two
# calls to `advance`; trivia flushed while refilling goes to `tree`.
#
# Raises RuntimeError when `position` lies beyond the end of the input.
def reset(position, tree)
  # The scanner only skips forward, so start over from the beginning when the
  # target lies behind the scanner position.
  scanner.reset if scanner.charpos > position

  skips = position - scanner.charpos

  # `position` and `charpos` count characters, so compare against the number
  # of remaining characters. `rest_size` counts bytes and would let an
  # out-of-range position through on multi-byte input.
  if scanner.rest.size < skips
    raise "The position is bigger than the size of the rest of the input: input size=#{scanner.string.size}, position=#{position}"
  end

  # `/m` lets `.` match newlines. Without it the skip fails silently, and the
  # scanner stays put, whenever the skipped span contains a line break.
  scanner.skip(/.{#{skips}}/m)

  @lookahead_tokens = [[], nil, [], nil]

  # Two advances are needed to fill both lookahead slots.
  advance(tree)
  advance(tree)
end

#rest ⇒ Object

: String



264
265
266
267
268
269
270
271
272
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 264

# Source text that has not been consumed yet.
#
# This is the text of everything held in the lookahead window (leading trivia,
# lookahead1, trivia in between, lookahead2) followed by the part of the input
# the scanner has not read. A new String is returned on every call.
def rest #: String
  window = [*lookahead_tokens[0], lookahead1, *lookahead_tokens[2], lookahead2].compact
  buffered = window.each_with_object(+"") { |(_, text), acc| acc << text }
  buffered << scanner.rest
end

#skip_to_comment ⇒ Object

Resets the current token to the incoming comment marker `--`

Resets to the end of the input if a `--` token cannot be found.



326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 326

# Skips forward until a `--` token is the current token (lookahead1).
#
# Returns the source text that was skipped. When no `--` is found, the rest
# of the input is skipped and the current token becomes K_EOF.
def skip_to_comment
  prefix = +""

  # The leading trivia is always part of the skipped text.
  lookahead_tokens[0].each { prefix << _1[1] }
  lookahead_tokens[0].clear

  # Case 1: the current token is already `--`.
  if type?(K_MINUS2)
    return prefix
  end

  prefix << lookahead1[1] if lookahead1
  lookahead_tokens[2].each { prefix << _1[1] }
  lookahead_tokens[2].clear

  # Case 2: `--` is the second lookahead; one advance makes it current.
  if type2?(K_MINUS2)
    advance(_ = nil)  # The tree is unused because no trivia tokens are left
    return prefix
  end

  prefix << lookahead2[1] if lookahead2

  # Case 3: search the part of the input that has not been tokenized yet.
  if string = scanner.scan_until(/--/)
    # Put `--` in the lookahead2 slot and advance once, so that it becomes
    # lookahead1 and the token after it is scanned into lookahead2.
    @lookahead_tokens = [[], nil, [], [K_MINUS2, "--"]]
    advance(_ = nil)  # The tree is unused because no trivia tokens are left
    # `scan_until` returns the text up to and including the match.
    prefix + string.delete_suffix("--")
  else
    # No `--` anywhere: everything left is skipped and the scanner is moved
    # to the end of the input.
    s = scanner.rest
    @lookahead_tokens = [[], [K_EOF, ""], [], nil]
    scanner.terminate
    prefix + s
  end
end

#stuck? ⇒ Boolean

Returns true if the scanner cannot consume next token

Returns:

  • (Boolean)


234
235
236
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 234

# True when both lookahead tokens are nil.
def stuck? #: bool
  [lookahead1, lookahead2].all?(&:nil?)
end

#type!(*types) ⇒ Object

Ensures that the current token is one of the specified types; raises otherwise



317
318
319
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 317

# Asserts that the current token has one of the given `types`.
#
# Returns nil when it does. Otherwise raises a RuntimeError naming the actual
# token type and the expected ones.
def type!(*types)
  return if type?(*types)

  raise "Unexpected token: #{lookahead1&.[](0)}, where expected token: #{types.join(",")}"
end

#type2?(*types) ⇒ Boolean

Tests if the lookahead2 token has one of the specified types

Returns:

  • (Boolean)


309
310
311
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 309

# Tests whether the second lookahead token has one of the given `types`.
#
# Returns false when there is no second lookahead token.
def type2?(*types)
  token = lookahead2
  return false unless token

  types.any? { |type| token[0] == type }
end

#type?(*types) ⇒ Boolean

Tests if the current token has one of the specified types

Returns:

  • (Boolean)


301
302
303
# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 301

# Tests whether the current token (lookahead1) has one of the given `types`.
#
# Returns false when there is no current token.
def type?(*types)
  token = lookahead1
  return false unless token

  types.any? { |type| token[0] == type }
end