Class: RBS::Inline::AnnotationParser::Tokenizer

Inherits:

Object

Object
RBS::Inline::AnnotationParser::Tokenizer

show all

Includes:: Tokens

Defined in:: lib/rbs/inline/annotation_parser/tokenizer.rb

Constant Summary collapse

# Maps each annotation keyword, as written in the input, to its token type
# constant.
#
# KW_RE is built from these keys with Regexp.union, and the alternatives are
# tried in the order listed here. Keeping "module-self" ahead of "module"
# prevents the input "module-self" from being tokenized as "module".
KEYWORDS =

{
  "return" => K_RETURN,
  "inherits" => K_INHERITS,
  "as" => K_AS,
  "override" => K_OVERRIDE,
  "use" => K_USE,
  "module-self" => K_MODULE_SELF,
  "generic" => K_GENERIC,
  "in" => K_IN,
  "out" => K_OUT,
  "unchecked" => K_UNCHECKED,
  "self" => K_SELF,
  "skip" => K_SKIP,
  "yields" => K_YIELDS,
  "module" => K_MODULE,
  "class" => K_CLASS,
}

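KW_RE = (regular expression matching any KEYWORDS key followed by a word boundary; the `: Hash[String, Symbol]` annotation rendered here appears to be the type of KEYWORDS above, not of KW_RE)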

/#{Regexp.union(KEYWORDS.keys)}\b/

# Maps each punctuation string to its token type constant.
#
# PUNCTS_RE is built from these keys with Regexp.union, and the alternatives
# are tried in the order listed here. Each longer punctuation therefore has to
# stay ahead of its own prefix: "::" before ":", "**" before "*", and "..."
# before ".".
PUNCTS =

{
  "::" => K_COLON2,
  ":" => K_COLON,
  "[" => K_LBRACKET,
  "]" => K_RBRACKET,
  "," => K_COMMA,
  "**" => K_STAR2,
  "*" => K_STAR,
  "--" => K_MINUS2,
  "<" => K_LT,
  "..." => K_DOT3,
  "." => K_DOT,
  "->" => K_ARROW,
  "{" => K_LBRACE,
  "(" => K_LPAREN,
  "&" => K_AMP,
  "?" => K_QUESTION,
  "|" => K_VBAR,
}

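PUNCTS_RE = (regular expression matching any PUNCTS key; the `: Hash[String, Symbol]` annotation rendered here appears to be the type of PUNCTS above, not of PUNCTS_RE)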

Regexp.union(PUNCTS.keys)

Constants included from Tokens

Instance Attribute Summary collapse

#lookahead_tokens ⇒ Object readonly

Tokens that come after the current position.
#scanner ⇒ Object readonly

: StringScanner.

Instance Method Summary collapse

#advance(tree, eat: false) ⇒ Object

Advances the scanner.
#consume_token(*types, tree:) ⇒ Object

Consumes the current token if it has one of the given types and inserts it into the tree; otherwise inserts `nil` into the tree.
#consume_token!(*types, tree:) ⇒ Object

Consumes the current token if it has one of the given types and inserts it into the tree; otherwise raises.
#consume_trivias(tree) ⇒ Object
#current_position ⇒ Object

Returns the current char position of the first lookahead token.
#initialize(scanner) ⇒ Tokenizer constructor

A new instance of Tokenizer.
#lookahead1 ⇒ Object

Token that comes after the current position.
#lookahead2 ⇒ Object

Token that comes after `lookahead1`.
#lookaheads ⇒ Object

: Array.
#reset(position, tree) ⇒ Object

Skips characters.
#rest ⇒ Object

: String.
#skip_to_comment ⇒ Object

Resets the current token to the incoming comment marker `--`.
#stuck? ⇒ Boolean

Returns true if the scanner cannot consume next token.
#type!(*types) ⇒ Object

Ensures the current token is one of the specified types.
#type2?(*types) ⇒ Boolean

Tests if the lookahead2 token has one of the specified types.
#type?(*types) ⇒ Boolean

Tests if the current token has one of the specified types.

Constructor Details

#initialize(scanner) ⇒ `Tokenizer`

Returns a new instance of Tokenizer.

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 161

# Creates a tokenizer that reads from the given scanner.
#
# No token is scanned here: both lookahead slots start out as nil and both
# trivia arrays start out empty.
def initialize(scanner)
  @scanner = scanner

  # [trivia before lookahead1, lookahead1, trivia before lookahead2, lookahead2]
  @lookahead_tokens = [[], nil, [], nil]
end

Instance Attribute Details

#lookahead_tokens ⇒ `Object` (readonly)

Tokens that come after the current position

This is a four-tuple of tokens.

The first array is the trivia tokens before the current position
The second token is the first lookahead token after the current position
The third array is the trivia tokens between the first lookahead and the second lookahead
The fourth token is the second lookahead token



121
122
123

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 121

# Returns the lookahead buffer itself (not a copy), laid out as
# [trivia before lookahead1, lookahead1, trivia before lookahead2, lookahead2].
# Other methods of this class mutate the returned array in place.
def lookahead_tokens
  @lookahead_tokens
end

#scanner ⇒ `Object` (readonly)

: StringScanner



110
111
112

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 110

# Returns the scanner object that was passed to the constructor; all input is
# read through it.
def scanner
  @scanner
end

Instance Method Details

#advance(tree, eat: false) ⇒ `Object`

Advances the scanner

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 172

# Advances the scanner
#
# Shifts the lookahead window forward by one token: the old lookahead2 becomes
# lookahead1, and a new lookahead2 is scanned from the input.
#
# tree - receives the pending leading trivia tokens (through #consume_trivias)
#        and, when `eat` is true, the token that was lookahead1. It may be nil
#        only when `eat` is false.
# eat  - whether the consumed token is pushed to `tree`.
#
# Returns the token that was lookahead1 before the call (nil if there was none).
def advance(tree, eat: false)
  consume_trivias(tree)
  last = lookahead_tokens[1]
  tree << last if eat

  # Shift the window: slots 2 and 3 move into slots 0 and 1.
  lookahead_tokens[0].replace(lookahead_tokens[2])
  lookahead_tokens[1] = lookahead_tokens[3]
  lookahead_tokens[2].clear

  # Whitespace in front of the next token is kept as trivia in slot 2.
  while s = scanner.scan(/\s+/)
    lookahead_tokens[2] << [T_WHITESPACE, s]
  end

  # The branches are tried top to bottom, so earlier patterns take priority:
  # `@rbs!` before `@rbs`, both before the generic `@\w+`, and keywords before
  # plain lowercase identifiers. When nothing matches, the `case` evaluates to
  # nil and the scanner does not move.
  lookahead =
    case
    when scanner.eos?
      [K_EOF, ""]
    when s = scanner.scan(/@rbs!/)
      [K_RBSE, s]
    when s = scanner.scan(/@rbs\b/)
      [K_RBS, s]
    when s = scanner.scan(PUNCTS_RE)
      [PUNCTS.fetch(s), s]
    when s = scanner.scan(KW_RE)
      [KEYWORDS.fetch(s), s]
    when s = scanner.scan(/[A-Z]\w*/)
      [T_UIDENT, s]
    when s = scanner.scan(/_[A-Z]\w*/)
      [T_IFIDENT, s]
    when s = scanner.scan(/[a-z]\w*/)
      [T_LVAR, s]
    when s = scanner.scan(/![a-z]\w*/)
      [T_ELVAR, s]
    when s = scanner.scan(/@\w+/)
      [T_ATIDENT, s]
    when s = scanner.scan(/%a\{[^}]+\}/)
      [T_ANNOTATION, s]
    when s = scanner.scan(/%a\[[^\]]+\]/)
      [T_ANNOTATION, s]
    when s = scanner.scan(/%a\([^)]+\)/)
      [T_ANNOTATION, s]
    end #: token?

  lookahead_tokens[3] = lookahead

  last
end

#consume_token(*types, tree:) ⇒ `Object`

Consumes the current token if it has one of the given types and inserts it into the tree; otherwise inserts `nil` into the tree

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 279

# Consumes the current token when its type is one of `types`, pushing it to
# `tree`; otherwise pushes `nil` to `tree` as a placeholder and leaves the
# tokenizer where it is.
#
# types - token types that are acceptable at this point.
# tree  - receives the consumed token, or `nil` when nothing matched.
def consume_token(*types, tree:)
  return advance(tree, eat: true) if type?(*types)

  tree << nil
end

#consume_token!(*types, tree:) ⇒ `Object`

Consumes the current token if it has one of the given types and inserts it into the tree; otherwise raises

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 292

# Consumes the current token and pushes it to `tree`, raising when the current
# token's type is not one of `types`.
#
# types - token types that are acceptable at this point.
# tree  - receives the consumed token.
#
# Returns the consumed token (the result of #advance).
def consume_token!(*types, tree:)
  type!(*types) # raises before anything is consumed
  advance(tree, eat: true)
end

#consume_trivias(tree) ⇒ `Object`

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 221

# Flushes the trivia tokens that sit in front of lookahead1.
#
# Each pending trivia token is pushed to `tree` (skipped when `tree` is nil),
# and the pending list is emptied.
#
# Returns the concatenated text of the flushed trivia tokens.
def consume_trivias(tree)
  pending = lookahead_tokens[0]

  text = pending.each_with_object(+"") do |trivia, acc|
    tree << trivia if tree
    acc << trivia[1]
  end

  pending.clear
  text
end

#current_position ⇒ `Object`

Returns the current char position of the first lookahead token

```
__foo___bar_baz
^^               Trivia tokens before lookahead1 (`_` stands for a whitespace character)
  ^              #current_position
  ^^^            lookahead1
     ^^^         Trivia tokens between lookahead1 and lookahead2
        ^^^      lookahead2
           ^     <= scanner.charpos
```

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 147

# Returns the character position at which lookahead1 starts.
#
# The scanner has already read past lookahead1, the trivia that follows it,
# and lookahead2, so their combined length is subtracted from the scanner's
# character position.
def current_position #: Integer
  buffered = lookahead_tokens[2].sum { |_, text| text.size }
  buffered += lookahead1[1].size if lookahead1
  buffered += lookahead2[1].size if lookahead2

  scanner.charpos - buffered
end

#lookahead1 ⇒ `Object`

Token that comes after the current position



125
126
127

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 125

# Token that comes after the current position
#
# Reads slot 1 of #lookahead_tokens; nil when that slot holds no token.
def lookahead1 #: token?
  lookahead_tokens[1]
end

#lookahead2 ⇒ `Object`

Token that comes after `lookahead1`



131
132
133

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 131

# Token that comes after `lookahead1`
#
# Reads slot 3 of #lookahead_tokens; nil when that slot holds no token.
def lookahead2 #: token?
  lookahead_tokens[3]
end

#lookaheads ⇒ `Object`

: Array



155
156
157

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 155

# Returns the types of the two lookahead tokens as a two-element array.
# An entry is nil when the corresponding lookahead token is missing.
def lookaheads #: Array[Symbol?]
  [lookahead1, lookahead2].map { |token| token && token[0] }
end

#reset(position, tree) ⇒ `Object`

Skips characters

This method ensures the `current_position` will be the given `position`.

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 245

# Skips characters
#
# Moves the scanner to `position` (a character offset from the start of the
# input), discards the current lookahead buffer, and refills both lookahead
# slots by calling #advance twice.
#
# position - target character offset.
# tree     - passed to both #advance calls.
#
# Raises RuntimeError when `position` lies beyond the end of the input.
def reset(position, tree)
  # Going backwards: rewind to the start and skip forward from there.
  scanner.reset if scanner.charpos > position

  skips = position - scanner.charpos

  # `skips` counts characters, so compare against the remaining characters,
  # not the remaining bytes, which are larger for multibyte input.
  if scanner.rest.size < skips
    raise "The position is bigger than the size of the rest of the input: input size=#{scanner.string.size}, position=#{position}"
  end

  # The `m` flag lets `.` match newlines. Without it the skip silently fails
  # whenever the skipped region spans more than one line.
  scanner.skip(/.{#{skips}}/m)

  @lookahead_tokens = [[], nil, [], nil]

  advance(tree)
  advance(tree)
end

#rest ⇒ `Object`

: String

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 264

# Returns everything that has not been consumed yet: the text held in the
# lookahead buffer (leading trivia, lookahead1, the trivia after it,
# lookahead2) followed by the input the scanner has not read.
def rest #: String
  pieces = lookahead_tokens[0].map { |_, text| text }
  pieces << lookahead1[1] if lookahead1
  pieces.concat(lookahead_tokens[2].map { |_, text| text })
  pieces << lookahead2[1] if lookahead2
  pieces << scanner.rest

  pieces.join
end

#skip_to_comment ⇒ `Object`

Resets the current token to the incoming comment marker `--`

Resets to the end of the input if no `--` token can be found.

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 326

# Reset the current_token to incoming comment `--`
#
# Reset to the end of the input if `--` token cannot be found.
#
# Returns the text that was skipped: everything before the `--`, or
# everything up to the end of the input when there is no `--`.
def skip_to_comment
  prefix = +""

  # The pending leading trivia is part of the skipped text.
  lookahead_tokens[0].each { prefix << _1[1] }
  lookahead_tokens[0].clear

  # Already positioned at `--`: nothing more to skip.
  if type?(K_MINUS2)
    return prefix
  end

  # lookahead1 is not `--`, so it and the trivia after it are skipped as well.
  prefix << lookahead1[1] if lookahead1
  lookahead_tokens[2].each { prefix << _1[1] }
  lookahead_tokens[2].clear

  # lookahead2 is `--`: one #advance makes it the current token.
  if type2?(K_MINUS2)
    advance(_ = nil)  # The tree is unused because no trivia tokens are left
    return prefix
  end

  prefix << lookahead2[1] if lookahead2

  # Neither lookahead is `--`: search the input the scanner has not read yet.
  if string = scanner.scan_until(/--/)
    # Put `--` in the lookahead2 slot so that the #advance below moves it to
    # lookahead1 and scans the token that follows it.
    @lookahead_tokens = [[], nil, [], [K_MINUS2, "--"]]
    advance(_ = nil)  # The tree is unused because no trivia tokens are left
    # `string` ends with the matched `--`, which is not part of the skipped text.
    prefix + string.delete_suffix("--")
  else
    # No `--` anywhere: consume the remaining input and leave EOF as the current token.
    s = scanner.rest
    @lookahead_tokens = [[], [K_EOF, ""], [], nil]
    scanner.terminate
    prefix + s
  end
end

#stuck? ⇒ `Boolean`

Returns true if the scanner cannot consume next token

Returns:

(Boolean)



234
235
236

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 234

# Returns true when neither lookahead slot holds a token, which means the
# scanner could not produce a next token.
def stuck? #: bool
  !(lookahead1 || lookahead2)
end

#type!(*types) ⇒ `Object`

Ensures the current token is one of the specified types



317
318
319

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 317

# Asserts that the current token has one of the given types.
#
# Returns nil when it does. Otherwise raises a RuntimeError whose message
# names the type of the current token and the expected types.
def type!(*types)
  return if type?(*types)

  raise "Unexpected token: #{lookahead1&.[](0)}, where expected token: #{types.join(",")}"
end

#type2?(*types) ⇒ `Boolean`

Tests if the lookahead2 token has one of the specified types

Returns:

(Boolean)



309
310
311

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 309

# Tests whether the second lookahead token has one of the given types.
#
# Returns false when there is no second lookahead token or when `types` is
# empty.
def type2?(*types)
  following = lookahead2
  return false unless following

  actual = following[0]
  types.any? { |expected| actual == expected }
end

#type?(*types) ⇒ `Boolean`

Tests if the current token has one of the specified types

Returns:

(Boolean)



301
302
303

# File 'lib/rbs/inline/annotation_parser/tokenizer.rb', line 301

# Tests whether the current token (lookahead1) has one of the given types.
#
# Returns false when there is no current token or when `types` is empty.
def type?(*types)
  current = lookahead1
  return false unless current

  actual = current[0]
  types.any? { |expected| actual == expected }
end

Class: RBS::Inline::AnnotationParser::Tokenizer

Constant Summary collapse

Constants included from Tokens

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(scanner) ⇒ Tokenizer

Instance Attribute Details

#lookahead_tokens ⇒ Object (readonly)

#scanner ⇒ Object (readonly)

Instance Method Details

#advance(tree, eat: false) ⇒ Object

#consume_token(*types, tree:) ⇒ Object

#consume_token!(*types, tree:) ⇒ Object

#consume_trivias(tree) ⇒ Object

#current_position ⇒ Object

#lookahead1 ⇒ Object

#lookahead2 ⇒ Object

#lookaheads ⇒ Object

#reset(position, tree) ⇒ Object

#rest ⇒ Object

#skip_to_comment ⇒ Object

#stuck? ⇒ Boolean

#type!(*types) ⇒ Object

#type2?(*types) ⇒ Boolean

#type?(*types) ⇒ Boolean

#initialize(scanner) ⇒ `Tokenizer`

#lookahead_tokens ⇒ `Object` (readonly)

#scanner ⇒ `Object` (readonly)

#advance(tree, eat: false) ⇒ `Object`

#consume_token(*types, tree:) ⇒ `Object`

#consume_token!(*types, tree:) ⇒ `Object`

#consume_trivias(tree) ⇒ `Object`

#current_position ⇒ `Object`

#lookahead1 ⇒ `Object`

#lookahead2 ⇒ `Object`

#lookaheads ⇒ `Object`

#reset(position, tree) ⇒ `Object`

#rest ⇒ `Object`

#skip_to_comment ⇒ `Object`

#stuck? ⇒ `Boolean`

#type!(*types) ⇒ `Object`

#type2?(*types) ⇒ `Boolean`

#type?(*types) ⇒ `Boolean`