Class: CSVPlusPlus::Lexer::Tokenizer

Inherits:
Object
Extended by:
T::Sig
Defined in:
lib/csv_plus_plus/lexer/tokenizer.rb

Overview

A class that contains the use-case-specific regexes for parsing

Instance Attribute Summary

Instance Method Summary

Constructor Details

#initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil) ⇒ Tokenizer

Returns a new instance of Tokenizer.

Parameters:

  • tokens (Array<Token>)

    The list of tokens to scan

  • catchall (Regexp) (defaults to: nil)

    A final regexp to try if nothing else matches

  • ignore (Regexp) (defaults to: nil)

    Ignore anything matching this regexp

  • alter_matches (Object) (defaults to: {})

    A map from token name to a Proc that alters (post-processes) that token’s matched value

  • stop_fn (Proc) (defaults to: nil)

    Stop parsing when this returns true (it is called with the scanner)



# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 29

def initialize(tokens:, catchall: nil, ignore: nil, alter_matches: {}, stop_fn: nil)
  @last_token = ::T.let(nil, ::T.nilable(::CSVPlusPlus::Lexer::Token))

  @catchall = catchall
  @ignore = ignore
  @tokens = tokens
  @stop_fn = stop_fn
  @alter_matches = alter_matches
end
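
For example, a caller might build a tokenizer like this. This is a minimal sketch, not code from the gem: the ExampleToken struct and the token definitions are hypothetical, assuming only that each token responds to #regexp and #token the way #scan_tokens! and #last_match use them below.

# Hypothetical token definitions for a tiny expression language.
ExampleToken = ::Struct.new(:regexp, :token)

tokenizer = ::CSVPlusPlus::Lexer::Tokenizer.new(
  tokens: [
    ExampleToken.new(/\d+/, :number),
    ExampleToken.new(/[a-zA-Z_]\w*/, :identifier)
  ],
  catchall: /[(),]/,                      # try single punctuation characters last
  ignore: /\s+/,                          # skip whitespace between tokens
  stop_fn: ->(scanner) { scanner.eos? }   # stop once the input is exhausted
)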

Instance Attribute Details

#last_token ⇒ Token? (readonly)

The last token that’s been matched.

Returns:

  • (Token, nil)

    the current value of last_token



# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 9

def last_token
  @last_token
end

Instance Method Details

#last_match ⇒ String?

The value of the last token matched

Returns:

  • (String, nil)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 93

def last_match
  # rubocop:disable Style/MissingElse
  if @last_token && @alter_matches.key?(@last_token.token.to_sym)
    # rubocop:enable Style/MissingElse
    return ::T.must(@alter_matches[@last_token.token.to_sym]).call(scanner.matched)
  end

  scanner.matched
end
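
As a sketch of how alter_matches interacts with this method (hypothetical token and key; an entry keyed by the matched token’s name is called with the raw matched text):

tokenizer = ::CSVPlusPlus::Lexer::Tokenizer.new(
  tokens: [ExampleToken.new(/"[^"]*"/, :string)],   # ExampleToken as sketched above
  alter_matches: {
    string: ->(matched) { matched[1..-2] }          # strip the surrounding quotes
  }
)
tokenizer.scan('"hello"')
tokenizer.scan_tokens!
tokenizer.last_match  # => 'hello'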

#matches_ignore? ⇒ boolean

Scan input against the ignore pattern

Returns:

  • (boolean)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 85

def matches_ignore?
  scanner.scan(@ignore) if @ignore
end

#peek(peek_characters: 100) ⇒ String

Read the input but don’t consume it

Parameters:

  • peek_characters (Integer) (defaults to: 100)

Returns:

  • (String)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 109

def peek(peek_characters: 100)
  scanner.peek(peek_characters)
end

#rest ⇒ String

The rest of the unparsed input. The tokenizer might not need to parse the entire input.

Returns:

  • (String)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 125

def rest
  scanner.rest
end

#scan(input) ⇒ Tokenizer

Initializes a scanner for the given input to be parsed

Parameters:

  • input (String)

    The input to be tokenized

Returns:

  • (Tokenizer)

# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 45

def scan(input)
  @scanner = ::T.let(::StringScanner.new(input.strip), ::T.nilable(::StringScanner))
  self
end
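
Since #scan returns the tokenizer itself, it can be chained with the other scanning methods (a small sketch):

tokenizer.scan('  foo 42  ').rest  # => 'foo 42' (the input is stripped; nothing has been consumed yet)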

#scan_catchall ⇒ String?

Scan input against the catchall pattern

Returns:

  • (String, nil)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 77

def scan_catchall
  scanner.scan(@catchall) if @catchall
end

#scan_tokens! ⇒ Token?

Scan tokens and set @last_token if any match

Returns:

  • (Token, nil)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 69

def scan_tokens!
  @last_token = @tokens.find { |t| scanner.scan(t.regexp) }
end
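
Putting the scanning methods together, a parser driving this class might loop roughly as follows. This is a sketch, not code from the gem; handle_token and handle_catchall are hypothetical placeholders for whatever the caller does with each match:

tokenizer.scan(input)

until tokenizer.scanner.eos? || tokenizer.stop?
  next if tokenizer.matches_ignore?   # consumes and skips ignorable input

  if tokenizer.scan_tokens!
    handle_token(tokenizer.last_token, tokenizer.last_match)
  elsif (matched = tokenizer.scan_catchall)
    handle_catchall(matched)
  else
    raise("don't know how to tokenize: #{tokenizer.peek}")
  end
end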

#scanner ⇒ StringScanner

Returns the currently initialized StringScanner. You must call #scan first or else this will throw an exception.

Returns:

  • (StringScanner)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 55

def scanner
  # The caller needs to initialize this class with a call to #scan before we can do anything.  It sets up the
  # +@scanner+ with its necessary input.
  unless @scanner
    raise(::CSVPlusPlus::Error::CompilerError, 'Called Tokenizer#scanner without calling #scan first')
  end

  @scanner
end
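
For example, calling any of the scanning methods before #scan has been called raises this error (a small illustration):

::CSVPlusPlus::Lexer::Tokenizer.new(tokens: []).peek
# raises CSVPlusPlus::Error::CompilerError: 'Called Tokenizer#scanner without calling #scan first'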

#stop? ⇒ boolean

Scan for our stop token (if there is one - some parsers stop early and some don’t)

Returns:

  • (boolean)


# File 'lib/csv_plus_plus/lexer/tokenizer.rb', line 117

def stop?
  @stop_fn ? @stop_fn.call(scanner) : false
end
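
Since the stop_fn is called with the underlying StringScanner, a parser that only wants to consume input up to the end of the current line could, for example, pass something like this (a hypothetical stop_fn):

stop_at_newline = ->(scanner) { scanner.scan(/\n/) }

tokenizer = ::CSVPlusPlus::Lexer::Tokenizer.new(tokens: [], stop_fn: stop_at_newline)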