Module: CSVPlusPlus::Lexer

Extended by:
T::Sig
Defined in:
lib/csv_plus_plus/lexer.rb,
lib/csv_plus_plus/lexer/tokenizer.rb,
lib/csv_plus_plus/lexer/racc_lexer.rb

Overview

Code for tokenizing a csvpp file

Defined Under Namespace

Modules: RaccLexer Classes: Token, Tokenizer

Constant Summary collapse

# Literal '---'.
# NOTE(review): going by the name, this is the marker that ends the code section of a
# template - confirm against the parser that consumes it.
END_OF_CODE_SECTION =
'---'
# Literal '$$' - the same two characters that the VAR_REF entry of TOKEN_LIBRARY matches (/\$\$/).
VARIABLE_REF =
'$$'
# Frozen lookup table from a token name to the Token (regexp + token symbol) that recognizes it.
#
# NOTE(review): these patterns carry no anchors of their own; how they are applied to the input
# (e.g. scanned at the current position) is decided by the tokenizer - confirm there.
TOKEN_LIBRARY =
::T.let(
  {
    # A1_NOTATION: ::CSVPlusPlus::Lexer::Token.new(
    # regexp: ::CSVPlusPlus::A1Reference::A1_NOTATION_REGEXP, token: :A1_NOTATION
    # ),
    FALSE: ::CSVPlusPlus::Lexer::Token.new(regexp: /false/i, token: :FALSE),
    HEX_COLOR: ::CSVPlusPlus::Lexer::Token.new(regexp: ::CSVPlusPlus::Color::HEX_STRING_REGEXP, token: :HEX_COLOR),
    # Ruby tries alternatives left to right and keeps the first one that matches, so the
    # two-character operators (<=, >=, <>) must be listed before their one-character prefixes
    # (<, >); otherwise "<=" can only ever match as "<".
    INFIX_OP: ::CSVPlusPlus::Lexer::Token.new(regexp: %r{\^|\+|-|\*|/|&|<=|>=|<>|<|>}, token: :INFIX_OP),
    # Permissive: an optional minus followed by any run of digits and dots.
    NUMBER: ::CSVPlusPlus::Lexer::Token.new(regexp: /-?[\d.]+/, token: :NUMBER),
    # Any run of word characters, '$', '!' and ':'.
    REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /[$!\w:]+/, token: :REF),
    # Double-quoted string; the only escapes accepted are \" \\ \/ \b \f \n \r \t and \uXXXX.
    STRING: ::CSVPlusPlus::Lexer::Token.new(
      regexp: %r{"(?:[^"\\]|\\(?:["\\/bfnrt]|u[0-9a-fA-F]{4}))*"},
      token: :STRING
    ),
    TRUE: ::CSVPlusPlus::Lexer::Token.new(regexp: /true/i, token: :TRUE),
    # Two literal dollar signs.
    VAR_REF: ::CSVPlusPlus::Lexer::Token.new(regexp: /\$\$/, token: :VAR_REF)
  }.freeze,
  ::T::Hash[::Symbol, ::CSVPlusPlus::Lexer::Token]
)
RaccToken =

TODO: ugh clean this up

# A RaccToken is any one of the two-element tuple shapes listed below.
::T.type_alias do
  ::T.any(
    # Every String/Symbol pairing is accepted, in either order.
    [::String, ::Symbol],
    [::Symbol, ::String],
    [::String, ::String],
    [::Symbol, ::Symbol],
    # NOTE(review): presumably the [false, false] pair that signals end of input to racc - confirm.
    [::FalseClass, ::FalseClass]
  )
end

Class Method Summary collapse

Class Method Details

.preprocess(str) ⇒ String

Run any transformations to the input before going into the CSV parser

The CSV parser in particular does not like it when there is whitespace after a double quote and before the next comma.

Parameters:

  • str (String)

Returns:

  • (String)


53
54
55
# File 'lib/csv_plus_plus/lexer.rb', line 53

# Normalize raw input before it is handed to the CSV parser.
#
# Any whitespace sitting between a double quote and the comma that follows it is dropped, so
# `"foo"   ,bar` becomes `"foo",bar`.
#
# @param str [String] the raw input
#
# @return [String] a new string with the quote-to-comma whitespace removed
def self.preprocess(str)
  quote_padding_comma = /"\s*,/
  str.gsub(quote_padding_comma) { '",' }
end

.unquote(str) ⇒ ::String

When parsing a modifier with a quoted string field, we need a way to unescape. Some examples of quoted and unquoted results:

  • “just a string” => “just a string”

  • “‘ this is a string’” => “this is a string”

  • “won\'t this work?” => “won't this work?”

Parameters:

  • str (::String)

Returns:

  • (::String)


68
69
70
71
72
73
74
75
# File 'lib/csv_plus_plus/lexer.rb', line 68

# Strip the optional surrounding single quotes from a modifier value and resolve its backslash
# escapes.
#
# @param str [::String] the raw (possibly quoted and escaped) value
#
# @return [::String] a new string with the quoting and escaping removed
def self.unquote(str)
  # Done in three passes:
  #
  # 1. Trim whitespace plus one optional ' at the start, and one optional ' plus whitespace at
  #    the end.
  #    NOTE(review): ^ and $ match at every line boundary, so multi-line input is trimmed per
  #    line; switch to \A / \z if only the ends of the whole string should be trimmed.
  # 2. Drop each backslash that has a non-backslash character directly on both sides.
  #    Lookbehind/lookahead are used so the neighbours are not consumed by the match - a
  #    consuming pattern would skip every other escape in input such as `a\b\c`.
  # 3. Collapse each doubled backslash into a single one (block form, so the replacement is
  #    taken literally).
  trimmed = str.gsub(/^\s*'?|'?\s*$/, '')
  unescaped = trimmed.gsub(/(?<=[^\\])\\(?=[^\\])/, '')
  unescaped.gsub(/\\\\/) { '\\' }
end