Class: RubyLsp::Document::Scanner

Inherits:
Object
  • Object
show all
Extended by:
T::Sig
Defined in:
lib/ruby_lsp/document.rb

Constant Summary collapse

LINE_BREAK =
T.let(0x0A, Integer)
SURROGATE_PAIR_START =

Code points above 0xFFFF are encoded in UTF-16 as a surrogate pair (two code units), so each one counts as length 2 and character offsets must be adjusted to account for that

T.let(0xFFFF, Integer)

Instance Method Summary collapse

Constructor Details

#initialize(source, encoding) ⇒ Scanner

Returns a new instance of Scanner.



101
102
103
104
105
106
# File 'lib/ruby_lsp/document.rb', line 101

# Builds a scanner over +source+.
#
# The source is stored as an array of Unicode codepoints so that positions can be resolved by codepoint index.
# The scan cursor starts at codepoint index 0 on line 0.
#
# @param source [String] text to scan (must respond to +codepoints+)
# @param encoding [Object] position encoding; compared against "utf-16" when resolving positions
def initialize(source, encoding)
  @encoding = encoding
  @source = T.let(source.codepoints, T::Array[Integer])
  # Cursor state: codepoint index of the current line's start, and that line's number
  @pos = T.let(0, Integer)
  @current_line = T.let(0, Integer)
end

Instance Method Details

#find_char_position(position) ⇒ Object



110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/ruby_lsp/document.rb', line 110

# Resolves a line/character position into an index into the codepoint array.
#
# The scanner keeps its cursor (+@pos+, +@current_line+) between calls, so resolving positions in ascending line
# order only scans forward. A request for a line before the current one rewinds the cursor to the start of the
# source and rescans. A request for a line past the end of the source resolves to the end of the source instead
# of scanning forever.
#
# @param position [Hash] hash-like object with an Integer +:line+ and an Integer +:character+
# @return [Integer] codepoint index for the requested position
def find_char_position(position)
  target_line = position[:line]
  source_length = @source.length

  # The cursor only moves forward, so an earlier line requires starting over from the top
  if target_line < @current_line
    @pos = 0
    @current_line = 0
  end

  # Find the character index for the beginning of the requested line
  until @current_line == target_line
    @pos += 1 until @pos >= source_length || LINE_BREAK == @source[@pos]

    # No more line breaks: the requested line does not exist, so clamp to the end of the source
    return source_length if @pos >= source_length

    @pos += 1
    @current_line += 1
  end

  # The final position is the beginning of the line plus the requested column. If the encoding is UTF-16, we also
  # need to adjust for surrogate pairs
  requested_position = @pos + position[:character]
  requested_position -= utf_16_character_position_correction(@pos, requested_position) if @encoding == "utf-16"
  requested_position
end

#utf_16_character_position_correction(current_position, requested_position) ⇒ Object



128
129
130
131
132
133
134
135
136
137
138
139
# File 'lib/ruby_lsp/document.rb', line 128

# Computes how much a UTF-16 based offset overshoots the matching codepoint index.
#
# +requested_position+ is the line start plus a column counted in UTF-16 code units. Every codepoint above
# SURROGATE_PAIR_START that precedes the target takes two code units but only one slot in +@source+, so the
# caller subtracts the returned count to land on the right codepoint.
#
# @param current_position [Integer] codepoint index where the line starts
# @param requested_position [Integer] line start plus the UTF-16 column, before any correction
# @return [Integer] number of surrogate pairs located before the target position
def utf_16_character_position_correction(current_position, requested_position)
  utf16_unicode_correction = 0

  # Each surrogate pair found moves the real target one codepoint closer, so the scan limit shrinks as the
  # correction grows. Scanning all the way to the uncorrected offset would also count pairs that come after the
  # target and over-correct.
  while current_position < requested_position - utf16_unicode_correction
    codepoint = @source[current_position]
    utf16_unicode_correction += 1 if codepoint && codepoint > SURROGATE_PAIR_START

    current_position += 1
  end

  utf16_unicode_correction
end