Class: RubyLsp::Document::Utf16Scanner

Inherits:
Scanner
  • Object
show all
Defined in:
lib/ruby_lsp/document.rb

Overview

For the UTF-16 encoding, positions are measured in UTF-16 code units, so any character that must be encoded as a surrogate pair (a code point outside the Basic Multilingual Plane) counts as length 2

Constant Summary

Constants inherited from Scanner

Scanner::LINE_BREAK, Scanner::SURROGATE_PAIR_START

Instance Method Summary collapse

Constructor Details

#initialize(source) ⇒ Utf16Scanner

: (String) -> void



268
269
270
271
# File 'lib/ruby_lsp/document.rb', line 268

def initialize(source)
  # Let the parent scanner set up its own state first, then keep the source as an array of code points so
  # that later lookups can index into it directly.
  super()
  @codepoints = source.each_codepoint.to_a #: Array[Integer]
end

Instance Method Details

#find_char_position(position) ⇒ Object

: (Hash[Symbol, untyped] position) -> Integer



275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
# File 'lib/ruby_lsp/document.rb', line 275

def find_char_position(position)
  # Resolves a position hash (:line, :character) to an index into @codepoints, where :character is counted in
  # UTF-16 code units. The scan only moves forward: @pos and @current_line are advanced to the start of the
  # requested line and stay there. Raises InvalidLocationError if the source ends before the requested line or
  # column is reached.

  # Phase 1: consume whole lines (including their line break) until we sit at the start of the requested one
  while @current_line != position[:line]
    loop do
      current = @codepoints[@pos] #: Integer?
      raise InvalidLocationError unless current

      @pos += 1
      break if current == LINE_BREAK
    end

    @current_line += 1
  end

  # Phase 2: walk forward from the start of the line, counting UTF-16 code units, until the requested column is
  # covered. `cursor` is the code point index; `units_seen` is the number of code units consumed so far
  cursor = @pos
  units_seen = 0

  while units_seen < position[:character]
    current = @codepoints[cursor]
    raise InvalidLocationError unless current

    # Code points above SURROGATE_PAIR_START take two code units (a surrogate pair), everything else takes one
    units_seen += current > SURROGATE_PAIR_START ? 2 : 1
    cursor += 1
  end

  cursor
end