Class: RubyLsp::Document::Utf8Scanner

Inherits:
Scanner
  • Object
show all
Defined in:
lib/ruby_lsp/document.rb

Overview

For the UTF-8 encoding, the character component of a position is a byte offset within its line.

Constant Summary

Constants inherited from Scanner

Scanner::LINE_BREAK, Scanner::SURROGATE_PAIR_START

Instance Method Summary collapse

Constructor Details

#initialize(source) ⇒ Utf8Scanner

: (String source) -> void



205
206
207
208
209
# File 'lib/ruby_lsp/document.rb', line 205

# Builds a scanner over the raw bytes of +source+.
#
# @param source [String] the text whose positions will be resolved
def initialize(source)
  # NOTE(review): the parent constructor presumably sets up @pos and @current_line, which
  # find_char_position reads and advances — confirm against Scanner#initialize.
  super()

  # Running count of characters consumed so far; starts at zero and is advanced by find_char_position.
  @character_length = 0 #: Integer

  # Positions are resolved by walking this byte array rather than the string itself.
  @bytes = source.bytes #: Array[Integer]
end

Instance Method Details

#find_char_position(position) ⇒ Object

: (Hash[Symbol, untyped] position) -> Integer



213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
# File 'lib/ruby_lsp/document.rb', line 213

# Translates a position whose :character component is a byte offset within its line into a count of
# characters measured from the start of the source.
#
# The scanner state (@pos, @current_line, @character_length) is advanced to the beginning of the requested
# line and left there. The scan only moves forward: asking for a line before @current_line walks through
# the remaining bytes and ends up raising.
#
# @param position [Hash] hash with :line and :character entries
# @return [Integer] characters before the start of the line plus characters covered by the byte offset
# @raise [InvalidLocationError] when the bytes run out before the requested position is reached
def find_char_position(position)
  target_line = position[:line]

  # Walk forward one whole line at a time until the scanner sits on the first byte of the requested line.
  # NOTE(review): character_byte_length presumably maps a leading byte to the width of its character —
  # confirm against its definition.
  while @current_line != target_line
    current = @bytes[@pos] #: Integer?
    raise InvalidLocationError unless current

    # Step over every character of the current line, one full character per iteration
    while current != LINE_BREAK
      @pos += character_byte_length(current)
      @character_length += 1
      current = @bytes[@pos]
      raise InvalidLocationError unless current
    end

    # The line break itself counts as one character and occupies a single byte
    @pos += 1
    @character_length += 1
    @current_line += 1
  end

  # From here on only locals are touched: @character_length must keep pointing at the start of the line so
  # that resolving the same position a second time yields the same answer.
  requested_bytes = position[:character]
  consumed_bytes = 0
  characters_in_line = 0

  # A character that is only partially covered by the requested byte offset is still counted in full,
  # because the offset jumps by whole characters.
  while consumed_bytes < requested_bytes
    current = @bytes[@pos + consumed_bytes] #: Integer?
    raise InvalidLocationError unless current

    consumed_bytes += character_byte_length(current)
    characters_in_line += 1
  end

  @character_length + characters_in_line
end