Class: Tiktoken::Encoding

Inherits:
Object
  • Object
show all
Defined in:
lib/tiktoken_ruby/encoding.rb

Constant Summary collapse

CACHE_MUTEX =
Mutex.new

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Instance Attribute Details

#name ⇒ Object (readonly)

Returns the value of attribute name.



6
7
8
# File 'lib/tiktoken_ruby/encoding.rb', line 6

# Reader for the encoding's name.
#
# @return [Object] the current value of the @name instance variable
# NOTE(review): @name is assigned outside this excerpt — presumably in the
# constructor; confirm.
def name
  @name
end

Class Method Details

.for_name(encoding) ⇒ Tiktoken::Encoding

This returns a new Tiktoken::Encoding instance for the requested encoding

Parameters:

  • encoding (Symbol)

    The name of the encoding to load

Returns:



11
12
13
# File 'lib/tiktoken_ruby/encoding.rb', line 11

# Builds a new Tiktoken::Encoding for the requested encoding.
#
# The encoding name is normalized to a Symbol once and then used both to
# select the factory method on Tiktoken::BpeFactory and as the name handed to
# the new instance.
#
# @param encoding [Symbol, #to_sym] The name of the encoding to load
# @return [Tiktoken::Encoding] a newly constructed encoding (no caching here)
# @raise [NoMethodError] if Tiktoken::BpeFactory does not respond publicly to
#   the requested name
def self.for_name(encoding)
  name = encoding.to_sym
  # public_send (not send): a caller-supplied name must only be able to reach
  # the factory's public API, never private or Kernel methods (:exit, :system, ...).
  Tiktoken::Encoding.new(Tiktoken::BpeFactory.public_send(name), name)
end

.for_name_cached(encoding) ⇒ Tiktoken::Encoding

This returns a Tiktoken::Encoding instance for the requested encoding. It will reuse an existing instance if that encoding has already been loaded.

Parameters:

  • encoding (Symbol)

    The name of the encoding to load

Returns:



19
20
21
22
23
24
# File 'lib/tiktoken_ruby/encoding.rb', line 19

# Returns the Tiktoken::Encoding for the requested encoding, building it via
# Tiktoken::Encoding.for_name on first use and handing back the stored
# instance afterwards.
#
# @param encoding [Symbol, #to_sym] The name of the encoding to load
# @return [Tiktoken::Encoding] the cached (or newly created) encoding
def self.for_name_cached(encoding)
  cache_key = encoding.to_sym

  # Lookup and insertion happen under CACHE_MUTEX so the lazily created
  # @encodings hash is only touched by one thread at a time.
  CACHE_MUTEX.synchronize do
    loaded = (@encodings ||= {})
    loaded[cache_key] ||= Tiktoken::Encoding.for_name(encoding)
  end
end

Instance Method Details

#decode(tokens) ⇒ String

Decodes the tokens back into text

Parameters:

  • tokens (Array<Integer>)

    The tokens to decode

Returns:

  • (String)

    The decoded text



53
54
55
# File 'lib/tiktoken_ruby/encoding.rb', line 53

# Decodes the tokens back into text by delegating to #decode on the wrapped
# @ext_base_bpe object.
#
# @param tokens [Array<Integer>] The tokens to decode
# @return [String] The decoded text
# NOTE(review): @ext_base_bpe is assigned outside this excerpt — presumably the
# BPE object handed to the constructor; confirm.
def decode(tokens)
  @ext_base_bpe.decode(tokens)
end

#encode(text, allowed_special: []) ⇒ Array<Integer>

Encodes the text as a list of integer tokens. Special (non-text) tokens are treated as plain text unless they are listed in the allowed_special array; in effect, the text is handled as if it had been escaped.

Parameters:

  • text (String)

    The text to encode

  • allowed_special (Array<String>) (defaults to: [])

    An array of special tokens to allow

Returns:

  • (Array<Integer>)

    The encoded tokens



39
40
41
# File 'lib/tiktoken_ruby/encoding.rb', line 39

# Encodes the text as a list of integer tokens by delegating to #encode on the
# wrapped @ext_base_bpe object. The allowed_special keyword is forwarded to
# that call as a second positional argument.
#
# @param text [String] The text to encode
# @param allowed_special [Array<String>] An array of special tokens to allow
#   (defaults to an empty array)
# @return [Array<Integer>] The encoded tokens
# NOTE(review): @ext_base_bpe is assigned outside this excerpt — presumably the
# BPE object handed to the constructor; confirm.
def encode(text, allowed_special: [])
  @ext_base_bpe.encode(text, allowed_special)
end

#encode_ordinary(text) ⇒ Array<Integer>

Encodes the text as a list of integer tokens. This encoding will encode special non-text tokens; basically, it's unescaped.

Parameters:

  • text (String)

    The text to encode

Returns:

  • (Array<Integer>)

    The encoded tokens



30
31
32
# File 'lib/tiktoken_ruby/encoding.rb', line 30

# Encodes the text as a list of integer tokens by delegating to
# #encode_ordinary on the wrapped @ext_base_bpe object.
#
# @param text [String] The text to encode
# @return [Array<Integer>] The encoded tokens
# NOTE(review): how special tokens are treated is decided entirely by the
# underlying object's #encode_ordinary, which is not visible here — confirm
# against the extension before relying on it.
def encode_ordinary(text)
  @ext_base_bpe.encode_ordinary(text)
end

#encode_with_special_tokens(text) ⇒ Array<Integer>

Encodes the text as a list of integer tokens, including special tokens.

Parameters:

  • text (String)

    The text to encode

Returns:

  • (Array<Integer>)

    The encoded tokens



46
47
48
# File 'lib/tiktoken_ruby/encoding.rb', line 46

# Encodes the text as a list of integer tokens, including special tokens, by
# delegating to #encode_with_special_tokens on the wrapped @ext_base_bpe object.
#
# @param text [String] The text to encode
# @return [Array<Integer>] The encoded tokens
# NOTE(review): @ext_base_bpe is assigned outside this excerpt — presumably the
# BPE object handed to the constructor; confirm.
def encode_with_special_tokens(text)
  @ext_base_bpe.encode_with_special_tokens(text)
end