Class: Tokenizers::Tokenizer
Inherits: Object
Extended by: FromPretrained
Defined in: lib/tokenizers/tokenizer.rb
Constant Summary
FromPretrained::TOKENIZERS_VERSION
Instance Method Summary
from_pretrained (extended from FromPretrained)
Instance Method Details
#decode(ids, skip_special_tokens: true) ⇒ Object
21
22
23
|
# File 'lib/tokenizers/tokenizer.rb', line 21
# Decode a sequence of token ids back into a string.
# skip_special_tokens: when true, special tokens (padding/BOS/EOS etc.) are
# presumably omitted from the output — the behavior lives in the native
# _decode binding, which is not visible here; confirm against the extension.
def decode(ids, skip_special_tokens: true)
  _decode(ids, skip_special_tokens)
end
|
#decode_batch(sequences, skip_special_tokens: true) ⇒ Object
25
26
27
|
# File 'lib/tokenizers/tokenizer.rb', line 25
# Decode several id sequences at once.
# sequences: presumably an array of id arrays (one per encoded sequence) —
# TODO confirm against the native _decode_batch binding.
# skip_special_tokens: forwarded unchanged; see #decode.
def decode_batch(sequences, skip_special_tokens: true)
  _decode_batch(sequences, skip_special_tokens)
end
|
#enable_padding(**options) ⇒ Object
29
30
31
|
# File 'lib/tokenizers/tokenizer.rb', line 29
# Enable padding on the tokenizer. All keyword options are collected into a
# hash and forwarded unchanged to the native _enable_padding binding; the
# accepted option names and defaults are defined there, not visible here.
def enable_padding(**options)
  _enable_padding(options)
end
|
#enable_truncation(max_length, **options) ⇒ Object
33
34
35
|
# File 'lib/tokenizers/tokenizer.rb', line 33
# Enable truncation at max_length tokens. Remaining keyword options are
# collected into a hash and forwarded unchanged to the native
# _enable_truncation binding, which defines the accepted option names.
def enable_truncation(max_length, **options)
  _enable_truncation(max_length, options)
end
|
#encode(sequence, pair = nil, is_pretokenized: false, add_special_tokens: true) ⇒ Object
13
14
15
|
# File 'lib/tokenizers/tokenizer.rb', line 13
# Encode a sequence (optionally with a paired second sequence) into tokens.
# pair: optional second sequence, e.g. for sentence-pair tasks.
# is_pretokenized: presumably signals the input is already split into words.
# add_special_tokens: presumably controls insertion of model special tokens.
# All flags are forwarded positionally to the native _encode binding —
# exact semantics live there; confirm against the extension.
def encode(sequence, pair = nil, is_pretokenized: false, add_special_tokens: true)
  _encode(sequence, pair, is_pretokenized, add_special_tokens)
end
|
#encode_batch(input, is_pretokenized: false, add_special_tokens: true) ⇒ Object
17
18
19
|
# File 'lib/tokenizers/tokenizer.rb', line 17
# Encode several inputs at once. input is presumably an array of sequences
# (or sequence pairs) — TODO confirm against the native _encode_batch
# binding. Flags are forwarded positionally; see #encode for their meaning.
def encode_batch(input, is_pretokenized: false, add_special_tokens: true)
  _encode_batch(input, is_pretokenized, add_special_tokens)
end
|
#save(path, pretty: false) ⇒ Object
9
10
11
|
# File 'lib/tokenizers/tokenizer.rb', line 9
# Serialize the tokenizer to a file at `path` via the native _save binding.
# pretty: presumably pretty-prints the serialized JSON — confirm in the
# extension; this wrapper only forwards the flag.
def save(path, pretty: false)
  _save(path, pretty)
end
|
#to_s(pretty: false) ⇒ Object
5
6
7
|
# File 'lib/tokenizers/tokenizer.rb', line 5
# String representation, delegated to the native _to_s binding (presumably
# the tokenizer's JSON serialization, with `pretty` toggling formatting —
# confirm against the extension).
def to_s(pretty: false)
  _to_s(pretty)
end
|
#vocab(with_added_tokens: true) ⇒ Object
37
38
39
|
# File 'lib/tokenizers/tokenizer.rb', line 37
# Return the vocabulary via the native _vocab binding (presumably a
# token => id Hash — confirm). with_added_tokens: presumably controls
# whether tokens added after training are included.
def vocab(with_added_tokens: true)
  _vocab(with_added_tokens)
end
|
#vocab_size(with_added_tokens: true) ⇒ Object
41
42
43
|
# File 'lib/tokenizers/tokenizer.rb', line 41
# Return the vocabulary size via the native _vocab_size binding.
# with_added_tokens: presumably mirrors #vocab's flag, counting (or not)
# tokens added after training — confirm against the extension.
def vocab_size(with_added_tokens: true)
  _vocab_size(with_added_tokens)
end
|