Module: Tokenizers

Defined in:
lib/tokenizers.rb,
lib/tokenizers/version.rb,
lib/tokenizers/encoding.rb,
lib/tokenizers/tokenizer.rb,
lib/tokenizers/models/bpe.rb,
lib/tokenizers/decoders/ctc.rb,
lib/tokenizers/decoders/strip.rb,
lib/tokenizers/models/unigram.rb,
lib/tokenizers/from_pretrained.rb,
lib/tokenizers/models/word_level.rb,
lib/tokenizers/models/word_piece.rb,
lib/tokenizers/normalizers/strip.rb,
lib/tokenizers/char_bpe_tokenizer.rb,
lib/tokenizers/decoders/metaspace.rb,
lib/tokenizers/decoders/word_piece.rb,
lib/tokenizers/normalizers/prepend.rb,
lib/tokenizers/decoders/bpe_decoder.rb,
lib/tokenizers/pre_tokenizers/split.rb,
lib/tokenizers/trainers/bpe_trainer.rb,
lib/tokenizers/pre_tokenizers/digits.rb,
lib/tokenizers/processors/byte_level.rb,
lib/tokenizers/pre_tokenizers/metaspace.rb,
lib/tokenizers/trainers/unigram_trainer.rb,
lib/tokenizers/pre_tokenizers/byte_level.rb,
lib/tokenizers/pre_tokenizers/punctuation.rb,
lib/tokenizers/normalizers/bert_normalizer.rb,
lib/tokenizers/trainers/word_level_trainer.rb,
lib/tokenizers/trainers/word_piece_trainer.rb,
lib/tokenizers/processors/roberta_processing.rb,
lib/tokenizers/processors/template_processing.rb

Defined Under Namespace

Modules: Decoders, FromPretrained, Models, Normalizers, PreTokenizers, Processors, Trainers
Classes: CharBPETokenizer, Encoding, Error, Tokenizer

Constant Summary collapse

VERSION =
"0.4.3"

Class Method Summary collapse

Class Method Details

.from_file ⇒ Object



58
59
60
# File 'lib/tokenizers.rb', line 58

# Module-level convenience wrapper around Tokenizer.from_file.
#
# Every positional argument, keyword argument, and block is forwarded
# unchanged via `...`; nothing is inspected or validated here.
#
# NOTE(review): presumably loads a tokenizer from a serialized file on
# disk -- confirm against Tokenizer.from_file, which is defined elsewhere.
#
# @return [Object] whatever Tokenizer.from_file returns
def self.from_file(...)
  Tokenizer.from_file(...)
end

.from_pretrained ⇒ Object



54
55
56
# File 'lib/tokenizers.rb', line 54

# Module-level convenience wrapper around Tokenizer.from_pretrained.
#
# Every positional argument, keyword argument, and block is forwarded
# unchanged via `...`; nothing is inspected or validated here.
#
# NOTE(review): presumably resolves a pretrained tokenizer by identifier
# (see lib/tokenizers/from_pretrained.rb) -- confirm against
# Tokenizer.from_pretrained, which is defined elsewhere.
#
# @return [Object] whatever Tokenizer.from_pretrained returns
def self.from_pretrained(...)
  Tokenizer.from_pretrained(...)
end