Class: Tokenizers::Trainers::WordPieceTrainer

Inherits:
Object
  • Object
show all
Defined in:
lib/tokenizers/trainers/word_piece_trainer.rb

Class Method Summary collapse

Class Method Details

.new(vocab_size: 30000, min_frequency: 0, show_progress: true, special_tokens: [], limit_alphabet: nil, initial_alphabet: [], continuing_subword_prefix: "##", end_of_word_suffix: nil) ⇒ Object



4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
# File 'lib/tokenizers/trainers/word_piece_trainer.rb', line 4

# Builds a trainer by collecting every keyword option into a single Hash
# (symbol keys, in the order declared below) and handing that Hash to
# `_new` as one positional argument. `_new` is defined outside this
# excerpt; whatever it returns is returned unchanged.
#
# NOTE(review): the parameter meanings below are inferred from their names
# only; nothing in this method inspects or validates them. Confirm against
# the underlying trainer implementation.
#
# @param vocab_size [Integer] presumably the target vocabulary size
# @param min_frequency [Integer] presumably the minimum frequency for a token to be kept
# @param show_progress [Boolean] presumably toggles progress output during training
# @param special_tokens [Array] presumably tokens that are always included
# @param limit_alphabet [Integer, nil] presumably caps the alphabet size; nil by default
# @param initial_alphabet [Array] presumably characters seeded into the alphabet
# @param continuing_subword_prefix [String] forwarded as-is; defaults to "##"
# @param end_of_word_suffix [String, nil] forwarded as-is; nil by default
# @return [Object] the result of `_new`
def self.new(
  vocab_size: 30000,
  min_frequency: 0,
  show_progress: true,
  special_tokens: [],
  limit_alphabet: nil,
  initial_alphabet: [],
  continuing_subword_prefix: "##",
  end_of_word_suffix: nil
)
  # An explicit Hash object (not keyword splat) so `_new` receives one
  # positional argument.
  options = {
    vocab_size: vocab_size,
    min_frequency: min_frequency,
    show_progress: show_progress,
    special_tokens: special_tokens,
    limit_alphabet: limit_alphabet,
    initial_alphabet: initial_alphabet,
    continuing_subword_prefix: continuing_subword_prefix,
    end_of_word_suffix: end_of_word_suffix
  }

  _new(options)
end