4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
# File 'lib/tokenizers/trainers/word_piece_trainer.rb', line 4
# Builds a new WordPiece trainer, forwarding every keyword option unchanged
# to the native constructor +_new+ as a single options hash.
#
# NOTE(review): the semantics of each option (e.g. how +min_frequency+ is
# applied during training) are implemented in the native extension — confirm
# against the Hugging Face tokenizers WordPieceTrainer documentation.
def self.new(vocab_size: 30000, min_frequency: 0, show_progress: true,
             special_tokens: [], limit_alphabet: nil, initial_alphabet: [],
             continuing_subword_prefix: "##", end_of_word_suffix: nil)
  # Collect the keyword arguments into the one hash `_new` expects,
  # keeping the original key order.
  options = {
    vocab_size: vocab_size,
    min_frequency: min_frequency,
    show_progress: show_progress,
    special_tokens: special_tokens,
    limit_alphabet: limit_alphabet,
    initial_alphabet: initial_alphabet,
    continuing_subword_prefix: continuing_subword_prefix,
    end_of_word_suffix: end_of_word_suffix
  }
  _new(options)
end
|