Class: Transformers::DebertaV2::DebertaV2TokenizerFast
- Inherits:
-
PreTrainedTokenizerFast
- Object
- PreTrainedTokenizerBase
- PreTrainedTokenizerFast
- Transformers::DebertaV2::DebertaV2TokenizerFast
- Defined in:
- lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb
Constant Summary collapse
- VOCAB_FILES_NAMES =
{vocab_file: "spm.model", tokenizer_file: "tokenizer.json"}
Constants included from SpecialTokensMixin
SpecialTokensMixin::SPECIAL_TOKENS_ATTRIBUTES
Instance Attribute Summary
Attributes inherited from PreTrainedTokenizerBase
#init_kwargs, #model_max_length
Instance Method Summary collapse
- #build_inputs_with_special_tokens(token_ids_0, token_ids_1: nil) ⇒ Object
- #can_save_slow_tokenizer ⇒ Object
- #create_token_type_ids_from_sequences(token_ids_0, token_ids_1: nil) ⇒ Object
- #get_special_tokens_mask(token_ids_0, token_ids_1: nil, already_has_special_tokens: false) ⇒ Object
-
#initialize(vocab_file: nil, tokenizer_file: nil, do_lower_case: false, split_by_punct: false, bos_token: "[CLS]", eos_token: "[SEP]", unk_token: "[UNK]", sep_token: "[SEP]", pad_token: "[PAD]", cls_token: "[CLS]", mask_token: "[MASK]", **kwargs) ⇒ DebertaV2TokenizerFast
constructor
self.slow_tokenizer_class = DebertaV2Tokenizer
Methods inherited from PreTrainedTokenizerFast
#_convert_token_to_id_with_added_voc, #backend_tokenizer, #convert_ids_to_tokens, #convert_tokens_to_ids, #convert_tokens_to_string, #get_vocab, #is_fast, #vocab
Methods inherited from PreTrainedTokenizerBase
#_eventual_warn_about_too_long_sequence, _from_pretrained, #call, from_pretrained
Methods included from ClassAttribute
Methods included from SpecialTokensMixin
#bos_token_id, #cls_token_id, #eos_token_id, #pad_token_id, #sep_token_id, #special_tokens_map, #unk_token_id
Constructor Details
#initialize(vocab_file: nil, tokenizer_file: nil, do_lower_case: false, split_by_punct: false, bos_token: "[CLS]", eos_token: "[SEP]", unk_token: "[UNK]", sep_token: "[SEP]", pad_token: "[PAD]", cls_token: "[CLS]", mask_token: "[MASK]", **kwargs) ⇒ DebertaV2TokenizerFast
self.slow_tokenizer_class = DebertaV2Tokenizer
23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 |
# File 'lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb', line 23 def initialize( vocab_file: nil, tokenizer_file: nil, do_lower_case: false, split_by_punct: false, bos_token: "[CLS]", eos_token: "[SEP]", unk_token: "[UNK]", sep_token: "[SEP]", pad_token: "[PAD]", cls_token: "[CLS]", mask_token: "[MASK]", **kwargs ) super(vocab_file, tokenizer_file: tokenizer_file, do_lower_case: do_lower_case, bos_token: bos_token, eos_token: eos_token, unk_token: unk_token, sep_token: sep_token, pad_token: pad_token, cls_token: cls_token, mask_token: mask_token, split_by_punct: split_by_punct, **kwargs) @do_lower_case = do_lower_case @split_by_punct = split_by_punct @vocab_file = vocab_file end |
Instance Method Details
#build_inputs_with_special_tokens(token_ids_0, token_ids_1: nil) ⇒ Object
48 49 50 51 52 53 54 55 |
# File 'lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb', line 48 def build_inputs_with_special_tokens(token_ids_0, token_ids_1: nil) if token_ids_1.nil? return [@cls_token_id] + token_ids_0 + [@sep_token_id] end cls = [@cls_token_id] sep = [@sep_token_id] cls + token_ids_0 + sep + token_ids_1 + sep end |
#can_save_slow_tokenizer ⇒ Object
44 45 46 |
# File 'lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb', line 44 def can_save_slow_tokenizer @vocab_file ? File.exist?(@vocab_file) : false end |
#create_token_type_ids_from_sequences(token_ids_0, token_ids_1: nil) ⇒ Object
68 69 70 71 72 73 74 75 |
# File 'lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb', line 68 def create_token_type_ids_from_sequences(token_ids_0, token_ids_1: nil) sep = [@sep_token_id] cls = [@cls_token_id] if token_ids_1.nil? return (cls + token_ids_0 + sep).length * [0] end ((cls + token_ids_0 + sep).length * [0]) + ((token_ids_1 + sep).length * [1]) end |
#get_special_tokens_mask(token_ids_0, token_ids_1: nil, already_has_special_tokens: false) ⇒ Object
57 58 59 60 61 62 63 64 65 66 |
# File 'lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb', line 57 def get_special_tokens_mask(token_ids_0, token_ids_1: nil, already_has_special_tokens: false) if already_has_special_tokens return super(token_ids_0: token_ids_0, token_ids_1: token_ids_1, already_has_special_tokens: true) end if !token_ids_1.nil? return [1] + ([0] * token_ids_0.length) + [1] + ([0] * token_ids_1.length) + [1] end [1] + ([0] * token_ids_0.length) + [1] end |