Class: Transformers::Bert::BertTokenizer::BasicTokenizer

Inherits:
Object
  • Object
show all
Defined in:
lib/transformers/models/bert/tokenization_bert.rb

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(do_lower_case: true, never_split: nil, tokenize_chinese_chars: true, strip_accents: nil, do_split_on_punc: true) ⇒ BasicTokenizer

Returns a new instance of BasicTokenizer.



21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 21

# Stores the tokenizer options on the new instance.
#
# Every keyword is copied verbatim into the instance variable of the same
# name, except +never_split+, which is wrapped in a Set.
#
# @param do_lower_case [Object] value kept in @do_lower_case
# @param never_split [Enumerable, nil] entries kept in @never_split as a Set;
#   nil yields an empty Set
# @param tokenize_chinese_chars [Object] value kept in @tokenize_chinese_chars
# @param strip_accents [Object] value kept in @strip_accents
# @param do_split_on_punc [Object] value kept in @do_split_on_punc
def initialize(
  do_lower_case: true,
  never_split: nil,
  tokenize_chinese_chars: true,
  strip_accents: nil,
  do_split_on_punc: true
)
  # Only nil falls back to an empty list; any other value is handed to Set.new as given.
  protected_tokens = never_split.nil? ? [] : never_split

  @do_lower_case = do_lower_case
  @never_split = Set.new(protected_tokens)
  @do_split_on_punc = do_split_on_punc
  @strip_accents = strip_accents
  @tokenize_chinese_chars = tokenize_chinese_chars
end

Instance Attribute Details

#do_lower_case ⇒ Object (readonly)

Returns the value of attribute do_lower_case.



19
20
21
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 19

# Reader for the do_lower_case attribute.
#
# @return [Object] whatever is currently held in @do_lower_case
def do_lower_case = @do_lower_case

#do_split_on_punc ⇒ Object (readonly)

Returns the value of attribute do_split_on_punc.



19
20
21
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 19

# Reader for the do_split_on_punc attribute.
#
# @return [Object] whatever is currently held in @do_split_on_punc
def do_split_on_punc = @do_split_on_punc

#never_split ⇒ Object (readonly)

Returns the value of attribute never_split.



19
20
21
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 19

# Reader for the never_split attribute.
#
# @return [Object] whatever is currently held in @never_split
def never_split = @never_split

#strip_accents ⇒ Object (readonly)

Returns the value of attribute strip_accents.



19
20
21
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 19

# Reader for the strip_accents attribute.
#
# @return [Object] whatever is currently held in @strip_accents
def strip_accents = @strip_accents

#tokenize_chinese_chars ⇒ Object (readonly)

Returns the value of attribute tokenize_chinese_chars.



19
20
21
# File 'lib/transformers/models/bert/tokenization_bert.rb', line 19

# Reader for the tokenize_chinese_chars attribute.
#
# @return [Object] whatever is currently held in @tokenize_chinese_chars
def tokenize_chinese_chars = @tokenize_chinese_chars