Module: Transformers

Defined in:
lib/transformers/utils/hub.rb,
lib/transformers.rb,
lib/transformers/version.rb,
lib/transformers/ruby_utils.rb,
lib/transformers/activations.rb,
lib/transformers/image_utils.rb,
lib/transformers/torch_utils.rb,
lib/transformers/utils/_init.rb,
lib/transformers/hf_hub/errors.rb,
lib/transformers/utils/generic.rb,
lib/transformers/utils/logging.rb,
lib/transformers/modeling_utils.rb,
lib/transformers/pipelines/base.rb,
lib/transformers/pipelines/_init.rb,
lib/transformers/hf_hub/constants.rb,
lib/transformers/image_transforms.rb,
lib/transformers/modeling_outputs.rb,
lib/transformers/pipelines/pt_utils.rb,
lib/transformers/tokenization_utils.rb,
lib/transformers/utils/import_utils.rb,
lib/transformers/configuration_utils.rb,
lib/transformers/pipelines/embedding.rb,
lib/transformers/pipelines/reranking.rb,
lib/transformers/dynamic_module_utils.rb,
lib/transformers/hf_hub/file_download.rb,
lib/transformers/hf_hub/utils/_errors.rb,
lib/transformers/sentence_transformer.rb,
lib/transformers/data/processors/squad.rb,
lib/transformers/hf_hub/utils/_headers.rb,
lib/transformers/image_processing_base.rb,
lib/transformers/convert_slow_tokenizer.rb,
lib/transformers/image_processing_utils.rb,
lib/transformers/models/vit/modeling_vit.rb,
lib/transformers/tokenization_utils_base.rb,
lib/transformers/tokenization_utils_fast.rb,
lib/transformers/feature_extraction_utils.rb,
lib/transformers/models/auto/auto_factory.rb,
lib/transformers/models/auto/modeling_auto.rb,
lib/transformers/models/bert/modeling_bert.rb,
lib/transformers/models/mpnet/modeling_mpnet.rb,
lib/transformers/models/vit/configuration_vit.rb,
lib/transformers/pipelines/feature_extraction.rb,
lib/transformers/pipelines/question_answering.rb,
lib/transformers/models/auto/tokenization_auto.rb,
lib/transformers/models/bert/tokenization_bert.rb,
lib/transformers/pipelines/text_classification.rb,
lib/transformers/models/auto/configuration_auto.rb,
lib/transformers/models/bert/configuration_bert.rb,
lib/transformers/pipelines/image_classification.rb,
lib/transformers/pipelines/token_classification.rb,
lib/transformers/models/vit/image_processing_vit.rb,
lib/transformers/models/mpnet/configuration_mpnet.rb,
lib/transformers/models/auto/image_processing_auto.rb,
lib/transformers/models/bert/tokenization_bert_fast.rb,
lib/transformers/pipelines/image_feature_extraction.rb,
lib/transformers/models/auto/feature_extraction_auto.rb,
lib/transformers/models/mpnet/tokenization_mpnet_fast.rb,
lib/transformers/models/deberta_v2/modeling_deberta_v2.rb,
lib/transformers/models/distilbert/modeling_distilbert.rb,
lib/transformers/models/xlm_roberta/modeling_xlm_roberta.rb,
lib/transformers/models/distilbert/tokenization_distilbert.rb,
lib/transformers/models/deberta_v2/configuration_deberta_v2.rb,
lib/transformers/models/distilbert/configuration_distilbert.rb,
lib/transformers/models/xlm_roberta/configuration_xlm_roberta.rb,
lib/transformers/models/deberta_v2/tokenization_deberta_v2_fast.rb,
lib/transformers/models/distilbert/tokenization_distilbert_fast.rb,
lib/transformers/models/xlm_roberta/tokenization_xlm_roberta_fast.rb

Overview

Copyright 2018 Google AI, Google Brain and Carnegie Mellon University Authors and the HuggingFace Inc. team.

Licensed under the Apache License, Version 2.0 (the “License”); you may not use this file except in compliance with the License. You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an “AS IS” BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.

Defined Under Namespace

Modules: Activations, Bert, ClassAttribute, ConvertSlowTokenizer, Copy, DebertaV2, Distilbert, DynamicModuleUtils, HfHub, ImageProcessingUtils, ImageTransforms, ImageUtils, ModelingUtils, ModuleUtilsMixin, Mpnet, Pipelines, SpecialTokensMixin, TorchUtils, Utils, Vit, XlmRoberta

Classes: AggregationStrategy, ArgumentHandler, AutoConfig, AutoImageProcessor, AutoModel, AutoModelForImageClassification, AutoModelForMaskedLM, AutoModelForQuestionAnswering, AutoModelForSequenceClassification, AutoModelForTokenClassification, AutoTokenizer, BaseAutoModelClass, BaseImageProcessor, BaseModelOutput, BaseModelOutputWithPastAndCrossAttentions, BaseModelOutputWithPooling, BaseModelOutputWithPoolingAndCrossAttentions, BatchEncoding, BatchFeature, ChannelDimension, ChunkPipeline, ClassInstantier, ClassificationFunction, EmbeddingPipeline, Error, ExplicitEnum, FeatureExtractionPipeline, GELUActivation, ImageClassificationPipeline, ImageClassifierOutput, ImageFeatureExtractionPipeline, ImageProcessingMixin, LazyAutoMapping, LazyConfigMapping, MaskedLMOutput, ModelOutput, PaddingStrategy, Pipeline, PipelineDataset, PipelineIterator, PipelineRegistry, PreTrainedModel, PreTrainedTokenizer, PreTrainedTokenizerBase, PreTrainedTokenizerFast, PretrainedConfig, QuestionAnsweringArgumentHandler, QuestionAnsweringModelOutput, QuestionAnsweringPipeline, RerankingPipeline, SentenceTransformer, SequenceClassifierOutput, SquadExample, SquadFeatures, TensorType, TextClassificationPipeline, Todo, TokenClassificationArgumentHandler, TokenClassificationPipeline, TokenClassifierOutput, TruncationStrategy

Constant Summary

VERSION =
"0.1.6"
ACT2CLS =
{
  "gelu" => GELUActivation
}
ACT2FN =
ClassInstantier.new(ACT2CLS)
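
ACT2FN wraps the activation mapping so entries are resolved by name. A minimal sketch, assuming the Ruby ClassInstantier mirrors the Python implementation and instantiates the mapped class on [] lookup:

# Hypothetical lookup; "gelu" is the only key in ACT2CLS above
act_fn = Transformers::ACT2FN["gelu"] # => a GELUActivation instance
# act_fn can then be applied to a tensor inside a model's forward pass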
WEIGHTS_NAME =
"pytorch_model.bin"
WEIGHTS_INDEX_NAME =
"pytorch_model.bin.index.json"
TF2_WEIGHTS_NAME =
"tf_model.h5"
TF2_WEIGHTS_INDEX_NAME =
"tf_model.h5.index.json"
TF_WEIGHTS_NAME =
"model.ckpt"
FLAX_WEIGHTS_NAME =
"flax_model.msgpack"
FLAX_WEIGHTS_INDEX_NAME =
"flax_model.msgpack.index.json"
SAFE_WEIGHTS_NAME =
"model.safetensors"
SAFE_WEIGHTS_INDEX_NAME =
"model.safetensors.index.json"
CONFIG_NAME =
"config.json"
FEATURE_EXTRACTOR_NAME =
"preprocessor_config.json"
IMAGE_PROCESSOR_NAME =
FEATURE_EXTRACTOR_NAME
PROCESSOR_NAME =
"processor_config.json"
GENERATION_CONFIG_NAME =
"generation_config.json"
MODEL_CARD_NAME =
"modelcard.json"
LOG_LEVELS =

TODO add detail

{
  "debug" => Logger::DEBUG,
  "info" => Logger::INFO,
  "warning" => Logger::WARN,
  "error" => Logger::ERROR,
  "critical" => Logger::FATAL
}
DEFAULT_LOG_LEVEL =
Logger::WARN
TASK_ALIASES =
{
  "sentiment-analysis" => "text-classification",
  "ner" => "token-classification"
}
SUPPORTED_TASKS =
{
  "feature-extraction" => {
    "impl" => FeatureExtractionPipeline,
    "pt" => [AutoModel],
    "default" => {
      "model" => {
        "pt" => ["distilbert/distilbert-base-cased", "6ea8117"]
      }
    },
    "type" => "multimodal"
  },
  "text-classification" => {
    "impl" => TextClassificationPipeline,
    "pt" => [AutoModelForSequenceClassification],
    "default" => {
      "model" => {
        "pt" => ["distilbert/distilbert-base-uncased-finetuned-sst-2-english", "714eb0f"]
      }
    },
    "type" => "text"
  },
  "token-classification" => {
    "impl" => TokenClassificationPipeline,
    "pt" => [AutoModelForTokenClassification],
    "default" => {
      "model" => {
        "pt" => ["dbmdz/bert-large-cased-finetuned-conll03-english", "4c53496"]
      }
    },
    "type" => "text"
  },
  "question-answering" => {
    "impl" => QuestionAnsweringPipeline,
    "pt" => [AutoModelForQuestionAnswering],
    "default" => {
      "model" => {
        "pt" => ["distilbert/distilbert-base-cased-distilled-squad", "564e9b5"]
      }
    },
    "type" => "text"
  },
  "image-classification" => {
    "impl" => ImageClassificationPipeline,
    "pt" => [AutoModelForImageClassification],
    "default" => {
      "model" => {
        "pt" => ["google/vit-base-patch16-224", "3f49326"]
      }
    },
    "type" => "image"
  },
  "image-feature-extraction" => {
    "impl" => ImageFeatureExtractionPipeline,
    "pt" => [AutoModel],
    "default" => {
      "model" => {
        "pt" => ["google/vit-base-patch16-224", "3f49326"]
      }
    },
    "type" => "image"
  },
  "embedding" => {
    "impl" => EmbeddingPipeline,
    "pt" => [AutoModel],
    "default" => {
      "model" => {
        "pt" => ["sentence-transformers/all-MiniLM-L6-v2", "8b3219a"]
      }
    },
    "type" => "text"
  },
  "reranking" => {
    "impl" => RerankingPipeline,
    "pt" => [AutoModelForSequenceClassification],
    "default" => {
      "model" => {
        "pt" => ["mixedbread-ai/mxbai-rerank-base-v1", "03241da"]
      }
    },
    "type" => "text"
  }
}
PIPELINE_REGISTRY =
PipelineRegistry.new(supported_tasks: SUPPORTED_TASKS, task_aliases: TASK_ALIASES)
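
PIPELINE_REGISTRY resolves aliases from TASK_ALIASES and falls back to the default model and revision recorded in SUPPORTED_TASKS when no model is given. A minimal usage sketch (the first call downloads the default checkpoint; the exact result format depends on the pipeline class):

require "transformers"

# "sentiment-analysis" is an alias for "text-classification", so this uses the
# default distilbert/distilbert-base-uncased-finetuned-sst-2-english checkpoint
classifier = Transformers.pipeline("sentiment-analysis")
classifier.("This gem is great!") # => a label/score result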
ViTForImageClassification =
Vit::ViTForImageClassification
VERY_LARGE_INTEGER =

This is used to set the max input length for a model with an effectively unlimited input size

1e30.to_i
LARGE_INTEGER =

This is used when we need something big but slightly smaller than VERY_LARGE_INTEGER

1e20.to_i
SPECIAL_TOKENS_MAP_FILE =

Slow tokenizers used to be saved in three separate files

"special_tokens_map.json"
ADDED_TOKENS_FILE =
"added_tokens.json"
TOKENIZER_CONFIG_FILE =
"tokenizer_config.json"
FULL_TOKENIZER_FILE =

Fast tokenizers (provided by the Hugging Face Tokenizers library) can be saved in a single file

"tokenizer.json"
MODEL_MAPPING_NAMES =
{
  "bert" => "BertModel",
  "deberta-v2" => "DebertaV2Model",
  "distilbert" => "DistilBertModel",
  "mpnet" => "MPNetModel",
  "vit" => "ViTModel",
  "xlm-roberta" => "XLMRobertaModel"
}
MODEL_FOR_MASKED_LM_MAPPING_NAMES =
{
  "bert" => "BertForMaskedLM",
  "mpnet" => "MPNetForMaskedLM"
}
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES =
{
  "bert" => "BertForSequenceClassification",
  "deberta-v2" => "DebertaV2ForSequenceClassification",
  "distilbert" => "DistilBertForSequenceClassification",
  "xlm-roberta" => "XLMRobertaForSequenceClassification"
}
MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES =
{
  "distilbert" => "DistilBertForQuestionAnswering"
}
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES =
{
  "vit" => "ViTForImageClassification"
}
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES =
{
  "bert" => "BertForTokenClassification"
}
MODEL_MAPPING =
LazyAutoMapping.new(CONFIG_MAPPING_NAMES, MODEL_MAPPING_NAMES)
MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING =
LazyAutoMapping.new(
  CONFIG_MAPPING_NAMES, MODEL_FOR_IMAGE_CLASSIFICATION_MAPPING_NAMES
)
MODEL_FOR_MASKED_LM_MAPPING =
LazyAutoMapping.new(CONFIG_MAPPING_NAMES, MODEL_FOR_MASKED_LM_MAPPING_NAMES)
MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING =
LazyAutoMapping.new(
  CONFIG_MAPPING_NAMES, MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
)
MODEL_FOR_QUESTION_ANSWERING_MAPPING =
LazyAutoMapping.new(
  CONFIG_MAPPING_NAMES, MODEL_FOR_QUESTION_ANSWERING_MAPPING_NAMES
)
MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING =
LazyAutoMapping.new(
  CONFIG_MAPPING_NAMES, MODEL_FOR_TOKEN_CLASSIFICATION_MAPPING_NAMES
)
BertModel =
Bert::BertModel
BertForTokenClassification =
Bert::BertForTokenClassification
BertForSequenceClassification =
Bert::BertForSequenceClassification
MPNetForMaskedLM =
Mpnet::MPNetForMaskedLM
TOKENIZER_MAPPING_NAMES =
{
  "bert" => ["BertTokenizer", "BertTokenizerFast"],
  "deberta-v2" => ["DebertaV2TokenizerFast"],
  "distilbert" => ["DistilBertTokenizer", "DistilBertTokenizerFast"],
  "mpnet" => ["MPNetTokenizerFast"],
  "xlm-roberta" => ["XLMRobertaTokenizerFast"]
}
TOKENIZER_MAPPING =
LazyAutoMapping.new(CONFIG_MAPPING_NAMES, TOKENIZER_MAPPING_NAMES)
CONFIG_MAPPING_NAMES =
{
  "bert" => "BertConfig",
  "deberta-v2" => "DebertaV2Config",
  "distilbert" => "DistilBertConfig",
  "mpnet" => "MPNetConfig",
  "vit" => "ViTConfig",
  "xlm-roberta" => "XLMRobertaConfig"
}
CONFIG_MAPPING =
LazyConfigMapping.new(CONFIG_MAPPING_NAMES)
IMAGE_PROCESSOR_MAPPING_NAMES =
{
  "vit" => ["ViTImageProcessor"]
}
IMAGE_PROCESSOR_MAPPING =
LazyAutoMapping.new(CONFIG_MAPPING_NAMES, IMAGE_PROCESSOR_MAPPING_NAMES)
FEATURE_EXTRACTOR_MAPPING_NAMES =
{
}
FEATURE_EXTRACTOR_MAPPING =
LazyAutoMapping.new(CONFIG_MAPPING_NAMES, FEATURE_EXTRACTOR_MAPPING_NAMES)
DebertaV2ForSequenceClassification =
DebertaV2::DebertaV2ForSequenceClassification
DistilBertForMaskedLM =
Distilbert::DistilBertForMaskedLM
DistilBertForSequenceClassification =
Distilbert::DistilBertForSequenceClassification
DistilBertForQuestionAnswering =
Distilbert::DistilBertForQuestionAnswering
XLMRobertaForSequenceClassification =
XlmRoberta::XLMRobertaForSequenceClassification
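
The mapping constants above back the Auto* classes: a model type key such as "bert" is resolved lazily to its config, tokenizer, and model class, so code for unused architectures is never loaded. A minimal sketch (the model identifier is illustrative):

require "transformers"

# "bert" resolves through CONFIG_MAPPING_NAMES / TOKENIZER_MAPPING_NAMES / MODEL_MAPPING_NAMES
config = Transformers::AutoConfig.from_pretrained("bert-base-uncased")
tokenizer = Transformers::AutoTokenizer.from_pretrained("bert-base-uncased")
model = Transformers::AutoModel.from_pretrained("bert-base-uncased")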

Class Attribute Summary

Class Method Summary

Class Attribute Details

.fast_init ⇒ Object

experimental



# File 'lib/transformers.rb', line 120

def fast_init
  @fast_init
end
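
A sketch of enabling it, assuming the attribute is also writable; it is marked experimental above, so behavior may change:

# Experimental: may speed up loading of pretrained models
Transformers.fast_init = true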

.logger ⇒ Object

Returns the value of attribute logger.



# File 'lib/transformers/utils/logging.rb', line 17

def logger
  @logger
end
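
Since this is a standard Logger, callers can adjust or replace it; a minimal sketch, assuming a writer is also defined:

require "logger"

Transformers.logger.level = Logger::DEBUG             # raise verbosity
Transformers.logger = Logger.new("transformers.log")  # or swap in a custom logger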

Class Method Details

._get_default_logging_level ⇒ Object



# File 'lib/transformers/utils/logging.rb', line 31

def self._get_default_logging_level
  env_level_str = ENV["TRANSFORMERS_VERBOSITY"]
  if env_level_str
    if LOG_LEVELS.include?(env_level_str)
      return LOG_LEVELS[env_level_str]
    else
      warn(
        "Unknown option TRANSFORMERS_VERBOSITY=#{env_level_str}, " +
        "has to be one of: #{LOG_LEVELS.keys.join(", ")}"
      )
    end
  end
  DEFAULT_LOG_LEVEL
end
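
In practice this means verbosity can be controlled from the environment, provided it is set before the library initializes its logger; a sketch:

# Valid keys come from LOG_LEVELS: debug, info, warning, error, critical
ENV["TRANSFORMERS_VERBOSITY"] = "debug"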

.pipeline(task, model_arg = nil, model: nil, config: nil, tokenizer: nil, feature_extractor: nil, image_processor: nil, framework: nil, revision: nil, use_fast: true, token: nil, device: nil, device_map: nil, torch_dtype: nil, trust_remote_code: nil, model_kwargs: nil, pipeline_class: nil, **kwargs) ⇒ Object



# File 'lib/transformers/pipelines/_init.rb', line 107

def pipeline(
  task,
  model_arg = nil,
  model: nil,
  config: nil,
  tokenizer: nil,
  feature_extractor: nil,
  image_processor: nil,
  framework: nil,
  revision: nil,
  use_fast: true,
  token: nil,
  device: nil,
  device_map: nil,
  torch_dtype: nil,
  trust_remote_code: nil,
  model_kwargs: nil,
  pipeline_class: nil,
  **kwargs
)
  if !model_arg.nil?
    if !model.nil?
      raise ArgumentError, "Cannot pass multiple models"
    end
    model = model_arg
  end

  model_kwargs ||= {}
  # Make sure we only pass use_auth_token once as a kwarg (it used to be possible to pass it in model_kwargs,
  # this is to keep BC).
  use_auth_token = model_kwargs.delete(:use_auth_token)
  if !use_auth_token.nil?
    raise Todo
  end

  code_revision = kwargs.delete(:code_revision)
  commit_hash = kwargs.delete(:_commit_hash)

  hub_kwargs = {
    revision: revision,
    token: token,
    trust_remote_code: trust_remote_code,
    _commit_hash: commit_hash
  }

  if task.nil? && model.nil?
    raise RuntimeError,
      "Impossible to instantiate a pipeline without either a task or a model " +
      "being specified. " +
      "Please provide a task class or a model"
  end

  if model.nil? && !tokenizer.nil?
    raise RuntimeError,
      "Impossible to instantiate a pipeline with tokenizer specified but not the model as the provided tokenizer" +
      " may not be compatible with the default model. Please provide a PreTrainedModel class or a" +
      " path/identifier to a pretrained model when providing tokenizer."
  end
  if model.nil? && !feature_extractor.nil?
    raise RuntimeError,
      "Impossible to instantiate a pipeline with feature_extractor specified but not the model as the provided" +
      " feature_extractor may not be compatible with the default model. Please provide a PreTrainedModel class" +
      " or a path/identifier to a pretrained model when providing feature_extractor."
  end
  if model.is_a?(Pathname)
    model = model.to_s
  end

  if commit_hash.nil?
    pretrained_model_name_or_path = nil
    if config.is_a?(String)
      pretrained_model_name_or_path = config
    elsif config.nil? && model.is_a?(String)
      pretrained_model_name_or_path = model
    end

    if !config.is_a?(PretrainedConfig) && !pretrained_model_name_or_path.nil?
      # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
      resolved_config_file = Utils::Hub.cached_file(
        pretrained_model_name_or_path,
        CONFIG_NAME,
        _raise_exceptions_for_gated_repo: false,
        _raise_exceptions_for_missing_entries: false,
        _raise_exceptions_for_connection_errors: false,
        cache_dir: model_kwargs[:cache_dir],
        **hub_kwargs
      )
      hub_kwargs[:_commit_hash] = Utils::Hub.extract_commit_hash(resolved_config_file, commit_hash)
    else
      hub_kwargs[:_commit_hash] = nil # getattr(config, "_commit_hash", None)
    end
  end

  # Config is the primordial information item.
  # Instantiate config if needed
  if config.is_a?(String)
    raise Todo
  elsif config.nil? && model.is_a?(String)
    config = AutoConfig.from_pretrained(
      model, _from_pipeline: task, code_revision: code_revision, **hub_kwargs, **model_kwargs
    )
    hub_kwargs[:_commit_hash] = config._commit_hash
  end

  custom_tasks = {}
  if !config.nil? && (config.instance_variable_get(:@custom_pipelines) || {}).length > 0
    raise Todo
  end

  if task.nil? && !model.nil?
    raise Todo
  end

  # Retrieve the task
  if custom_tasks.include?(task)
    raise Todo
  else
    _normalized_task, targeted_task, task_options = check_task(task)
    if pipeline_class.nil?
      pipeline_class = targeted_task["impl"]
    end
  end

  # Use default model/config/tokenizer for the task if no model is provided
  if model.nil?
    # At that point framework might still be undetermined
    model, default_revision = Pipelines.get_default_model_and_revision(targeted_task, framework, task_options)
    revision = !revision.nil? ? revision : default_revision
    Transformers.logger.warn(
      "No model was supplied, defaulted to #{model} and revision" +
      " #{revision} (#{Utils::Hub::HUGGINGFACE_CO_RESOLVE_ENDPOINT}/#{model}).\n" +
      "Using a pipeline without specifying a model name and revision in production is not recommended."
    )
    hub_kwargs[:revision] = revision
    if config.nil? && model.is_a?(String)
      config = AutoConfig.from_pretrained(model, _from_pipeline: task, **hub_kwargs, **model_kwargs)
      hub_kwargs[:_commit_hash] = config._commit_hash
    end
  end

  if !device_map.nil?
    raise Todo
  end
  if !torch_dtype.nil?
    raise Todo
  end

  model_name = model.is_a?(String) ? model : nil

  # Load the correct model if possible
  # Infer the framework from the model if not already defined
  if model.is_a?(String) || framework.nil?
    model_classes = {"tf" => targeted_task["tf"], "pt" => targeted_task["pt"]}
    framework, model =
      Pipelines.infer_framework_load_model(
        model,
        config,
        model_classes: model_classes,
        framework: framework,
        task: task,
        **hub_kwargs,
        **model_kwargs
      )
  end

  model_config = model.config
  hub_kwargs[:_commit_hash] = model.config._commit_hash
  model_config_type = model_config.class.name.split("::").last
  load_tokenizer = TOKENIZER_MAPPING.include?(model_config_type) || !model_config.tokenizer_class.nil?
  load_feature_extractor = FEATURE_EXTRACTOR_MAPPING.include?(model_config_type) || !feature_extractor.nil?
  load_image_processor = IMAGE_PROCESSOR_MAPPING.include?(model_config_type) || !image_processor.nil?

  if load_tokenizer
    # Try to infer tokenizer from model or config name (if provided as str)
    if tokenizer.nil?
      if model_name.is_a?(String)
        tokenizer = model_name
      elsif config.is_a?(String)
        tokenizer = config
      else
        # Impossible to guess what is the right tokenizer here
        raise "Impossible to guess which tokenizer to use. Please provide a PreTrainedTokenizer class or a path/identifier to a pretrained tokenizer."
      end
    end

    # Instantiate tokenizer if needed
    if tokenizer.is_a?(String) || tokenizer.is_a?(Array)
      if tokenizer.is_a?(Array)
        # For array we have [tokenizer name, {kwargs}]
        use_fast = tokenizer[1].delete(:use_fast) { use_fast }
        tokenizer_identifier = tokenizer[0]
        tokenizer_kwargs = tokenizer[1]
      else
        tokenizer_identifier = tokenizer
        tokenizer_kwargs = model_kwargs.dup
        tokenizer_kwargs.delete(:torch_dtype)
      end

      tokenizer =
        AutoTokenizer.from_pretrained(
          tokenizer_identifier, use_fast: use_fast, _from_pipeline: task, **hub_kwargs, **tokenizer_kwargs
        )
    end
  end

  if load_image_processor
    # Try to infer image processor from model or config name (if provided as str)
    if image_processor.nil?
      if model_name.is_a?(String)
        image_processor = model_name
      elsif config.is_a?(String)
        image_processor = config
      # Backward compatibility, as `feature_extractor` used to be the name
      # for `ImageProcessor`.
      elsif !feature_extractor.nil? && feature_extractor.is_a?(BaseImageProcessor)
        image_processor = feature_extractor
      else
        # Impossible to guess what is the right image_processor here
        raise RuntimeError,
          "Impossible to guess which image processor to use. " +
          "Please provide a PreTrainedImageProcessor class or a path/identifier " +
          "to a pretrained image processor."
      end
    end

    # Instantiate image_processor if needed
    if image_processor.is_a?(String) || image_processor.is_a?(Array)
      image_processor = AutoImageProcessor.from_pretrained(
        image_processor, _from_pipeline: task, **hub_kwargs, **model_kwargs
      )
    end
  end

  if load_feature_extractor
    raise Todo
  end

  if task == "translation" && model.config.task_specific_params
    raise Todo
  end

  if !tokenizer.nil?
    kwargs[:tokenizer] = tokenizer
  end

  if !feature_extractor.nil?
    kwargs[:feature_extractor] = feature_extractor
  end

  if !torch_dtype.nil?
    kwargs[:torch_dtype] = torch_dtype
  end

  if !image_processor.nil?
    kwargs[:image_processor] = image_processor
  end

  if !device.nil?
    kwargs[:device] = device
  end

  pipeline_class.new(model, framework: framework, task: task, **kwargs)
end
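
Typical usage mirrors the Python API: the returned pipeline object is callable. A hedged sketch (argument shapes follow the pipeline classes above; exact return formats may differ by task):

require "transformers"

# Task only: uses the default model/revision from SUPPORTED_TASKS and logs a warning
qa = Transformers.pipeline("question-answering")
qa.(question: "Who created Ruby?", context: "Ruby was created by Yukihiro Matsumoto.")

# Explicit model identifier passed as the positional model_arg
embed = Transformers.pipeline("embedding", "sentence-transformers/all-MiniLM-L6-v2")
embed.("Hello world") # => embedding vector (array of floats)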