Class: Transformers::PreTrainedModel

Inherits:

Torch::NN::Module

Object
Torch::NN::Module
Transformers::PreTrainedModel

show all

Extended by: ClassAttribute

Includes: ModuleUtilsMixin

Defined in: lib/transformers/modeling_utils.rb

Direct Known Subclasses

Bert::BertPreTrainedModel, DebertaV2::DebertaV2PreTrainedModel, Distilbert::DistilBertPreTrainedModel, Mpnet::MPNetPreTrainedModel, Vit::ViTPreTrainedModel, XlmRoberta::XLMRobertaPreTrainedModel

Instance Attribute Summary collapse

#config ⇒ Object readonly

Returns the value of attribute config.

Class Method Summary collapse

.from_pretrained(pretrained_model_name_or_path, *model_args, config: nil, cache_dir: nil, ignore_mismatched_sizes: false, force_download: false, local_files_only: false, token: nil, revision: "main", use_safetensors: nil, **kwargs) ⇒ Object

Instance Method Summary collapse

Methods included from ClassAttribute

class_attribute

Methods included from ModuleUtilsMixin

#device, #get_extended_attention_mask, #get_head_mask

Constructor Details

#initialize(config, *inputs, **kwargs) ⇒ `PreTrainedModel`

Returns a new instance of PreTrainedModel.

# File 'lib/transformers/modeling_utils.rb', line 149

# Creates the model and remembers the configuration it was built with.
#
# @param config [Object] configuration object; stored unchanged in @config
# @param inputs [Array] extra positional arguments; accepted but unused here
# @param kwargs [Hash] extra keyword arguments; accepted but unused here
def initialize(config, *inputs, **kwargs)
  # Explicit empty parentheses: the parent initializer is called with no
  # arguments instead of implicitly receiving config/inputs/kwargs.
  super()
  @config = config
end

Instance Attribute Details

#config ⇒ `Object` (readonly)

Returns the value of attribute config.



139
140
141

# File 'lib/transformers/modeling_utils.rb', line 139

# Read-only accessor for the model configuration.
#
# @return [Object] the value currently held in @config
def config
  @config
end

Class Method Details

.from_pretrained(pretrained_model_name_or_path, *model_args, config: nil, cache_dir: nil, ignore_mismatched_sizes: false, force_download: false, local_files_only: false, token: nil, revision: "main", use_safetensors: nil, **kwargs) ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 259

# Resolves a checkpoint, loads its configuration and weights, and returns an
# instantiated model with weights tied and evaluation mode enabled.
#
# Only a subset of the loading paths is implemented; every unsupported path
# raises `Todo` (see the @raise entries below).
#
# @param pretrained_model_name_or_path [#to_s, nil] path of a local weights
#   file, or an identifier handed to `Utils::Hub.cached_file`
# @param model_args [Array] extra positional arguments forwarded to `new`
# @param config [Object, nil] a `PretrainedConfig` instance (deep-copied before
#   use), or any other value, in which case the configuration is loaded via
#   `config_class.from_pretrained` from `config` when non-nil, otherwise from
#   `pretrained_model_name_or_path`
# @param cache_dir [Object, nil] forwarded to the hub/config helpers
# @param ignore_mismatched_sizes [Boolean] forwarded to `_load_pretrained_model`
# @param force_download [Boolean] forwarded to the hub/config helpers
# @param local_files_only [Boolean] forwarded to the hub/config helpers; forced
#   to true when `Utils::Hub.is_offline_mode` is truthy
# @param token [Object, nil] forwarded to the hub/config helpers
# @param revision [String] forwarded to the hub/config helpers
# @param use_safetensors [Boolean, nil] nil means "prefer safetensors, fall
#   back to the PyTorch weights file"; false skips safetensors; it is set to
#   false automatically when nil and `is_safetensors_available` is falsy
# @param kwargs [Hash] loader options (`:state_dict`, `:from_tf`,
#   `:torch_dtype`, `:subfolder`, `:variant`, ...) are removed first; whatever
#   remains is forwarded to the configuration loader or to the model constructor
# @return [Object] the loaded model
# @raise [Todo] for a local directory, a sharded checkpoint, a non-nil
#   `device_map`, `torch_dtype` or `quantization_config`, `load_in_4bit` /
#   `load_in_8bit`, `output_loading_info`, a model whose `can_generate` is
#   truthy, or when no weights file can be resolved while online
# @raise [ArgumentError] when a safetensors file's "format" metadata is not "pt"
def from_pretrained(
  pretrained_model_name_or_path,
  *model_args,
  config: nil,
  cache_dir: nil,
  ignore_mismatched_sizes: false,
  force_download: false,
  local_files_only: false,
  token: nil,
  revision: "main",
  use_safetensors: nil,
  **kwargs
)
  # Strip every loader-specific option out of kwargs up front: whatever is
  # left afterwards gets forwarded to the config loader / model constructor.
  # Underscore-prefixed locals are removed from kwargs but otherwise unused.
  state_dict = kwargs.delete(:state_dict)
  from_tf = kwargs.delete(:from_tf) { false }
  from_flax = kwargs.delete(:from_flax) { false }
  resume_download = kwargs.delete(:resume_download) { false }
  proxies = kwargs.delete(:proxies)
  output_loading_info = kwargs.delete(:output_loading_info) { false }
  _use_auth_token = kwargs.delete(:use_auth_token)
  trust_remote_code = kwargs.delete(:trust_remote_code)
  _ = kwargs.delete(:mirror)
  from_pipeline = kwargs.delete(:_from_pipeline)
  from_auto_class = kwargs.delete(:_from_auto) { false }
  _fast_init = kwargs.delete(:_fast_init) { true }
  torch_dtype = kwargs.delete(:torch_dtype)
  low_cpu_mem_usage = kwargs.delete(:low_cpu_mem_usage)
  device_map = kwargs.delete(:device_map)
  _max_memory = kwargs.delete(:max_memory)
  offload_folder = kwargs.delete(:offload_folder)
  offload_state_dict = kwargs.delete(:offload_state_dict) { false }
  load_in_8bit = kwargs.delete(:load_in_8bit) { false }
  load_in_4bit = kwargs.delete(:load_in_4bit) { false }
  quantization_config = kwargs.delete(:quantization_config)
  subfolder = kwargs.delete(:subfolder) { "" }
  commit_hash = kwargs.delete(:_commit_hash)
  variant = kwargs.delete(:variant)
  _adapter_kwargs = kwargs.delete(:adapter_kwargs) { {} }
  _adapter_name = kwargs.delete(:adapter_name) { "default" }
  _use_flash_attention_2 = kwargs.delete(:use_flash_attention_2) { false }

  if use_safetensors.nil? && !is_safetensors_available
    use_safetensors = false
  end
  if trust_remote_code
    Transformers.logger.warn(
      "The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is" +
      " ignored."
    )
  end

  if commit_hash.nil?
    if !config.is_a?(PretrainedConfig)
      # We make a call to the config file first (which may be absent) to get the commit hash as soon as possible
      resolved_config_file =
        Utils::Hub.cached_file(
          pretrained_model_name_or_path,
          CONFIG_NAME,
          cache_dir: cache_dir,
          force_download: force_download,
          resume_download: resume_download,
          proxies: proxies,
          local_files_only: local_files_only,
          token: token,
          revision: revision,
          subfolder: subfolder,
          _raise_exceptions_for_gated_repo: false,
          _raise_exceptions_for_missing_entries: false,
          _raise_exceptions_for_connection_errors: false,
        )
      commit_hash = Utils::Hub.extract_commit_hash(resolved_config_file, commit_hash)
    else
      commit_hash = config._commit_hash
    end
  end

  if !device_map.nil?
    raise Todo
  end

  # handling bnb config from kwargs, remove after `load_in_{4/8}bit` deprecation.
  if load_in_4bit || load_in_8bit
    raise Todo
  end

  user_agent = {file_type: "model", framework: "pytorch", from_auto_class: from_auto_class}
  if !from_pipeline.nil?
    user_agent[:using_pipeline] = from_pipeline
  end

  if Utils::Hub.is_offline_mode && !local_files_only
    Transformers.logger.info "Offline mode: forcing local_files_only: true"
    local_files_only = true
  end

  # Load config if we don't provide a configuration
  if !config.is_a?(PretrainedConfig)
    config_path = !config.nil? ? config : pretrained_model_name_or_path
    config, model_kwargs =
      config_class.from_pretrained(
        config_path,
        cache_dir: cache_dir,
        return_unused_kwargs: true,
        force_download: force_download,
        resume_download: resume_download,
        proxies: proxies,
        local_files_only: local_files_only,
        token: token,
        revision: revision,
        subfolder: subfolder,
        _from_auto: from_auto_class,
        _from_pipeline: from_pipeline,
        **kwargs
      )
  else
    # In case one passes a config to `from_pretrained` + "attn_implementation"
    # override the `_attn_implementation` attribute to `attn_implementation` of the kwargs
    # Please see: https://github.com/huggingface/transformers/issues/28038

    # Overwrite `config._attn_implementation` by the one from the kwargs --> in auto-factory
    # we pop attn_implementation from the kwargs but this handles the case where users
    # passes manually the config to `from_pretrained`.
    config = Copy.deepcopy(config)

    kwarg_attn_imp = kwargs.delete(:attn_implementation)
    if !kwarg_attn_imp.nil? && config._attn_implementation != kwarg_attn_imp
      config._attn_implementation = kwarg_attn_imp
    end
    model_kwargs = kwargs
  end

  pre_quantized = false # !config.quantization_config.nil?
  if pre_quantized || !quantization_config.nil?
    raise Todo
  else
    hf_quantizer = nil
  end

  if !hf_quantizer.nil?
    raise Todo
  end

  # This variable will flag if we're loading a sharded checkpoint. In this case the archive file is just the
  # index of the files.
  is_sharded = false
  sharded_metadata = nil
  # Load model
  _loading_info = nil

  # Keep in fp32 modules
  keep_in_fp32_modules = nil
  _use_keep_in_fp32_modules = false

  resolved_archive_file = nil
  if !pretrained_model_name_or_path.nil?
    pretrained_model_name_or_path = pretrained_model_name_or_path.to_s
    is_local = Dir.exist?(pretrained_model_name_or_path)
    if is_local
      # Loading from a local directory is not supported yet.
      raise Todo
    elsif File.exist?(File.join(subfolder, pretrained_model_name_or_path))
      # A single local weights file was given.
      archive_file = pretrained_model_name_or_path
      is_local = true
    else
      # set correct filename
      if use_safetensors != false
        filename = _add_variant(SAFE_WEIGHTS_NAME, variant)
      else
        filename = _add_variant(WEIGHTS_NAME, variant)
      end

      # Load from URL or cache if already cached
      cached_file_kwargs = {
        cache_dir: cache_dir,
        force_download: force_download,
        proxies: proxies,
        resume_download: resume_download,
        local_files_only: local_files_only,
        token: token,
        user_agent: user_agent,
        revision: revision,
        subfolder: subfolder,
        _raise_exceptions_for_gated_repo: false,
        _raise_exceptions_for_missing_entries: false,
        _commit_hash: commit_hash
      }
      resolved_archive_file = Utils::Hub.cached_file(pretrained_model_name_or_path, filename, **cached_file_kwargs)

      # Since we set _raise_exceptions_for_missing_entries: false, we don't get an exception but a nil
      # result when internet is up, the repo and revision exist, but the file does not.
      if resolved_archive_file.nil? && filename == _add_variant(SAFE_WEIGHTS_NAME, variant)
        # Maybe the checkpoint is sharded, we try to grab the index name in this case.
        resolved_archive_file = Utils::Hub.cached_file(
          pretrained_model_name_or_path,
          _add_variant(SAFE_WEIGHTS_INDEX_NAME, variant),
          **cached_file_kwargs
        )
        if !resolved_archive_file.nil?
          is_sharded = true
        elsif use_safetensors
          raise Todo
        else
          # This repo has no safetensors file of any kind, we switch to PyTorch.
          filename = _add_variant(WEIGHTS_NAME, variant)
          resolved_archive_file = Utils::Hub.cached_file(
            pretrained_model_name_or_path, filename, **cached_file_kwargs
          )
        end
      end
      if resolved_archive_file.nil? && filename == _add_variant(WEIGHTS_NAME, variant)
        # Maybe the checkpoint is sharded, we try to grab the index name in this case.
        resolved_archive_file = Utils::Hub.cached_file(
          pretrained_model_name_or_path,
          _add_variant(WEIGHTS_INDEX_NAME, variant),
          **cached_file_kwargs
        )
        if !resolved_archive_file.nil?
          is_sharded = true
        end
      end
      if !local_files_only && !Utils::Hub.is_offline_mode
        # Online and still no weights file resolved: not handled yet.
        raise Todo if resolved_archive_file.nil?
        # When only a PyTorch weights file was found, this is where a check for
        # a safetensors file on the repository (and an auto conversion) would
        # take place; auto conversion is skipped.
      end
    end

    # Runs for BOTH the local-file and the remote/cache branch. It previously
    # lived inside the remote branch only (where is_local is always false), so
    # a local weights file never reached resolved_archive_file.
    if is_local
      Transformers.logger.info("loading weights file #{archive_file}")
      resolved_archive_file = archive_file
    else
      Transformers.logger.info("loading weights file #{filename} from cache at #{resolved_archive_file}")
    end
  else
    resolved_archive_file = nil
  end

  # We'll need to download and cache each checkpoint shard if the checkpoint is sharded.
  if is_sharded
    raise Todo
  end

  metadata = nil
  if is_safetensors_available && resolved_archive_file.is_a?(String) && resolved_archive_file.end_with?(".safetensors")
    Safetensors.safe_open(resolved_archive_file, framework: "pt") do |f|
      metadata = f.metadata
    end

    # NOTE(review): this assumes f.metadata returns a Hash-like object; a nil
    # result would raise NoMethodError here - confirm against Safetensors.
    if metadata["format"] == "pt"
      # do nothing
    else
      raise ArgumentError,
        "Incompatible safetensors file. File metadata is not ['pt'] but #{metadata["format"]}"
    end
  end

  from_pt = !(from_tf || from_flax)

  # load pt weights early so that we know which dtype to init the model under
  if from_pt
    if !is_sharded && state_dict.nil?
      # Time to load the checkpoint
      state_dict = load_state_dict(resolved_archive_file)
    end

    # set dtype to instantiate the model under:
    # 1. If torch_dtype is not nil, we use that dtype
    # 2. If torch_dtype is "auto", we auto-detect dtype from the loaded state_dict, by checking its first
    #    weights entry that is of a floating type - we assume all floating dtype weights are of the same dtype
    # we also may have config.torch_dtype available, but we won't rely on it till v5
    # (neither case is implemented yet: any non-nil torch_dtype raises Todo)
    dtype_orig = nil

    if !torch_dtype.nil?
      raise Todo
    end

    if is_sharded
      loaded_state_dict_keys = sharded_metadata["all_checkpoint_keys"]
    else
      loaded_state_dict_keys = state_dict.keys
    end
  end

  config.name_or_path = pretrained_model_name_or_path

  # Instantiate model.
  model = ModelingUtils.no_init_weights { new(config, *model_args, **model_kwargs) }

  # make sure we use the model's config since the constructor call might have copied it
  config = model.config

  if device_map.is_a?(String)
    raise Todo
  elsif !device_map.nil?
    raise Todo
  end

  if from_pt
    # restore default dtype
    if !dtype_orig.nil?
      Torch.set_default_dtype(dtype_orig)
    end

    model, _missing_keys, _unexpected_keys, _mismatched_keys, _offload_index, _error_msgs =
      _load_pretrained_model(
        model,
        state_dict,
        loaded_state_dict_keys,  # XXX: rename?
        resolved_archive_file,
        pretrained_model_name_or_path,
        ignore_mismatched_sizes: ignore_mismatched_sizes,
        sharded_metadata: sharded_metadata,
        _fast_init: _fast_init,
        low_cpu_mem_usage: low_cpu_mem_usage,
        device_map: device_map,
        offload_folder: offload_folder,
        offload_state_dict: offload_state_dict,
        dtype: torch_dtype,
        hf_quantizer: hf_quantizer,
        keep_in_fp32_modules: keep_in_fp32_modules
      )
  end

  # make sure token embedding weights are still tied if needed
  model.tie_weights

  # Set model in evaluation mode to deactivate DropOut modules by default
  model.eval

  # If it is a model with generation capabilities, attempt to load the generation config
  if model.can_generate && !pretrained_model_name_or_path.nil?
    raise Todo
  end

  # Dispatch model with hooks on all devices if necessary
  if !device_map.nil?
    raise Todo
  end

  if !hf_quantizer.nil?
    raise Todo
  end

  if output_loading_info
    raise Todo
  end

  model
end

Instance Method Details

#_backward_compatibility_gradient_checkpointing ⇒ `Object`



163
164
165

# File 'lib/transformers/modeling_utils.rb', line 163

# Placeholder hook that currently does nothing and returns nil.
# It is invoked by #post_init right after #init_weights.
#
# @return [nil]
def _backward_compatibility_gradient_checkpointing
  # TODO: not implemented yet
end

#_init_weights(mod) ⇒ `Object`



188
189
190

# File 'lib/transformers/modeling_utils.rb', line 188

# Weight-initialisation hook for a single module. This implementation is a
# no-op; #_initialize_weights calls it before marking the module as
# initialised. Presumably concrete models override it - confirm in subclasses.
#
# @param mod [Object] the module to initialise (ignored here)
# @return [nil]
def _init_weights(mod)
  # intentionally empty
end

#_initialize_weights(mod) ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 192

# Runs #_init_weights on +mod+ at most once.
#
# A module counts as initialised as soon as it carries the
# @is_hf_initialized instance variable (whatever its value); such modules
# are skipped.
#
# @param mod [Object] module to initialise
# @return [true, nil] true when the module was initialised by this call,
#   nil when it had already been marked
def _initialize_weights(mod)
  return if mod.instance_variable_defined?(:@is_hf_initialized)

  _init_weights(mod)
  # Mark the module so later passes leave it alone.
  mod.instance_variable_set(:@is_hf_initialized, true)
end

#base_model ⇒ `Object`



167
168
169

# File 'lib/transformers/modeling_utils.rb', line 167

# Returns the object stored in the instance variable named after the class's
# +base_model_prefix+, falling back to +self+.
#
# +self+ is returned when the prefix is nil or empty (the interpolated name
# "@" is not a valid instance variable name and would raise NameError), and
# when the instance variable is unset, nil or false.
#
# @return [Object] the base model, or self
def base_model
  prefix = self.class.base_model_prefix.to_s
  return self if prefix.empty?

  instance_variable_get("@#{prefix}") || self
end

#can_generate ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 171

# Reports whether the model has generation capabilities.
# This implementation unconditionally answers false.
#
# @return [Boolean] always false
def can_generate
  # TODO: improve (currently hard-coded)
  false
end

#dequantize ⇒ `Object`

Raises:

(Todo)



159
160
161

# File 'lib/transformers/modeling_utils.rb', line 159

# Not implemented: always raises.
#
# @raise [Todo] unconditionally
def dequantize
  raise Todo
end

#dummy_inputs ⇒ `Object`

Raises:

(Todo)



141
142
143

# File 'lib/transformers/modeling_utils.rb', line 141

# Not implemented: always raises.
#
# @raise [Todo] unconditionally
def dummy_inputs
  raise Todo
end

#framework ⇒ `Object`



145
146
147

# File 'lib/transformers/modeling_utils.rb', line 145

# Identifies the framework of this model.
#
# @return [String] always the literal "pt"
def framework
  "pt"
end

#get_input_embeddings ⇒ `Object`

Raises:

(Todo)



176
177
178

# File 'lib/transformers/modeling_utils.rb', line 176

# Not implemented: always raises.
#
# @raise [Todo] unconditionally
def get_input_embeddings
  raise Todo
end

#get_output_embeddings ⇒ `Object`



184
185
186

# File 'lib/transformers/modeling_utils.rb', line 184

# Returns the output embeddings of the model. This implementation has none
# and returns nil; #tie_weights skips embedding tying when the result is nil.
#
# @return [nil]
def get_output_embeddings
  nil  # Overwrite for models with output embeddings
end

#init_weights ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 219

# Applies head pruning recorded in the configuration and, once enabled,
# initialises and ties the weights.
#
# The initialisation step is switched off by a hard-coded flag, so at the
# moment only pruning can take effect.
#
# @return [nil] while the initialisation step is disabled
def init_weights
  # Prune heads if the configuration lists any.
  prune_heads(@config.pruned_heads) if @config.pruned_heads.any?

  # TODO implement no_init_weights context manager
  run_initialization = false
  return unless run_initialization

  # Initialize weights
  apply(method(:_initialize_weights))

  # Tie weights should be skipped when not initializing all weights
  # since from_pretrained(...) calls tie weights anyways
  tie_weights
end

#post_init ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 154

# Finalisation step: runs #init_weights, then
# #_backward_compatibility_gradient_checkpointing, in that order.
#
# @return [Object] whatever #_backward_compatibility_gradient_checkpointing returns
def post_init
  init_weights
  _backward_compatibility_gradient_checkpointing
end

#prune_heads(heads_to_prune) ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 237

# Records the heads to prune in the configuration and delegates the actual
# pruning to the base model.
#
# For every layer the stored list becomes the de-duplicated union of what was
# already stored and the newly requested heads.
#
# @param heads_to_prune [Hash] maps a layer key to the heads to prune in it
# @return [Object] the result of +base_model._prune_heads+
def prune_heads(heads_to_prune)
  heads_to_prune.each do |layer, heads|
    already_pruned = @config.pruned_heads.fetch(layer, [])
    merged = Set.new(already_pruned) | Set.new(heads)
    # Stored as an Array rather than a Set so it stays JSON-friendly.
    @config.pruned_heads[layer] = merged.to_a
  end

  base_model._prune_heads(heads_to_prune)
end

#set_input_embeddings(value) ⇒ `Object`

Raises:

(Todo)



180
181
182

# File 'lib/transformers/modeling_utils.rb', line 180

# Not implemented: always raises.
#
# @param value [Object] ignored
# @raise [Todo] unconditionally
def set_input_embeddings(value)
  raise Todo
end

#tie_weights ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 200

# Ties weights where the configuration asks for it.
#
# Tying of output embeddings and of encoder/decoder weights is not
# implemented; only sub-modules that expose +_tie_weights+ are processed.
#
# @return [Object] the collection returned by +modules+
# @raise [Todo] when +tie_word_embeddings+ is anything but false and
#   #get_output_embeddings returns a non-nil value
# @raise [Todo] when both +is_encoder_decoder+ and +tie_encoder_decoder+ are truthy
def tie_weights
  # Anything other than an explicit false (including nil) enables tying.
  word_tying_enabled = @config.tie_word_embeddings != false
  raise Todo if word_tying_enabled && !get_output_embeddings.nil?

  raise Todo if @config.is_encoder_decoder && @config.tie_encoder_decoder

  modules.each { |mod| mod._tie_weights if mod.respond_to?(:_tie_weights) }
end

#warn_if_padding_and_no_attention_mask(input_ids, attention_mask) ⇒ `Object`

# File 'lib/transformers/modeling_utils.rb', line 247

# Checks for padding tokens in +input_ids+ when no attention mask was given.
#
# Nothing happens when an attention mask is present or the configuration has
# no +pad_token_id+. Otherwise the selection +input_ids[0.., [-1, 0]]+ is
# inspected; the actual warning is not implemented, so a hit raises.
#
# @param input_ids [#[]] token ids; assumes a tensor-like object supporting
#   (range, index-list) indexing - TODO confirm
# @param attention_mask [Object, nil] attention mask, if any
# @return [nil]
# @raise [Todo] when the inspected ids include the pad token id
def warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
  return unless attention_mask.nil?
  return if @config.pad_token_id.nil?

  # Check only the first and last input IDs to reduce overhead.
  edge_ids = input_ids[0.., [-1, 0]]
  raise Todo if edge_ids.include?(@config.pad_token_id)
end

Class: Transformers::PreTrainedModel

Direct Known Subclasses

Instance Attribute Summary collapse

Class Method Summary collapse

Instance Method Summary collapse

Methods included from ClassAttribute

Methods included from ModuleUtilsMixin

Constructor Details

#initialize(config, *inputs, **kwargs) ⇒ PreTrainedModel

Instance Attribute Details

#config ⇒ Object (readonly)

Class Method Details

.from_pretrained(pretrained_model_name_or_path, *model_args, config: nil, cache_dir: nil, ignore_mismatched_sizes: false, force_download: false, local_files_only: false, token: nil, revision: "main", use_safetensors: nil, **kwargs) ⇒ Object

Instance Method Details

#_backward_compatibility_gradient_checkpointing ⇒ Object

#_init_weights(mod) ⇒ Object

#_initialize_weights(mod) ⇒ Object

#base_model ⇒ Object

#can_generate ⇒ Object

#dequantize ⇒ Object

#dummy_inputs ⇒ Object

#framework ⇒ Object

#get_input_embeddings ⇒ Object

#get_output_embeddings ⇒ Object

#init_weights ⇒ Object

#post_init ⇒ Object

#prune_heads(heads_to_prune) ⇒ Object

#set_input_embeddings(value) ⇒ Object

#tie_weights ⇒ Object

#warn_if_padding_and_no_attention_mask(input_ids, attention_mask) ⇒ Object

#initialize(config, *inputs, **kwargs) ⇒ `PreTrainedModel`

#config ⇒ `Object` (readonly)

.from_pretrained(pretrained_model_name_or_path, *model_args, config: nil, cache_dir: nil, ignore_mismatched_sizes: false, force_download: false, local_files_only: false, token: nil, revision: "main", use_safetensors: nil, **kwargs) ⇒ `Object`

#_backward_compatibility_gradient_checkpointing ⇒ `Object`

#_init_weights(mod) ⇒ `Object`

#_initialize_weights(mod) ⇒ `Object`

#base_model ⇒ `Object`

#can_generate ⇒ `Object`

#dequantize ⇒ `Object`

#dummy_inputs ⇒ `Object`

#framework ⇒ `Object`

#get_input_embeddings ⇒ `Object`

#get_output_embeddings ⇒ `Object`

#init_weights ⇒ `Object`

#post_init ⇒ `Object`

#prune_heads(heads_to_prune) ⇒ `Object`

#set_input_embeddings(value) ⇒ `Object`

#tie_weights ⇒ `Object`

#warn_if_padding_and_no_attention_mask(input_ids, attention_mask) ⇒ `Object`