Class: Transformers::Distilbert::DistilBertModel

Inherits:
DistilBertPreTrainedModel
Defined in:
lib/transformers/models/distilbert/modeling_distilbert.rb

Instance Attribute Summary

Attributes inherited from PreTrainedModel

#config

Instance Method Summary

Methods inherited from DistilBertPreTrainedModel

#_init_weights

Methods inherited from PreTrainedModel

#_backward_compatibility_gradient_checkpointing, #_init_weights, #_initialize_weights, #base_model, #can_generate, #dequantize, #dummy_inputs, #framework, from_pretrained, #get_output_embeddings, #init_weights, #post_init, #prune_heads, #set_input_embeddings, #tie_weights, #warn_if_padding_and_no_attention_mask

Methods included from ClassAttribute

#class_attribute

Methods included from ModuleUtilsMixin

#device, #get_extended_attention_mask, #get_head_mask

Constructor Details

#initialize(config) ⇒ DistilBertModel

Returns a new instance of DistilBertModel.



# File 'lib/transformers/models/distilbert/modeling_distilbert.rb', line 344

def initialize(config)
  super(config)

  @embeddings = Embeddings.new(config)  # Embeddings
  @transformer = Transformer.new(config)  # Encoder
  @use_flash_attention_2 = config._attn_implementation == "flash_attention_2"

  # Initialize weights and apply final processing
  post_init
end
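
The sketch below shows two ways a model instance might be created, assuming the transformers-rb gem is loaded; the DistilBertConfig constructor call and the "distilbert-base-uncased" model id are illustrative.

# Hypothetical: build an untrained model directly from a config object.
config = Transformers::Distilbert::DistilBertConfig.new
model = Transformers::Distilbert::DistilBertModel.new(config)

# More common path: load pretrained weights via the inherited from_pretrained class method.
model = Transformers::Distilbert::DistilBertModel.from_pretrained("distilbert-base-uncased")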

Instance Method Details

#_prune_heads(heads_to_prune) ⇒ Object



# File 'lib/transformers/models/distilbert/modeling_distilbert.rb', line 363

def _prune_heads(heads_to_prune)
  heads_to_prune.each do |layer, heads|
    @transformer.layer[layer].attention.prune_heads(heads)
  end
end
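
A minimal sketch of the expected argument shape, assuming a model instance named model: heads_to_prune maps a transformer layer index to the list of attention head indices to prune in that layer.

# Hypothetical pruning map: drop heads 0 and 2 in layer 0, and head 1 in layer 3.
heads_to_prune = {0 => [0, 2], 3 => [1]}
model._prune_heads(heads_to_prune)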

#forward(input_ids: nil, attention_mask: nil, head_mask: nil, inputs_embeds: nil, output_attentions: nil, output_hidden_states: nil, return_dict: nil) ⇒ Object



# File 'lib/transformers/models/distilbert/modeling_distilbert.rb', line 369

def forward(
  input_ids: nil,
  attention_mask: nil,
  head_mask: nil,
  inputs_embeds: nil,
  output_attentions: nil,
  output_hidden_states: nil,
  return_dict: nil
)
  output_attentions = !output_attentions.nil? ? output_attentions : @config.output_attentions
  output_hidden_states = (
    !output_hidden_states.nil? ? output_hidden_states : @config.output_hidden_states
  )
  return_dict = !return_dict.nil? ? return_dict : @config.use_return_dict

  if !input_ids.nil? && !inputs_embeds.nil?
    raise ArgumentError, "You cannot specify both input_ids and inputs_embeds at the same time"
  elsif !input_ids.nil?
    warn_if_padding_and_no_attention_mask(input_ids, attention_mask)
    input_shape = input_ids.size
  elsif !inputs_embeds.nil?
    input_shape = inputs_embeds.size[...-1]
  else
    raise ArgumentError, "You have to specify either input_ids or inputs_embeds"
  end

  device = !input_ids.nil? ? input_ids.device : inputs_embeds.device

  # Prepare head mask if needed
  head_mask = get_head_mask(head_mask, @config.num_hidden_layers)

  embeddings = @embeddings.(input_ids, inputs_embeds)  # (bs, seq_length, dim)

  if @use_flash_attention_2
    raise Todo
  else
    if attention_mask.nil?
      attention_mask = Torch.ones(input_shape, device: device)  # (bs, seq_length)
    end
  end

  @transformer.(
    x: embeddings,
    attn_mask: attention_mask,
    head_mask: head_mask,
    output_attentions: output_attentions,
    output_hidden_states: output_hidden_states,
    return_dict: return_dict
  )
end
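
An illustrative call, assuming a model and tokenizer loaded elsewhere (for example via from_pretrained and Transformers::AutoTokenizer), and assuming the tokenizer output exposes :input_ids and :attention_mask tensors.

enc = tokenizer.("Hello, world!", return_tensors: "pt")
output = model.(
  input_ids: enc[:input_ids],
  attention_mask: enc[:attention_mask]
)
last_hidden_state = output[0]  # (bs, seq_length, dim)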

#get_input_embeddings ⇒ Object



# File 'lib/transformers/models/distilbert/modeling_distilbert.rb', line 359

def get_input_embeddings
  @embeddings.word_embeddings
end
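
A quick way to inspect the returned module, assuming a model instance named model; the word embedding table is a Torch::NN::Embedding whose weight has shape (vocab_size, dim).

emb = model.get_input_embeddings
p emb.weight.shape  # e.g. [30522, 768] for the base DistilBERT vocabulary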

#get_position_embeddings ⇒ Object



# File 'lib/transformers/models/distilbert/modeling_distilbert.rb', line 355

def get_position_embeddings
  @embeddings.position_embeddings
end
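
Similarly, assuming a model instance named model, the position embedding table is a Torch::NN::Embedding sized by max_position_embeddings.

pos = model.get_position_embeddings
p pos.weight.shape  # e.g. [512, 768] with the default max_position_embeddings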