Class: Informers::ZeroShotImageClassificationPipeline

Inherits:
Pipeline
  • Object
show all
Defined in:
lib/informers/pipelines.rb

Instance Method Summary collapse

Methods inherited from Pipeline

#initialize

Constructor Details

This class inherits a constructor from Informers::Pipeline

Instance Method Details

#call(images, candidate_labels, hypothesis_template: "This is a photo of {}") ⇒ Object



628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
# File 'lib/informers/pipelines.rb', line 628

# Scores one or more images against a set of candidate labels.
#
# Each label is inserted into +hypothesis_template+ (replacing the first "{}"),
# the resulting texts are tokenized, the images are run through the processor,
# and the model is called with both inputs. The first model output is then
# converted to per-label scores for every image.
#
# @param images a single image or an Array of images; forwarded to
#   +prepare_images+. An Array input makes the result batched.
# @param candidate_labels [Array] labels to score; each is converted with
#   +to_s+ when building the prompt, and returned unchanged in the result.
# @param hypothesis_template [String] prompt template; only the first "{}"
#   placeholder is replaced by the label.
# @return [Array<Hash>, Array<Array<Hash>>] for a single image, an Array of
#   +{label:, score:}+ hashes sorted by descending score; for an Array of
#   images, one such Array per image.
def call(images, candidate_labels, hypothesis_template: "This is a photo of {}")
  is_batched = images.is_a?(Array)
  prepared_images = prepare_images(images)

  # "siglip" models get fixed-length padding and sigmoid scoring; every other
  # model type gets dynamic padding and softmax scoring.
  siglip = @model.config[:model_type] == "siglip"

  # Insert label into hypothesis template. The block form of String#sub is
  # used on purpose: with a String replacement, backslash sequences inside the
  # label (e.g. "\0", "\1") would be expanded as back-references.
  texts = candidate_labels.map { |label| hypothesis_template.sub("{}") { label.to_s } }

  # Run tokenization
  text_inputs = @tokenizer.(texts,
    padding: siglip ? "max_length" : true,
    truncation: true
  )

  # Run processor
  pixel_values = @processor.(prepared_images)[:pixel_values]

  # Run model with both text and pixel inputs
  output = @model.(text_inputs.merge(pixel_values: pixel_values))

  # Compare each image with each candidate label
  to_return = output[0].map do |logits|
    # Turn the per-image values into scores (sigmoid for siglip, softmax otherwise)
    probs = siglip ? Utils.sigmoid(logits) : Utils.softmax(logits)

    probs
      .map.with_index { |score, i| {label: candidate_labels[i], score: score} }
      .sort_by { |entry| -entry[:score] }
  end

  # Unwrap the result when the caller passed a single image
  is_batched ? to_return : to_return[0]
end