Class: Informers::ZeroShotAudioClassificationPipeline

Inherits:
Pipeline
  • Object
show all
Defined in:
lib/informers/pipelines.rb

Instance Method Summary collapse

Methods inherited from Pipeline

#initialize

Constructor Details

This class inherits a constructor from Informers::Pipeline

Instance Method Details

#call(audio, candidate_labels, hypothesis_template: "This is a sound of {}.") ⇒ Object



931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
# File 'lib/informers/pipelines.rb', line 931

# Scores one or more audio inputs against a list of candidate labels.
#
# Every label is inserted into +hypothesis_template+ (replacing the first
# "{}"), the resulting sentences are tokenized once, and the model is then run
# once per prepared audio with the shared text inputs merged into that audio's
# processor output. The softmax of +logits_per_audio+ gives one score per label.
#
# @param audio [Object, Array] a single audio input, or an Array of them. Any
#   non-Array value is treated as a single input. The accepted element types
#   are whatever +prepare_audios+ supports (not visible from this method).
# @param candidate_labels [Array<String>] labels to score each audio against.
# @param hypothesis_template [String] sentence template; the first "{}" is
#   replaced by each label.
# @return [Array<Hash>, Array<Array<Hash>>] for a single input, an Array of
#   +{label:, score:}+ hashes in +candidate_labels+ order; for an Array input,
#   one such Array per audio.
def call(audio, candidate_labels, hypothesis_template: "This is a sound of {}.")
  single = !audio.is_a?(Array)
  audio = [audio] if single

  # Insert label into hypothesis template. The block form of `sub` inserts the
  # label verbatim; the string-replacement form would interpret backslash
  # sequences in the label (e.g. "\0" expands to the matched "{}").
  texts = candidate_labels.map { |label| hypothesis_template.sub("{}") { label } }

  # Run tokenization once; the text inputs are shared by every audio.
  text_inputs = @tokenizer.(texts, padding: true, truncation: true)

  sampling_rate = @processor.feature_extractor.config["sampling_rate"]
  prepared_audios = prepare_audios(audio, sampling_rate)

  results = prepared_audios.map do |aud|
    audio_inputs = @processor.(aud)

    # Run model with both text and audio inputs
    output = @model.(text_inputs.merge(audio_inputs))

    # Compute softmax per audio
    probs = Utils.softmax(output.logits_per_audio.data)

    probs.map.with_index { |score, i| {label: candidate_labels[i], score: score} }
  end

  single ? results[0] : results
end