Class: Whisper::Model

Inherits:
Object
  • Object
show all
Defined in:
lib/whisper/model.rb

Defined Under Namespace

Classes: TranscriptionResult

Instance Method Summary collapse

Constructor Details

#initialize(model_path) ⇒ Model

Returns a new instance of Model.



8
9
10
11
12
13
14
# File 'lib/whisper/model.rb', line 8

# Builds a model wrapper for the given model file.
#
# Records the path, starts with empty native handles, and then runs the two
# setup helpers in order: context first, state second.
# NOTE(review): init_whisper_context / init_whisper_state are defined outside
# this view; presumably they populate @ctx and @state -- confirm.
#
# @param model_path path of the model file; stored as-is in @model_path
def initialize(model_path)
  @model_path = model_path
  @ctx = @state = nil

  init_whisper_context
  init_whisper_state
end

Instance Method Details

#close ⇒ Object



46
47
48
49
# File 'lib/whisper/model.rb', line 46

# Releases the native whisper state and context held by this instance.
#
# Each handle is cleared right after it is freed, so calling #close more than
# once is harmless: the second call finds nil handles and does nothing.
# Previously the handles kept their old values and a repeated call would hand
# already-freed handles back to the native free functions.
#
# The state is released before the context, matching the original order.
#
# @return [nil]
def close
  unless @state.nil?
    Whisper.whisper_free_state @state
    @state = nil
  end

  unless @ctx.nil?
    Whisper.whisper_free @ctx
    @ctx = nil
  end

  nil
end

#transcribe_from_audio_data(audio_data, format: 'plaintext', **params) ⇒ Object



22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
# File 'lib/whisper/model.rb', line 22

# Runs a transcription over in-memory audio samples.
#
# Steps, in order: build the full-params value from +params+, copy the samples
# into an FFI float buffer, run whisper_full_with_state against this
# instance's context/state, then read back the language and the formatted
# output.
#
# @param audio_data collection of samples; must respond to #size and be
#   accepted by FFI::MemoryPointer#write_array_of_float
#   (presumably an Array of Floats -- confirm against callers)
# @param format output format name, forwarded to format_transcription
# @param params extra options, forwarded to default_full_params
# @return [TranscriptionResult] built from the language string and the output
# @raise [RuntimeError] 'Transcription failed' when the native call returns a
#   non-zero status
def transcribe_from_audio_data(audio_data, format: 'plaintext', **params)
  whisper_params = default_full_params(params)

  # Stage the samples in a native float buffer the C call can read from.
  sample_count = audio_data.size
  buffer = FFI::MemoryPointer.new(:float, sample_count)
  buffer.write_array_of_float(audio_data)

  status = Whisper.whisper_full_with_state(@ctx, @state, whisper_params, buffer, sample_count)
  raise 'Transcription failed' unless status == 0

  # Language is read from the state as an id and then mapped to its string form.
  language = Whisper.whisper_lang_str(Whisper.whisper_full_lang_id_from_state(@state))

  # The segment count drives how the formatter assembles the output.
  segment_count = Whisper.whisper_full_n_segments_from_state(@state)
  output = format_transcription(format, n_segments: segment_count)

  TranscriptionResult.new(language, output)
end

#transcribe_from_file(audio_file_path, format: 'plaintext', **params) ⇒ Object



16
17
18
19
20
# File 'lib/whisper/model.rb', line 16

# Transcribes the audio stored at +audio_file_path+.
#
# The file is first turned into a float sample array by
# Whisper::AudioProcessor.convert_to_float_array; the samples are then handed
# to #transcribe_from_audio_data together with the same format and options.
#
# @param audio_file_path path passed to the audio processor
# @param format output format name, forwarded unchanged
# @param params extra options, forwarded unchanged
# @return whatever #transcribe_from_audio_data returns
def transcribe_from_file(audio_file_path, format: 'plaintext', **params)
  samples = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)
  transcribe_from_audio_data(samples, format: format, **params)
end