Class: Whisper::Model
- Inherits: Object
- Inheritance hierarchy: Object → Whisper::Model
- Defined in:
- lib/whisper/model.rb
Defined Under Namespace
Classes: TranscriptionResult
Instance Method Summary
- #close ⇒ Object
-
#initialize(model_path) ⇒ Model
constructor
A new instance of Model.
- #transcribe_from_file(audio_file_path, format: 'plaintext') ⇒ Object
Constructor Details
#initialize(model_path) ⇒ Model
Returns a new instance of Model.
9 10 11 12 13 14 15 16 17 |
# File 'lib/whisper/model.rb', line 9
# Loads a Whisper model from +model_path+ and stores the resulting native
# context in @ctx.
#
# @param model_path [String] path passed to whisper_init_from_file_with_params
# @param use_gpu [Boolean] value written to the context params' :use_gpu
#   field; defaults to true, which is what this constructor always used
# @param gpu_device [Integer] value written to the :gpu_device field;
#   defaults to 0, the previously hard-coded device
# @raise [RuntimeError] if the returned context pointer is null
def initialize(model_path, use_gpu: true, gpu_device: 0)
  params = Whisper.whisper_context_default_params
  # Override only the GPU-related fields; everything else keeps the library defaults.
  params[:use_gpu] = use_gpu
  params[:gpu_device] = gpu_device

  @ctx = Whisper.whisper_init_from_file_with_params(model_path, params)
  raise 'Failed to initialize Whisper model' if @ctx.null?
end
Instance Method Details
#close ⇒ Object
67 68 69 |
# File 'lib/whisper/model.rb', line 67
# Releases the native context held in @ctx via Whisper.whisper_free.
#
# Safe to call more than once: the reference is cleared after the first
# call, so later calls return without handing the same (already freed)
# context to whisper_free again.
#
# @return [nil]
def close
  return if @ctx.nil?

  Whisper.whisper_free(@ctx)
  # Drop the reference so a second #close cannot double-free the context.
  @ctx = nil
end
#transcribe_from_file(audio_file_path, format: 'plaintext') ⇒ Object
19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 |
# File 'lib/whisper/model.rb', line 19
# Transcribes an audio file with the loaded model and renders the segments
# in the requested text format.
#
# @param audio_file_path [String] path handed to
#   Whisper::AudioProcessor.convert_to_float_array
# @param format [String, Symbol] 'plaintext' (default) or 'srt',
#   matched case-insensitively
# @return [TranscriptionResult] built from the detected language string and
#   the rendered output
# @raise [RuntimeError] 'Unsupported format: …' for any other format (raised
#   before any audio is processed), or 'Transcription failed' when
#   whisper_full returns a non-zero status
def transcribe_from_file(audio_file_path, format: 'plaintext')
  # Validate first: an unsupported format should fail immediately instead of
  # after the audio has been decoded and the model has been run.
  output_format = format.to_s.downcase
  raise "Unsupported format: #{format}" unless %w[plaintext srt].include?(output_format)

  # Load audio file and convert to float array
  audio_data = Whisper::AudioProcessor.convert_to_float_array(audio_file_path)

  # Prepare full params
  params = Whisper.whisper_full_default_params(Whisper::WHISPER_SAMPLING_GREEDY)
  params[:n_threads] = 4
  params[:translate] = false
  params[:language] = FFI::Pointer::NULL # Auto-detect language

  # Copy the samples into native memory for the C call
  n_samples = audio_data.size
  samples_ptr = FFI::MemoryPointer.new(:float, n_samples)
  samples_ptr.write_array_of_float(audio_data)

  result = Whisper.whisper_full(@ctx, params, samples_ptr, n_samples)
  raise 'Transcription failed' if result != 0

  # Retrieve detected language
  lang_id = Whisper.whisper_full_lang_id(@ctx)
  language = Whisper.whisper_lang_str(lang_id)

  n_segments = Whisper.whisper_full_n_segments(@ctx)
  # Unfrozen buffer appended to in place; avoids reallocating on every segment.
  output = +''

  case output_format
  when 'plaintext'
    n_segments.times do |i|
      output << Whisper.whisper_full_get_segment_text(@ctx, i)
    end
  when 'srt'
    n_segments.times do |i|
      # NOTE(review): dividing by 100.0 presumably converts whisper's
      # 10 ms timestamp units into seconds for format_time_srt — confirm.
      start_time = Whisper.whisper_full_get_segment_t0(@ctx, i) / 100.0
      end_time = Whisper.whisper_full_get_segment_t1(@ctx, i) / 100.0
      segment_text = Whisper.whisper_full_get_segment_text(@ctx, i)

      output << "#{i + 1}\n"
      output << "#{format_time_srt(start_time)} --> #{format_time_srt(end_time)}\n"
      output << "#{segment_text.strip}\n\n"
    end
  end

  TranscriptionResult.new(language, output)
end