Module: RubyLLM::Providers::OpenAI::Transcription

Included in: RubyLLM::Providers::OpenAI

Defined in: lib/ruby_llm/providers/openai/transcription.rb

Overview

Audio transcription methods for the OpenAI API integration

Class Method Summary

Class Method Details

.encode_speaker_references(references) ⇒ `Object`

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 29

# Converts speaker references into their LLM-ready form.
#
# Each element is wrapped in an Attachment and replaced by the result of
# calling +for_llm+ on it.
#
# @param references [Enumerable, nil] the speaker references to convert
# @return [Array, nil] one converted entry per reference, or nil when
#   +references+ is nil or false
def encode_speaker_references(references)
  # Nothing supplied: return nil so callers can drop the key entirely.
  return unless references

  references.map { |reference| Attachment.new(reference).for_llm }
end

.parse_transcription_response(response, model:) ⇒ `Object`

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 50

# Builds a RubyLLM::Transcription from the body of a transcription response.
#
# A String body is used directly as the transcription text. Any other body is
# indexed with the string keys 'text', 'language', 'duration', 'segments' and
# 'usage'.
#
# @param response [#body] the response whose body holds the transcription
# @param model [Object] the model identifier passed through to the result
# @return [RubyLLM::Transcription]
def parse_transcription_response(response, model:)
  body = response.body

  if body.is_a?(String)
    # A String body has no structured fields; only text and model are set.
    RubyLLM::Transcription.new(text: body, model: model)
  else
    token_usage = body['usage'] || {}
    # Two naming schemes are accepted for each count; the first one present wins.
    tokens_in = token_usage['input_tokens'] || token_usage['prompt_tokens']
    tokens_out = token_usage['output_tokens'] || token_usage['completion_tokens']

    RubyLLM::Transcription.new(
      text: body['text'],
      model: model,
      language: body['language'],
      duration: body['duration'],
      segments: body['segments'],
      input_tokens: tokens_in,
      output_tokens: tokens_out
    )
  end
end

.render_transcription_payload(file_part, model:, language:, **options) ⇒ `Object`

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 14

# Assembles the request payload for a transcription call.
#
# Entries whose value is nil are omitted from the result.
#
# @param file_part [Object] the value sent under the :file key
# @param model [String] the model identifier
# @param language [Object, nil] the value sent under the :language key
# @param options [Hash] optional settings; the keys read here are
#   :chunking_strategy, :response_format, :prompt, :temperature,
#   :timestamp_granularities, :speaker_names and :speaker_references
# @return [Hash] the payload with nil-valued entries removed
def render_transcription_payload(file_part, model:, language:, **options)
  # Only sent when the model supports it; defaults to 'auto' when unset.
  chunking =
    if supports_chunking_strategy?(model, options)
      options[:chunking_strategy] || 'auto'
    end

  payload = {
    model: model,
    file: file_part,
    language: language,
    chunking_strategy: chunking,
    response_format: response_format_for(model, options),
    prompt: options[:prompt],
    temperature: options[:temperature],
    timestamp_granularities: options[:timestamp_granularities],
    known_speaker_names: options[:speaker_names],
    known_speaker_references: encode_speaker_references(options[:speaker_references])
  }

  payload.reject { |_field, value| value.nil? }
end

.response_format_for(model, options) ⇒ `Object`

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 37

# Chooses the response format to request for a transcription.
#
# An explicitly supplied :response_format always wins, even when it is nil.
# Otherwise 'diarized_json' is used for models whose name contains 'diarize'.
#
# @param model [String] the model identifier
# @param options [Hash] caller options, optionally holding :response_format
# @return [Object, nil] the chosen format, or nil when none applies
def response_format_for(model, options)
  if options.key?(:response_format)
    options[:response_format]
  elsif model.include?('diarize')
    'diarized_json'
  end
end

.supports_chunking_strategy?(model, options) ⇒ `Boolean`

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 43

# Decides whether a chunking strategy should be included for this request.
#
# Models whose name starts with 'whisper' never get one. For any other model
# it applies when the caller passed a :chunking_strategy key or the model name
# contains 'diarize'.
#
# @param model [String] the model identifier
# @param options [Hash] caller options, optionally holding :chunking_strategy
# @return [Boolean]
def supports_chunking_strategy?(model, options)
  !model.start_with?('whisper') &&
    (options.key?(:chunking_strategy) || model.include?('diarize'))
end

.transcription_url ⇒ `Object`



10
11
12

# File 'lib/ruby_llm/providers/openai/transcription.rb', line 10

# Returns the endpoint path used for audio transcription requests.
#
# NOTE(review): the path has no leading slash, so it is presumably resolved
# against the provider's API base URL by the caller — confirm.
#
# @return [String] the path 'audio/transcriptions'
def transcription_url
  'audio/transcriptions'
end

Module: RubyLLM::Providers::OpenAI::Transcription

Overview

Class Method Summary

Class Method Details

.encode_speaker_references(references) ⇒ Object

.parse_transcription_response(response, model:) ⇒ Object

.render_transcription_payload(file_part, model:, language:, **options) ⇒ Object

.response_format_for(model, options) ⇒ Object

.supports_chunking_strategy?(model, options) ⇒ Boolean

.transcription_url ⇒ Object

.encode_speaker_references(references) ⇒ `Object`

.parse_transcription_response(response, model:) ⇒ `Object`

.render_transcription_payload(file_part, model:, language:, **options) ⇒ `Object`

.response_format_for(model, options) ⇒ `Object`

.supports_chunking_strategy?(model, options) ⇒ `Boolean`

.transcription_url ⇒ `Object`