Method: Langchain::LLM::Ollama#complete

Defined in:
lib/langchain/llm/ollama.rb

#complete(prompt:, model: defaults[:completion_model], images: nil, format: nil, system: nil, template: nil, context: nil, raw: nil, mirostat: nil, mirostat_eta: nil, mirostat_tau: nil, num_ctx: nil, num_gqa: nil, num_gpu: nil, num_thread: nil, repeat_last_n: nil, repeat_penalty: nil, temperature: defaults[:temperature], seed: nil, stop: nil, tfs_z: nil, num_predict: nil, top_k: nil, top_p: nil, stop_sequences: nil, &block) ⇒ Langchain::LLM::OllamaResponse

Generate the completion for a given prompt

Example:

final_resp = ollama.complete(prompt:) { |resp| print resp.completion }
final_resp.total_tokens
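
A minimal non-streaming sketch (the server URL and prompt are illustrative; any running Ollama instance works). Without a block, the request is sent with stream: false and a single response object is returned:

  require "langchain"

  # Assumes a local Ollama server; the URL below is illustrative.
  ollama = Langchain::LLM::Ollama.new(url: "http://localhost:11434")

  # No block given, so the API is called with stream: false and one
  # Langchain::LLM::OllamaResponse is returned.
  response = ollama.complete(prompt: "Why is the sky blue?", temperature: 0.7)
  puts response.completion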

Parameters:

  prompt (String) — The prompt to generate a completion for
  model (String) (defaults to: defaults[:completion_model]) — The model to use
  temperature (Float) (defaults to: defaults[:temperature]) — Sampling temperature
  stop_sequences (Array<String>) — Sequences that stop generation; takes precedence over stop
  &block — Optional block; when given, the response is streamed and the block is called with each OllamaResponse chunk

  The remaining keyword arguments map directly to Ollama request fields and model options (see the source below).

Returns:

  (Langchain::LLM::OllamaResponse) — Response object

# File 'lib/langchain/llm/ollama.rb', line 81

def complete(
  prompt:,
  model: defaults[:completion_model],
  images: nil,
  format: nil,
  system: nil,
  template: nil,
  context: nil,
  raw: nil,
  mirostat: nil,
  mirostat_eta: nil,
  mirostat_tau: nil,
  num_ctx: nil,
  num_gqa: nil,
  num_gpu: nil,
  num_thread: nil,
  repeat_last_n: nil,
  repeat_penalty: nil,
  temperature: defaults[:temperature],
  seed: nil,
  stop: nil,
  tfs_z: nil,
  num_predict: nil,
  top_k: nil,
  top_p: nil,
  stop_sequences: nil,
  &block
)
  if stop_sequences
    stop = stop_sequences
  end

  parameters = {
    prompt: prompt,
    model: model,
    images: images,
    format: format,
    system: system,
    template: template,
    context: context,
    stream: block_given?, # rubocop:disable Performance/BlockGivenWithExplicitBlock
    raw: raw
  }.compact

  llm_parameters = {
    mirostat: mirostat,
    mirostat_eta: mirostat_eta,
    mirostat_tau: mirostat_tau,
    num_ctx: num_ctx,
    num_gqa: num_gqa,
    num_gpu: num_gpu,
    num_thread: num_thread,
    repeat_last_n: repeat_last_n,
    repeat_penalty: repeat_penalty,
    temperature: temperature,
    seed: seed,
    stop: stop,
    tfs_z: tfs_z,
    num_predict: num_predict,
    top_k: top_k,
    top_p: top_p
  }

  parameters[:options] = llm_parameters.compact
  responses_stream = []

  client.post("api/generate", parameters) do |req|
    req.options.on_data = json_responses_chunk_handler do |parsed_chunk|
      responses_stream << parsed_chunk

      block&.call(OllamaResponse.new(parsed_chunk, model: parameters[:model]))
    end
  end

  generate_final_completion_response(responses_stream, parameters[:model])
end
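
As the body shows, stop_sequences takes precedence over stop, prompt-level fields (prompt, model, images, format, system, template, context, raw) form the top-level request, and the sampling knobs are compacted into the options hash. A hypothetical call illustrating that split (the values are only examples):

  ollama.complete(
    prompt: "List three prime numbers as JSON",
    format: "json",           # top-level request field
    temperature: 0.2,         # compacted into parameters[:options]
    num_predict: 64,          # compacted into parameters[:options]
    stop_sequences: ["\n\n"]  # reassigned to `stop`, then into options
  )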