Class: Langchain::LLM::Ollama

Inherits:
  Base < Object
Defined in:
lib/langchain/llm/ollama.rb

Overview

Interface to the Ollama API. Available models are listed at ollama.ai/library

Usage:

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"], default_options: {})
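
The default_options hash can override any of the DEFAULTS keys listed below. A minimal sketch (the model name is an assumption; use any model pulled into your Ollama instance):

llm = Langchain::LLM::Ollama.new(
  url: ENV["OLLAMA_URL"],
  default_options: {
    chat_model: "llama3.1", # assumed to be available locally
    temperature: 0.2
  }
)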

Constant Summary

DEFAULTS =
{
  temperature: 0.0,
  completion_model: "llama3.1",
  embedding_model: "llama3.1",
  chat_model: "llama3.1",
  options: {}
}.freeze
EMBEDDING_SIZES =
{
  codellama: 4_096,
  "dolphin-mixtral": 4_096,
  llama2: 4_096,
  llama3: 4_096,
  "llama3.1": 4_096,
  llava: 4_096,
  mistral: 4_096,
  "mistral-openorca": 4_096,
  mixtral: 4_096,
  tinydolphin: 2_048
}.freeze

Instance Attribute Summary

Instance Method Summary

Methods inherited from Base

#chat_parameters, #default_dimension

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url: "http://localhost:11434", api_key: nil, default_options: {}) ⇒ Ollama

Initialize the Ollama client

Parameters:

  • url (String) (defaults to: "http://localhost:11434")

    The URL of the Ollama instance

  • api_key (String) (defaults to: nil)

    The API key to use. Optional; only needed when you expose the Ollama API through Open WebUI

  • default_options (Hash) (defaults to: {})

    The default options to use



# File 'lib/langchain/llm/ollama.rb', line 39

def initialize(url: "http://localhost:11434", api_key: nil, default_options: {})
  depends_on "faraday"
  @url = url
  @api_key = api_key
  @defaults = DEFAULTS.merge(default_options)
  chat_parameters.update(
    model: {default: @defaults[:chat_model]},
    temperature: {default: @defaults[:temperature]},
    template: {},
    stream: {default: false},
    response_format: {default: @defaults[:response_format]},
    options: {default: @defaults[:options]}
  )
  chat_parameters.remap(response_format: :format)
end
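
A hedged sketch of the api_key option described above, for an Ollama API exposed through Open WebUI (the environment variable names are placeholders):

llm = Langchain::LLM::Ollama.new(
  url: ENV["OPEN_WEBUI_URL"],         # placeholder for your Open WebUI endpoint
  api_key: ENV["OPEN_WEBUI_API_KEY"], # placeholder; omit when talking to Ollama directly
  default_options: {chat_model: "llama3.1"}
)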

Instance Attribute Details

#defaults ⇒ Object (readonly)

Returns the value of attribute defaults.



# File 'lib/langchain/llm/ollama.rb', line 11

def defaults
  @defaults
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



# File 'lib/langchain/llm/ollama.rb', line 11

def url
  @url
end

Instance Method Details

#chat(messages:, model: nil, **params, &block) ⇒ Langchain::LLM::OllamaResponse

Generate a chat completion

Example:

final_resp = ollama.chat(messages:) { |resp| print resp.chat_completion }
final_resp.total_tokens

The message object has the following fields:

  • role: the role of the message, either system, user, or assistant
  • content: the content of the message
  • images (optional): a list of images to include in the message (for multimodal models such as llava)
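
For illustration, a messages array built from these fields might look like this (the image payload is a placeholder; Ollama expects base64-encoded image data):

messages = [
  {role: "system", content: "You answer briefly."},
  {role: "user", content: "Describe this picture.", images: ["<base64-encoded image data>"]}
]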

Parameters:

  • messages (Array)

    The chat messages

  • model (String) (defaults to: nil)

    The model to use

  • params (Hash)

    Unified chat parameters from Langchain::LLM::Parameters::Chat::SCHEMA

  • block (Proc)

    Optional block; when given, the response is streamed and the block is called with each partial Langchain::LLM::OllamaResponse

Options Hash (**params):

  • :messages (Array<Hash>)

    Array of messages

  • :model (String)

    Model name

  • :format (String)

    Format to return a response in. Currently the only accepted value is `json`

  • :temperature (Float)

    The temperature to use

  • :template (String)

    The prompt template to use (overrides what is defined in the `Modelfile`)

Returns:

  • (Langchain::LLM::OllamaResponse)



# File 'lib/langchain/llm/ollama.rb', line 179

def chat(messages:, model: nil, **params, &block)
  parameters = chat_parameters.to_params(params.merge(messages:, model:, stream: block_given?)) # rubocop:disable Performance/BlockGivenWithExplicitBlock
  responses_stream = []

  client.post("api/chat", parameters) do |req|
    req.options.on_data = json_responses_chunk_handler do |parsed_chunk|
      responses_stream << parsed_chunk

      block&.call(OllamaResponse.new(parsed_chunk, model: parameters[:model]))
    end
  end

  generate_final_chat_completion_response(responses_stream, parameters[:model])
end
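
A usage sketch, assuming an Ollama instance is reachable at the configured URL and the default chat model is available:

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
messages = [{role: "user", content: "Say hello in one word."}]

# Without a block the request is not streamed and a single response is returned
response = llm.chat(messages: messages)
response.chat_completion

# With a block the response is streamed; each chunk is wrapped in an OllamaResponse,
# and the aggregated final response is still returned
final = llm.chat(messages: messages) { |chunk| print chunk.chat_completion }
final.total_tokens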

#complete(prompt:, model: defaults[:completion_model], images: nil, format: nil, system: nil, template: nil, context: nil, raw: nil, mirostat: nil, mirostat_eta: nil, mirostat_tau: nil, num_ctx: nil, num_gqa: nil, num_gpu: nil, num_thread: nil, repeat_last_n: nil, repeat_penalty: nil, temperature: defaults[:temperature], seed: nil, stop: nil, tfs_z: nil, num_predict: nil, top_k: nil, top_p: nil, stop_sequences: nil, &block) ⇒ Langchain::LLM::OllamaResponse

Generate the completion for a given prompt

Example:

final_resp = ollama.complete(prompt:) { |resp| print resp.completion }
final_resp.total_tokens

Parameters:

  • prompt (String)

    The prompt to complete

  • model (String) (defaults to: defaults[:completion_model])

    The model to use

Returns:

  • (Langchain::LLM::OllamaResponse)



# File 'lib/langchain/llm/ollama.rb', line 80

def complete(
  prompt:,
  model: defaults[:completion_model],
  images: nil,
  format: nil,
  system: nil,
  template: nil,
  context: nil,
  raw: nil,
  mirostat: nil,
  mirostat_eta: nil,
  mirostat_tau: nil,
  num_ctx: nil,
  num_gqa: nil,
  num_gpu: nil,
  num_thread: nil,
  repeat_last_n: nil,
  repeat_penalty: nil,
  temperature: defaults[:temperature],
  seed: nil,
  stop: nil,
  tfs_z: nil,
  num_predict: nil,
  top_k: nil,
  top_p: nil,
  stop_sequences: nil,
  &block
)
  if stop_sequences
    stop = stop_sequences
  end

  parameters = {
    prompt: prompt,
    model: model,
    images: images,
    format: format,
    system: system,
    template: template,
    context: context,
    stream: block_given?, # rubocop:disable Performance/BlockGivenWithExplicitBlock
    raw: raw
  }.compact

  llm_parameters = {
    mirostat: mirostat,
    mirostat_eta: mirostat_eta,
    mirostat_tau: mirostat_tau,
    num_ctx: num_ctx,
    num_gqa: num_gqa,
    num_gpu: num_gpu,
    num_thread: num_thread,
    repeat_last_n: repeat_last_n,
    repeat_penalty: repeat_penalty,
    temperature: temperature,
    seed: seed,
    stop: stop,
    tfs_z: tfs_z,
    num_predict: num_predict,
    top_k: top_k,
    top_p: top_p
  }

  parameters[:options] = llm_parameters.compact
  responses_stream = []

  client.post("api/generate", parameters) do |req|
    req.options.on_data = json_responses_chunk_handler do |parsed_chunk|
      responses_stream << parsed_chunk

      block&.call(OllamaResponse.new(parsed_chunk, model: parameters[:model]))
    end
  end

  generate_final_completion_response(responses_stream, parameters[:model])
end
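
A usage sketch under the same assumptions (running Ollama instance, default completion model pulled):

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])

# Plain completion
response = llm.complete(prompt: "Complete this proverb: A stitch in time")
response.completion

# Streaming completion with a stop sequence and fixed temperature
final = llm.complete(prompt: "Count from 1 to 10:", temperature: 0.0, stop_sequences: ["7"]) do |chunk|
  print chunk.completion
end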

#default_dimensions ⇒ Integer

Returns the number of vector dimensions for the embeddings

Returns:

  • (Integer)

    The number of vector dimensions



# File 'lib/langchain/llm/ollama.rb', line 57

def default_dimensions
  # since Ollama can run multiple models, look it up or generate an embedding and return the size
  @default_dimensions ||=
    EMBEDDING_SIZES.fetch(defaults[:embedding_model].to_sym) do
      embed(text: "test").embedding.size
    end
end
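
For example (the first value comes from EMBEDDING_SIZES above; the second model name is hypothetical):

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
llm.default_dimensions # => 4096 for the default "llama3.1" embedding model, via lookup

custom = Langchain::LLM::Ollama.new(
  url: ENV["OLLAMA_URL"],
  default_options: {embedding_model: "my-custom-model"} # hypothetical model not in EMBEDDING_SIZES
)
custom.default_dimensions # falls back to embedding the string "test" and measuring the vector size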

#embed(text:, model: defaults[:embedding_model], mirostat: nil, mirostat_eta: nil, mirostat_tau: nil, num_ctx: nil, num_gqa: nil, num_gpu: nil, num_thread: nil, repeat_last_n: nil, repeat_penalty: nil, temperature: defaults[:temperature], seed: nil, stop: nil, tfs_z: nil, num_predict: nil, top_k: nil, top_p: nil) ⇒ Langchain::LLM::OllamaResponse

Generate an embedding for a given text

Parameters:

  • text (String)

    The text to generate an embedding for

  • model (String) (defaults to: defaults[:embedding_model])

    The model to use

  • options (Hash)

    The generation options to pass through to Ollama (mirostat, temperature, top_k, etc.)

Returns:

  • (Langchain::LLM::OllamaResponse)



# File 'lib/langchain/llm/ollama.rb', line 202

def embed(
  text:,
  model: defaults[:embedding_model],
  mirostat: nil,
  mirostat_eta: nil,
  mirostat_tau: nil,
  num_ctx: nil,
  num_gqa: nil,
  num_gpu: nil,
  num_thread: nil,
  repeat_last_n: nil,
  repeat_penalty: nil,
  temperature: defaults[:temperature],
  seed: nil,
  stop: nil,
  tfs_z: nil,
  num_predict: nil,
  top_k: nil,
  top_p: nil
)
  parameters = {
    model: model,
    input: Array(text)
  }.compact

  llm_parameters = {
    mirostat: mirostat,
    mirostat_eta: mirostat_eta,
    mirostat_tau: mirostat_tau,
    num_ctx: num_ctx,
    num_gqa: num_gqa,
    num_gpu: num_gpu,
    num_thread: num_thread,
    repeat_last_n: repeat_last_n,
    repeat_penalty: repeat_penalty,
    temperature: temperature,
    seed: seed,
    stop: stop,
    tfs_z: tfs_z,
    num_predict: num_predict,
    top_k: top_k,
    top_p: top_p
  }

  parameters[:options] = llm_parameters.compact

  response = client.post("api/embed") do |req|
    req.body = parameters
  end

  OllamaResponse.new(response.body, model: parameters[:model])
end
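
A minimal sketch, assuming the default embedding model is available locally:

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
response = llm.embed(text: "Ruby is a programmer's best friend")
response.embedding # => Array of Floats (4,096 values for llama3.1)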

#summarize(text:) ⇒ String

Generate a summary for a given text

Parameters:

  • text (String)

    The text to generate a summary for

Returns:

  • (String)

    The summary



# File 'lib/langchain/llm/ollama.rb', line 259

def summarize(text:)
  prompt_template = Langchain::Prompt.load_from_path(
    file_path: Langchain.root.join("langchain/llm/prompts/ollama/summarize_template.yaml")
  )
  prompt = prompt_template.format(text: text)

  complete(prompt: prompt)
end
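
A usage sketch (the file path is a placeholder):

llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"])
long_text = File.read("article.txt") # placeholder input
llm.summarize(text: long_text)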