Class: Langchain::LLM::Ollama

Inherits:
Base
  • Object
Defined in:
lib/langchain/llm/ollama.rb

Overview

Interface to the Ollama API. Available models are listed at ollama.ai/library

Usage:

llm = Langchain::LLM::Ollama.new
llm = Langchain::LLM::Ollama.new(url: ENV["OLLAMA_URL"], default_options: {})

Constant Summary

DEFAULTS =
{
  temperature: 0.8,
  completion_model_name: "llama3",
  embeddings_model_name: "llama3",
  chat_completion_model_name: "llama3"
}.freeze
EMBEDDING_SIZES =
{
  codellama: 4_096,
  "dolphin-mixtral": 4_096,
  llama2: 4_096,
  llama3: 4_096,
  llava: 4_096,
  mistral: 4_096,
  "mistral-openorca": 4_096,
  mixtral: 4_096
}.freeze

Instance Attribute Summary

Instance Method Summary

Methods inherited from Base

#chat_parameters, #default_dimension

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url: "http://localhost:11434", default_options: {}) ⇒ Ollama

Initialize the Ollama client

Parameters:

  • url (String) (defaults to: "http://localhost:11434")

    The URL of the Ollama instance

  • default_options (Hash) (defaults to: {})

    The default options to use; deep-merged over DEFAULTS



# File 'lib/langchain/llm/ollama.rb', line 38

def initialize(url: "http://localhost:11434", default_options: {})
  depends_on "faraday"
  @url = url
  @defaults = DEFAULTS.deep_merge(default_options)
  chat_parameters.update(
    model: {default: @defaults[:chat_completion_model_name]},
    temperature: {default: @defaults[:temperature]},
    template: {},
    stream: {default: false}
  )
  chat_parameters.remap(response_format: :format)
end
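
For example, a client pointed at a non-default host with lower-temperature defaults might be configured as follows (the URL, model name, and option values are illustrative, not required settings):

llm = Langchain::LLM::Ollama.new(
  url: ENV.fetch("OLLAMA_URL", "http://localhost:11434"),
  default_options: {
    temperature: 0.2,                     # overrides DEFAULTS[:temperature] via deep_merge
    chat_completion_model_name: "llama3", # model must already be pulled on the Ollama server
    embeddings_model_name: "llama3"
  }
)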

Instance Attribute Details

#defaults ⇒ Object (readonly)

Returns the value of attribute defaults.



# File 'lib/langchain/llm/ollama.rb', line 14

def defaults
  @defaults
end

#url ⇒ Object (readonly)

Returns the value of attribute url.



# File 'lib/langchain/llm/ollama.rb', line 14

def url
  @url
end

Instance Method Details

#chat(params = {}) ⇒ Object

Generate a chat completion

The message object has the following fields:

  • role: the role of the message, either system, user, or assistant
  • content: the content of the message
  • images (optional): a list of images to include in the message (for multimodal models such as llava)

Parameters:

  • params (Hash) (defaults to: {})

    unified chat parameters from [Langchain::LLM::Parameters::Chat::SCHEMA]

Options Hash (params):

  • :model (String)

    Model name

  • :messages (Array<Hash>)

    Array of messages

  • :format (String)

    Format to return a response in. Currently the only accepted value is `json`

  • :temperature (Float)

    The temperature to use

  • :template (String)

    The prompt template to use (overrides what is defined in the `Modelfile`)

  • :stream (Boolean)

    Whether to stream the response. If false, the response is returned as a single response object rather than a stream of objects



# File 'lib/langchain/llm/ollama.rb', line 174

def chat(params = {})
  parameters = chat_parameters.to_params(params)

  response = client.post("api/chat") do |req|
    req.body = parameters
  end

  Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
end
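
A minimal usage sketch (the model name and messages are illustrative; with stream: false, the default, a single Langchain::LLM::OllamaResponse is returned):

llm = Langchain::LLM::Ollama.new

response = llm.chat(
  model: "llama3",
  messages: [
    {role: "system", content: "You are a helpful assistant."},
    {role: "user", content: "Why is the sky blue?"}
  ]
)
response.chat_completion # => the assistant's reply as a String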

#complete(prompt:, model: defaults[:completion_model_name], images: nil, format: nil, system: nil, template: nil, context: nil, stream: nil, raw: nil, mirostat: nil, mirostat_eta: nil, mirostat_tau: nil, num_ctx: nil, num_gqa: nil, num_gpu: nil, num_thread: nil, repeat_last_n: nil, repeat_penalty: nil, temperature: defaults[:temperature], seed: nil, stop: nil, tfs_z: nil, num_predict: nil, top_k: nil, top_p: nil, stop_sequences: nil, &block) ⇒ Langchain::LLM::OllamaResponse

Generate the completion for a given prompt

Parameters:

  • prompt (String)

    The prompt to generate a completion for

  • model (String) (defaults to: defaults[:completion_model_name])

    The model to use

  The remaining keyword arguments are passed through to the Ollama API; generation options such as temperature, top_k, and top_p are sent as the request's options.

Returns:

  • (Langchain::LLM::OllamaResponse)

    The completion response



# File 'lib/langchain/llm/ollama.rb', line 70

def complete(
  prompt:,
  model: defaults[:completion_model_name],
  images: nil,
  format: nil,
  system: nil,
  template: nil,
  context: nil,
  stream: nil,
  raw: nil,
  mirostat: nil,
  mirostat_eta: nil,
  mirostat_tau: nil,
  num_ctx: nil,
  num_gqa: nil,
  num_gpu: nil,
  num_thread: nil,
  repeat_last_n: nil,
  repeat_penalty: nil,
  temperature: defaults[:temperature],
  seed: nil,
  stop: nil,
  tfs_z: nil,
  num_predict: nil,
  top_k: nil,
  top_p: nil,
  stop_sequences: nil,
  &block
)
  if stop_sequences
    stop = stop_sequences
  end

  parameters = {
    prompt: prompt,
    model: model,
    images: images,
    format: format,
    system: system,
    template: template,
    context: context,
    stream: stream,
    raw: raw
  }.compact

  llm_parameters = {
    mirostat: mirostat,
    mirostat_eta: mirostat_eta,
    mirostat_tau: mirostat_tau,
    num_ctx: num_ctx,
    num_gqa: num_gqa,
    num_gpu: num_gpu,
    num_thread: num_thread,
    repeat_last_n: repeat_last_n,
    repeat_penalty: repeat_penalty,
    temperature: temperature,
    seed: seed,
    stop: stop,
    tfs_z: tfs_z,
    num_predict: num_predict,
    top_k: top_k,
    top_p: top_p
  }

  parameters[:options] = llm_parameters.compact

  response = ""

  client.post("api/generate") do |req|
    req.body = parameters

    req.options.on_data = proc do |chunk, size|
      chunk.split("\n").each do |line_chunk|
        json_chunk = begin
          JSON.parse(line_chunk)
        # In some instances the chunk exceeds the buffer size and the JSON parser fails
        rescue JSON::ParserError
          nil
        end

        response += json_chunk.dig("response") unless json_chunk.blank?

        # Yield each parsed chunk as it arrives so callers can stream the output
        yield json_chunk, size if block
      end
    end
  end

  Langchain::LLM::OllamaResponse.new(response, model: parameters[:model])
end
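
A minimal usage sketch (the prompt and option values are illustrative):

llm = Langchain::LLM::Ollama.new

response = llm.complete(
  prompt: "Name three Ruby web frameworks.",
  temperature: 0.2,
  stop_sequences: ["\n\n"]
)
response.completion # => the generated text as a String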

#default_dimensions ⇒ Integer

Returns the # of vector dimensions for the embeddings

Returns:

  • (Integer)

    The # of vector dimensions



# File 'lib/langchain/llm/ollama.rb', line 53

def default_dimensions
  # since Ollama can run multiple models, look it up or generate an embedding and return the size
  @default_dimensions ||=
    EMBEDDING_SIZES.fetch(defaults[:embeddings_model_name].to_sym) do
      embed(text: "test").embedding.size
    end
end
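
For example, with the default "llama3" embeddings model the dimension is looked up from EMBEDDING_SIZES without calling the API:

llm = Langchain::LLM::Ollama.new
llm.default_dimensions # => 4096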

#embed(text:, model: defaults[:embeddings_model_name], mirostat: nil, mirostat_eta: nil, mirostat_tau: nil, num_ctx: nil, num_gqa: nil, num_gpu: nil, num_thread: nil, repeat_last_n: nil, repeat_penalty: nil, temperature: defaults[:temperature], seed: nil, stop: nil, tfs_z: nil, num_predict: nil, top_k: nil, top_p: nil) ⇒ Langchain::LLM::OllamaResponse

Generate an embedding for a given text

Parameters:

  • text (String)

    The text to generate an embedding for

  • model (String) (defaults to: defaults[:embeddings_model_name])

    The model to use

  • options (Hash)

    Model generation options (mirostat, temperature, top_k, top_p, etc.) sent as the request's options

Returns:

  • (Langchain::LLM::OllamaResponse)

    The embedding response


# File 'lib/langchain/llm/ollama.rb', line 192

def embed(
  text:,
  model: defaults[:embeddings_model_name],
  mirostat: nil,
  mirostat_eta: nil,
  mirostat_tau: nil,
  num_ctx: nil,
  num_gqa: nil,
  num_gpu: nil,
  num_thread: nil,
  repeat_last_n: nil,
  repeat_penalty: nil,
  temperature: defaults[:temperature],
  seed: nil,
  stop: nil,
  tfs_z: nil,
  num_predict: nil,
  top_k: nil,
  top_p: nil
)
  parameters = {
    prompt: text,
    model: model
  }.compact

  llm_parameters = {
    mirostat: mirostat,
    mirostat_eta: mirostat_eta,
    mirostat_tau: mirostat_tau,
    num_ctx: num_ctx,
    num_gqa: num_gqa,
    num_gpu: num_gpu,
    num_thread: num_thread,
    repeat_last_n: repeat_last_n,
    repeat_penalty: repeat_penalty,
    temperature: temperature,
    seed: seed,
    stop: stop,
    tfs_z: tfs_z,
    num_predict: num_predict,
    top_k: top_k,
    top_p: top_p
  }

  parameters[:options] = llm_parameters.compact

  response = client.post("api/embeddings") do |req|
    req.body = parameters
  end

  Langchain::LLM::OllamaResponse.new(response.body, model: parameters[:model])
end
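
A minimal usage sketch (the text is illustrative):

llm = Langchain::LLM::Ollama.new

response = llm.embed(text: "Ruby is a programmer-friendly language")
response.embedding      # => Array of Floats
response.embedding.size # => 4096 with the default "llama3" model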

#summarize(text:) ⇒ Langchain::LLM::OllamaResponse

Generate a summary for a given text

Parameters:

  • text (String)

    The text to generate a summary for

Returns:

  • (Langchain::LLM::OllamaResponse)

    The completion response containing the generated summary



# File 'lib/langchain/llm/ollama.rb', line 249

def summarize(text:)
  prompt_template = Langchain::Prompt.load_from_path(
    file_path: Langchain.root.join("langchain/llm/prompts/ollama/summarize_template.yaml")
  )
  prompt = prompt_template.format(text: text)

  complete(prompt: prompt)
end
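
A minimal usage sketch (the text is illustrative; the summary is generated with the default completion model):

llm = Langchain::LLM::Ollama.new

response = llm.summarize(text: "Ollama lets you run large language models locally and exposes an HTTP API for completions, chat, and embeddings...")
response.completion # => the generated summary as a String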