Class: Langchain::Vectorsearch::Chroma

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/chroma.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, logger_options, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:) ⇒ Chroma

Initialize the Chroma client

Parameters:

  • url (String)

    The URL of the Chroma server

  • index_name (String)

    The name of the index to use

  • llm (Object)

    The LLM client to use



19
20
21
22
23
24
25
26
27
28
29
# File 'lib/langchain/vectorsearch/chroma.rb', line 19

# Initialize the Chroma client.
#
# @param url [String] The URL of the Chroma server
# @param index_name [String] The name of the index (collection) to use
# @param llm [Object] The LLM client to use for embeddings
def initialize(url:, index_name:, llm:)
  # Raise early if the optional chroma-db gem is not installed.
  depends_on "chroma-db"

  # Configure the chroma-db gem globally: point it at the target server and
  # route its logging through Langchain's logger at the same level.
  ::Chroma.connect_host = url
  ::Chroma.logger = Langchain.logger
  ::Chroma.log_level = Langchain.logger.level

  @index_name = index_name

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: [], metadatas: []) ⇒ Hash

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The list of texts to add

  • ids (Array<String>) (defaults to: [])

    The list of ids to use for the texts (optional)

  • metadatas (Array<Hash>) (defaults to: [])

    The list of metadata to use for the texts (optional)

Returns:

  • (Hash)

    The response from the server



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/langchain/vectorsearch/chroma.rb', line 36

# Add a list of texts to the index.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array<String>] Optional ids for the texts; a random UUID is
#   generated for any position without an id
# @param metadatas [Array<Hash>] Optional metadata hashes, positionally
#   matched to +texts+
# @return [Hash] The response from the server
def add_texts(texts:, ids: [], metadatas: [])
  embeddings = Array(texts).map.with_index do |text, i|
    ::Chroma::Resources::Embedding.new(
      # Fall back to a random UUID when no id was supplied for this position.
      id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
      embedding: llm.embed(text: text).embedding,
      metadata: metadatas[i] || {},
      document: text # Do we actually need to store the whole original document?
    )
  end

  # Use the shared `collection` helper, as the other mutation/query methods
  # do, instead of re-fetching the collection by name here.
  collection.add(embeddings)
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/langchain/vectorsearch/chroma.rb', line 125

# Ask a question and return the answer, grounded in the most similar
# documents from the index.
#
# @param question [String] The question to ask
# @param k [Integer] How many search results to include as context
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
def ask(question:, k: 4, &block)
  # Fetch the k nearest documents and join them into one context string.
  results = similarity_search(query: question, k: k)
  context = results.map(&:document).join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  response = llm.chat(messages: [{role: "user", content: prompt}], &block)

  # Attach the retrieved context so callers can inspect what grounded the answer.
  response.context = context
  response
end

#create_default_schema ⇒ ::Chroma::Resources::Collection

Create the collection with the default schema

Returns:

  • (::Chroma::Resources::Collection)

    Created collection



72
73
74
# File 'lib/langchain/vectorsearch/chroma.rb', line 72

# Create the collection with the default schema.
#
# @return [::Chroma::Resources::Collection] the created collection
def create_default_schema
  ::Chroma::Resources::Collection.create(index_name)
end

#destroy_default_schema ⇒ bool

Delete the default schema

Returns:

  • (bool)

    Success or failure



84
85
86
# File 'lib/langchain/vectorsearch/chroma.rb', line 84

# Delete the default schema (the collection named +index_name+).
#
# @return [bool] success or failure
def destroy_default_schema
  ::Chroma::Resources::Collection.delete(index_name)
end

#get_default_schema ⇒ ::Chroma::Resources::Collection

Get the default schema

Returns:

  • (::Chroma::Resources::Collection)

    Default schema



78
79
80
# File 'lib/langchain/vectorsearch/chroma.rb', line 78

# Get the default schema (the collection named +index_name+).
#
# @return [::Chroma::Resources::Collection] the collection
def get_default_schema
  ::Chroma::Resources::Collection.get(index_name)
end

#remove_texts(ids:) ⇒ Hash

Remove a list of texts from the index

Parameters:

  • ids (Array<String>)

    The list of ids to remove

Returns:

  • (Hash)

    The response from the server



66
67
68
# File 'lib/langchain/vectorsearch/chroma.rb', line 66

# Remove a list of texts from the index.
#
# @param ids [Array<String>] The list of ids to remove
# @return [Hash] The response from the server
def remove_texts(ids:)
  collection.delete(ids)
end

#similarity_search(query:, k: 4) ⇒ Chroma::Resources::Embedding

Search for similar texts

Parameters:

  • query (String)

    The text to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

Returns:

  • (Chroma::Resources::Embedding)

    The response from the server



92
93
94
95
96
97
98
99
100
101
102
# File 'lib/langchain/vectorsearch/chroma.rb', line 92

# Search for texts similar to the given query string.
#
# @param query [String] The text to search for
# @param k [Integer] The number of results to return
# @return [Chroma::Resources::Embedding] The response from the server
def similarity_search(
  query:,
  k: 4
)
  # Embed the query via the configured LLM, then delegate to the
  # vector-based search.
  query_vector = llm.embed(text: query).embedding

  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Chroma::Resources::Embedding

Search for similar texts by embedding

Parameters:

  • embedding (Array<Float>)

    The embedding to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

Returns:

  • (Chroma::Resources::Embedding)

    The response from the server



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/langchain/vectorsearch/chroma.rb', line 108

# Search for similar texts by embedding vector.
#
# @param embedding [Array<Float>] The embedding to search for
# @param k [Integer] The number of results to return
# @return [Chroma::Resources::Embedding] The response from the server
def similarity_search_by_vector(
  embedding:,
  k: 4
)
  # Chroma DB currently raises when more results are requested than documents
  # exist in the collection, so cap the request at the collection size.
  # Workaround per: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
  document_count = collection.count
  capped_results = (k > document_count) ? document_count : k

  collection.query(query_embeddings: [embedding], results: capped_results)
end

#update_texts(texts:, ids:, metadatas: []) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/langchain/vectorsearch/chroma.rb', line 50

# Update existing texts in the index, re-embedding each one.
#
# @param texts [Array<String>] The texts to update
# @param ids [Array<String>] The ids of the records to update, positionally
#   matched to +texts+
# @param metadatas [Array<Hash>] Optional metadata hashes, positionally
#   matched to +texts+
# @return [Object] The response from the server
def update_texts(texts:, ids:, metadatas: [])
  records = Array(texts).each_with_index.map do |document, index|
    ::Chroma::Resources::Embedding.new(
      id: ids[index].to_s,
      embedding: llm.embed(text: document).embedding,
      metadata: metadatas[index] || {},
      document: document # Do we actually need to store the whole original document?
    )
  end

  collection.update(records)
end