Class: Langchain::Vectorsearch::Chroma

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/chroma.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, logger_options, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:) ⇒ Chroma

Initialize the Chroma client

Parameters:

  • url (String)

    The URL of the Chroma server

  • index_name (String)

    The name of the index to use

  • llm (Object)

    The LLM client to use



19
20
21
22
23
24
25
26
27
28
29
# File 'lib/langchain/vectorsearch/chroma.rb', line 19

# Initialize the Chroma client.
#
# @param url [String] The URL of the Chroma server
# @param index_name [String] The name of the index (collection) to use
# @param llm [Object] The LLM client to use for embeddings
def initialize(url:, index_name:, llm:)
  # Raise early if the optional chroma-db gem is not installed.
  depends_on "chroma-db"

  # Configure the chroma-db gem globally: point it at the target server and
  # route its logging through Langchain's logger at the same level.
  ::Chroma.connect_host = url
  ::Chroma.logger = Langchain.logger
  ::Chroma.log_level = Langchain.logger.level

  @index_name = index_name

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: [], metadatas: []) ⇒ Hash

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The list of texts to add

  • ids (Array<String>) (defaults to: [])

    The list of ids to use for the texts (optional)

  • metadatas (Array<Hash>) (defaults to: [])

    The list of metadata to use for the texts (optional)

Returns:

  • (Hash)

    The response from the server



36
37
38
39
40
41
42
43
44
45
46
47
48
# File 'lib/langchain/vectorsearch/chroma.rb', line 36

# Add a list of texts to the index.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array<String>] Optional ids for the texts; a random UUID is
#   generated for any position without an id
# @param metadatas [Array<Hash>] Optional metadata hashes, positionally
#   matched to +texts+
# @return [Hash] The response from the server
def add_texts(texts:, ids: [], metadatas: [])
  embeddings = Array(texts).map.with_index do |text, i|
    ::Chroma::Resources::Embedding.new(
      # Fall back to a random UUID when no id was supplied for this position.
      id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
      embedding: llm.embed(text: text).embedding,
      metadata: metadatas[i] || {},
      document: text # Do we actually need to store the whole original document?
    )
  end

  # Use the shared `collection` helper, as the other mutation/query methods
  # do, instead of re-fetching the collection by name here.
  collection.add(embeddings)
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
# File 'lib/langchain/vectorsearch/chroma.rb', line 125

# Ask a question and return the answer, grounded in the most similar
# documents from the index.
#
# @param question [String] The question to ask
# @param k [Integer] How many search results to include as context
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
def ask(question:, k: 4, &block)
  # Fetch the k nearest documents and join them into one context string.
  results = similarity_search(query: question, k: k)
  context = results.map(&:document).join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  response = llm.chat(messages: [{role: "user", content: prompt}], &block)

  # Attach the retrieved context so callers can inspect what grounded the answer.
  response.context = context
  response
end

#create_default_schema ⇒ ::Chroma::Resources::Collection

Create the collection with the default schema

Returns:

  • (::Chroma::Resources::Collection)

    Created collection



72
73
74
# File 'lib/langchain/vectorsearch/chroma.rb', line 72

# Create the collection with the default schema.
#
# @return [::Chroma::Resources::Collection] the created collection
def create_default_schema
  ::Chroma::Resources::Collection.create(index_name)
end

#destroy_default_schema ⇒ bool

Delete the default schema

Returns:

  • (bool)

    Success or failure



84
85
86
# File 'lib/langchain/vectorsearch/chroma.rb', line 84

# Delete the default schema (the collection named +index_name+).
#
# @return [bool] success or failure
def destroy_default_schema
  ::Chroma::Resources::Collection.delete(index_name)
end

#get_default_schema ⇒ ::Chroma::Resources::Collection

Get the default schema

Returns:

  • (::Chroma::Resources::Collection)

    Default schema



78
79
80
# File 'lib/langchain/vectorsearch/chroma.rb', line 78

# Get the default schema (the collection named +index_name+).
#
# @return [::Chroma::Resources::Collection] the collection
def get_default_schema
  ::Chroma::Resources::Collection.get(index_name)
end

#remove_texts(ids:) ⇒ Hash

Remove a list of texts from the index

Parameters:

  • ids (Array<String>)

    The list of ids to remove

Returns:

  • (Hash)

    The response from the server



66
67
68
# File 'lib/langchain/vectorsearch/chroma.rb', line 66

# Remove a list of texts from the index.
#
# @param ids [Array<String>] The list of ids to remove
# @return [Hash] The response from the server
def remove_texts(ids:)
  collection.delete(ids)
end

#similarity_search(query:, k: 4) ⇒ Chroma::Resources::Embedding

Search for similar texts

Parameters:

  • query (String)

    The text to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

Returns:

  • (Chroma::Resources::Embedding)

    The response from the server



92
93
94
95
96
97
98
99
100
101
102
# File 'lib/langchain/vectorsearch/chroma.rb', line 92

# Search for texts similar to the given query string.
#
# @param query [String] The text to search for
# @param k [Integer] The number of results to return
# @return [Chroma::Resources::Embedding] The response from the server
def similarity_search(
  query:,
  k: 4
)
  # Embed the query via the configured LLM, then delegate to the
  # vector-based search.
  query_vector = llm.embed(text: query).embedding

  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Chroma::Resources::Embedding

Search for similar texts by embedding

Parameters:

  • embedding (Array<Float>)

    The embedding to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

Returns:

  • (Chroma::Resources::Embedding)

    The response from the server



108
109
110
111
112
113
114
115
116
117
118
# File 'lib/langchain/vectorsearch/chroma.rb', line 108

# Search for similar texts by embedding vector.
#
# @param embedding [Array<Float>] The embedding to search for
# @param k [Integer] The number of results to return
# @return [Chroma::Resources::Embedding] The response from the server
def similarity_search_by_vector(
  embedding:,
  k: 4
)
  # Chroma DB currently raises when more results are requested than documents
  # exist in the collection, so cap the request at the collection size.
  # Workaround per: https://github.com/chroma-core/chroma/issues/301#issuecomment-1520494512
  document_count = collection.count
  capped_results = (k > document_count) ? document_count : k

  collection.query(query_embeddings: [embedding], results: capped_results)
end

#update_texts(texts:, ids:, metadatas: []) ⇒ Object



50
51
52
53
54
55
56
57
58
59
60
61
# File 'lib/langchain/vectorsearch/chroma.rb', line 50

# Update existing texts in the index, re-embedding each one.
#
# @param texts [Array<String>] The texts to update
# @param ids [Array<String>] The ids of the records to update, positionally
#   matched to +texts+
# @param metadatas [Array<Hash>] Optional metadata hashes, positionally
#   matched to +texts+
# @return [Object] The response from the server
def update_texts(texts:, ids:, metadatas: [])
  records = Array(texts).each_with_index.map do |document, index|
    ::Chroma::Resources::Embedding.new(
      id: ids[index].to_s,
      embedding: llm.embed(text: document).embedding,
      metadata: metadatas[index] || {},
      document: document # Do we actually need to store the whole original document?
    )
  end

  collection.update(records)
end