Class: Langchain::Vectorsearch::Weaviate

# File 'lib/langchain/vectorsearch/weaviate.rb', line 20

# Build the Weaviate client and remember which class (index) this instance works on.
#
# @param url [String] handed to Weaviate::Client as `url`
# @param index_name [String] name of the Weaviate class used as the index
# @param llm [Object] forwarded to the parent initializer
# @param api_key [String, nil] handed to Weaviate::Client as `api_key`
def initialize(url:, index_name:, llm:, api_key: nil)
  depends_on "weaviate-ruby", req: "weaviate"

  # Weaviate requires the class name to be Capitalized:
  # https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
  # TODO: Capitalize index_name
  @index_name = index_name
  @client = ::Weaviate::Client.new(url: url, api_key: api_key, logger: Langchain.logger)

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: []) ⇒ `Hash`

Add a list of texts to the index

Parameters:

texts (Array<String>) —

The list of texts to add

Returns:

(Hash) —

The response from the server

# File 'lib/langchain/vectorsearch/weaviate.rb', line 39

# Add a list of texts to the index.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array] passed to weaviate_objects together with the texts (defaults to [])
# @return [Hash] The response from the server
def add_texts(texts:, ids: [])
  objects_api = client.objects
  objects_api.batch_create(objects: weaviate_objects(texts, ids))
end

#ask(question:, k: 4) {|String| ... } ⇒ `Hash`

Ask a question and return the answer

Parameters:

question (String) —

The question to ask
k (Integer) (defaults to: 4) —

The number of search results to include in the context

Yields:

(String) —

Stream responses back one String at a time

Returns:

(Hash) —

The answer

# File 'lib/langchain/vectorsearch/weaviate.rb', line 147

# Ask a question and return the answer.
#
# Runs a similarity search for the question, joins the "content" of every hit
# into one context string, builds a RAG prompt from it and sends that prompt
# to the LLM as a single user message.
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Stream responses back one String at a time
# @return [Object] the value returned by llm.chat, with its `context` set to the joined context
def ask(question:, k: 4, &block)
  context = similarity_search(query: question, k: k)
    .map { |hit| hit["content"].to_s }
    .join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  llm.chat(messages: [{role: "user", content: prompt}], &block).tap do |response|
    response.context = context
  end
end

#create_default_schema ⇒ `Hash`

Create default schema

Returns:

(Hash) —

The response from the server

# File 'lib/langchain/vectorsearch/weaviate.rb', line 93

# Create the default schema for the index: a class named after index_name,
# with no vectorizer and two properties ("__id" and "content").
#
# @return [Hash] The response from the server
def create_default_schema
  properties = [
    # "__id" is used as a pointer to the original document;
    # "_id" (single underscore) is a reserved property name.
    {dataType: ["string"], name: "__id"},
    {dataType: ["text"], name: "content"}
  ]

  client.schema.create(class_name: index_name, vectorizer: "none", properties: properties)
end

#destroy_default_schema ⇒ `Boolean`

Delete the index

Returns:

(Boolean) —

Whether the index was deleted



113
114
115

# File 'lib/langchain/vectorsearch/weaviate.rb', line 113

# Delete the index by removing the schema class named index_name.
#
# @return [Boolean] Whether the index was deleted
def destroy_default_schema
  schema_api = client.schema
  schema_api.delete(class_name: index_name)
end

#get_default_schema ⇒ `Hash`

Get default schema

Returns:

(Hash) —

The response from the server



107
108
109

# File 'lib/langchain/vectorsearch/weaviate.rb', line 107

# Fetch the schema of the class named index_name.
#
# @return [Hash] The response from the server
def get_default_schema
  schema_api = client.schema
  schema_api.get(class_name: index_name)
end

#remove_texts(ids:) ⇒ `Hash`

Delete a list of texts from the index

Parameters:

ids (Array) —

The ids of texts to delete

Returns:

(Hash) —

The response from the server

Raises:

(ArgumentError)

# File 'lib/langchain/vectorsearch/weaviate.rb', line 78

# Delete a list of texts from the index, matching on their "__id" property.
#
# @param ids [Array] The ids of texts to delete; each id is converted with #to_s
# @return [Hash] The response from the server
# @raise [ArgumentError] if ids is not an Array
def remove_texts(ids:)
  raise ArgumentError, "ids must be an array" unless ids.is_a?(Array)

  client.objects.batch_delete(
    class_name: index_name,
    where: {
      path: ["__id"],
      operator: "ContainsAny",
      # update_texts writes "__id" as ids[i].to_s, so the filter must compare
      # against strings as well (e.g. Integer ids would otherwise be sent as numbers).
      valueTextArray: ids.map(&:to_s)
    }
  )
end

#similarity_search(query:, k: 4) ⇒ `Hash`

Return documents similar to the query

Parameters:

query (String) —

The query to search for
k (Integer|String) (defaults to: 4) —

The number of results to return

Returns:

(Hash) —

The search results

# File 'lib/langchain/vectorsearch/weaviate.rb', line 121

# Return documents similar to the query.
#
# The query is embedded with the LLM and the resulting vector is handed to
# similarity_search_by_vector.
#
# @param query [String] The query to search for
# @param k [Integer, String] The number of results to return
# @return [Hash] The search results
def similarity_search(query:, k: 4)
  similarity_search_by_vector(
    embedding: llm.embed(text: query).embedding,
    k: k
  )
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ `Hash`

Return documents similar to the vector

Parameters:

embedding (Array<Float>) —

The vector to search for
k (Integer|String) (defaults to: 4) —

The number of results to return

Returns:

(Hash) —

The search results

# File 'lib/langchain/vectorsearch/weaviate.rb', line 131

# Return documents similar to the vector.
#
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer, String] The number of results to return (sent as a String)
# @return [Hash] The search results
def similarity_search_by_vector(embedding:, k: 4)
  query_options = {
    class_name: index_name,
    # The vector is interpolated into the nearVector argument via Array#to_s.
    near_vector: "{ vector: #{embedding} }",
    limit: k.to_s,
    fields: "__id content _additional { id }"
  }

  client.query.get(**query_options)
end

#update_texts(texts:, ids:) ⇒ `Hash`

Update a list of texts in the index

Parameters:

texts (Array<String>) —

The list of texts to update

Returns:

(Hash) —

The response from the server

# File 'lib/langchain/vectorsearch/weaviate.rb', line 48

# Update a list of texts in the index.
#
# Works in two phases: first the Weaviate UUID of every object is looked up by
# its "__id" property, then each object is updated with the new content and a
# freshly computed embedding. All lookups finish before the first update is
# sent, so an unknown id aborts the call without modifying anything.
#
# @param texts [Array<String>, String] The list of texts to update (a single String is accepted)
# @param ids [Array, Object] The "__id" values of the objects to update, in the same order as texts
# @return [Array] The responses from the server, one per updated text
# @raise [ArgumentError] if no object with one of the given ids can be found
def update_texts(texts:, ids:)
  # Normalise once so both phases accept the same inputs (the second phase
  # previously called #map directly on texts and failed for a single String).
  texts = Array(texts)
  ids = Array(ids)

  # Retrieve the UUIDs of the objects to update
  uuids = texts.each_index.map do |i|
    # NOTE(review): the id is interpolated unescaped into the GraphQL filter;
    # ids containing double quotes will produce an invalid (or altered) query.
    record = client.query.get(
      class_name: index_name,
      fields: "_additional { id }",
      where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{ids[i]}\" }"
    )

    uuid = record[0]&.dig("_additional", "id")
    raise ArgumentError, "no object found with __id #{ids[i].inspect}" if uuid.nil?

    uuid
  end

  # Update the objects
  texts.each_with_index.map do |text, i|
    client.objects.update(
      class_name: index_name,
      id: uuids[i],
      properties: {
        __id: ids[i].to_s,
        content: text
      },
      vector: llm.embed(text: text).embedding
    )
  end
end

Class: Langchain::Vectorsearch::Weaviate

Constant Summary

Constants inherited from Base

Instance Attribute Summary

Attributes inherited from Base

Instance Method Summary collapse

Methods inherited from Base

Methods included from DependencyHelper

Constructor Details

#initialize(url:, index_name:, llm:, api_key: nil) ⇒ Weaviate

Instance Method Details

#add_texts(texts:, ids: []) ⇒ Hash

#ask(question:, k: 4) {|String| ... } ⇒ Hash

#create_default_schema ⇒ Hash

#destroy_default_schema ⇒ Boolean

#get_default_schema ⇒ Hash

#remove_texts(ids:) ⇒ Hash

#similarity_search(query:, k: 4) ⇒ Hash

#similarity_search_by_vector(embedding:, k: 4) ⇒ Hash

#update_texts(texts:, ids:) ⇒ Hash

#initialize(url:, index_name:, llm:, api_key: nil) ⇒ `Weaviate`

#add_texts(texts:, ids: []) ⇒ `Hash`

#ask(question:, k: 4) {|String| ... } ⇒ `Hash`

#create_default_schema ⇒ `Hash`

#destroy_default_schema ⇒ `Boolean`

#get_default_schema ⇒ `Hash`

#remove_texts(ids:) ⇒ `Hash`

#similarity_search(query:, k: 4) ⇒ `Hash`

#similarity_search_by_vector(embedding:, k: 4) ⇒ `Hash`

#update_texts(texts:, ids:) ⇒ `Hash`