Class: Langchain::Vectorsearch::Weaviate

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/weaviate.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, api_key: nil) ⇒ Weaviate

Initialize the Weaviate adapter

Parameters:

  • url (String)

    The URL of the Weaviate instance

  • api_key (String) (defaults to: nil)

    The API key to use

  • index_name (String)

    The capitalized name of the index to use

  • llm (Object)

    The LLM client to use



20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/langchain/vectorsearch/weaviate.rb', line 20

# Initialize the Weaviate adapter
#
# @param url [String] The URL of the Weaviate instance
# @param index_name [String] The capitalized name of the index to use
# @param llm [Object] The LLM client to use (forwarded to the parent initializer)
# @param api_key [String] The API key to use
def initialize(url:, index_name:, llm:, api_key: nil)
  # depends_on comes from DependencyHelper; presumably it verifies/loads the gem — confirm there.
  depends_on "weaviate-ruby", req: "weaviate"

  # Weaviate requires the class name to be Capitalized: https://weaviate.io/developers/weaviate/configuration/schema-configuration#create-a-class
  # TODO: Capitalize index_name
  @index_name = index_name

  client_options = {url: url, api_key: api_key, logger: Langchain.logger}
  @client = ::Weaviate::Client.new(**client_options)

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: []) ⇒ Hash

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The list of texts to add

Returns:

  • (Hash)

    The response from the server



39
40
41
42
43
# File 'lib/langchain/vectorsearch/weaviate.rb', line 39

# Add a list of texts to the index
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array] Handed to weaviate_objects together with the texts (defaults to [])
# @return [Hash] The response from the server
def add_texts(texts:, ids: [])
  objects_api = client.objects
  payload = weaviate_objects(texts, ids)

  objects_api.batch_create(objects: payload)
end

#ask(question:, k: 4) {|String| ... } ⇒ Hash

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (Hash)

    The response returned by the LLM chat call, with the retrieved context assigned to its context attribute



147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
# File 'lib/langchain/vectorsearch/weaviate.rb', line 147

# Ask a question and return the answer
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Stream responses back one String at a time (the block is forwarded to llm.chat)
# @return [Object] The value returned by llm.chat, with the retrieved context assigned to its context attribute
def ask(question:, k: 4, &block)
  documents = similarity_search(query: question, k: k)

  # Concatenate the "content" of every hit, separating hits with a "---" line.
  context = documents
    .map { |document| document.dig("content").to_s }
    .join("\n---\n")

  rag_prompt = generate_rag_prompt(question: question, context: context)

  llm.chat(messages: [{role: "user", content: rag_prompt}], &block).tap do |answer|
    answer.context = context
  end
end

#create_default_schema ⇒ Hash

Create default schema

Returns:

  • (Hash)

    The response from the server



93
94
95
96
97
98
99
100
101
102
103
# File 'lib/langchain/vectorsearch/weaviate.rb', line 93

# Create default schema
#
# Creates a class named after index_name with vectorizer "none" and two
# properties: "__id" and "content".
#
# @return [Hash] The response from the server
def create_default_schema
  schema_properties = [
    # __id is used as a pointer to the original document.
    # '_id' (single underscore) is a reserved property name, hence the double underscore.
    {dataType: ["string"], name: "__id"},
    {dataType: ["text"], name: "content"}
  ]

  client.schema.create(class_name: index_name, vectorizer: "none", properties: schema_properties)
end

#destroy_default_schema ⇒ Boolean

Delete the index

Returns:

  • (Boolean)

    Whether the index was deleted



113
114
115
# File 'lib/langchain/vectorsearch/weaviate.rb', line 113

# Delete the index
#
# @return [Boolean] Whether the index was deleted (the value returned by client.schema.delete)
def destroy_default_schema
  schema_api = client.schema
  schema_api.delete(class_name: index_name)
end

#get_default_schema ⇒ Hash

Get default schema

Returns:

  • (Hash)

    The response from the server



107
108
109
# File 'lib/langchain/vectorsearch/weaviate.rb', line 107

# Get default schema
#
# @return [Hash] The response from the server for the class named index_name
def get_default_schema
  schema_api = client.schema
  schema_api.get(class_name: index_name)
end

#remove_texts(ids:) ⇒ Hash

Deletes a list of texts in the index

Parameters:

  • ids (Array)

    The ids of texts to delete

Returns:

  • (Hash)

    The response from the server

Raises:

  • (ArgumentError)

    If ids is not an Array


78
79
80
81
82
83
84
85
86
87
88
89
# File 'lib/langchain/vectorsearch/weaviate.rb', line 78

# Deletes a list of texts in the index
#
# @param ids [Array] The ids of texts to delete (matched against the "__id" property)
# @return [Hash] The response from the server
# @raise [ArgumentError] if ids is not an Array
def remove_texts(ids:)
  raise ArgumentError, "ids must be an array" unless ids.is_a?(Array)

  # Select every object whose "__id" is any of the given ids.
  id_filter = {path: ["__id"], operator: "ContainsAny", valueTextArray: ids}

  client.objects.batch_delete(class_name: index_name, where: id_filter)
end

#similarity_search(query:, k: 4) ⇒ Hash

Return documents similar to the query

Parameters:

  • query (String)

    The query to search for

  • k (Integer|String) (defaults to: 4)

    The number of results to return

Returns:

  • (Hash)

    The search results



121
122
123
124
125
# File 'lib/langchain/vectorsearch/weaviate.rb', line 121

# Return documents similar to the query
#
# Embeds the query with the LLM and delegates to similarity_search_by_vector.
#
# @param query [String] The query to search for
# @param k [Integer|String] The number of results to return
# @return [Hash] The search results
def similarity_search(query:, k: 4)
  similarity_search_by_vector(
    embedding: llm.embed(text: query).embedding,
    k: k
  )
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Hash

Return documents similar to the vector

Parameters:

  • embedding (Array<Float>)

    The vector to search for

  • k (Integer|String) (defaults to: 4)

    The number of results to return

Returns:

  • (Hash)

    The search results



131
132
133
134
135
136
137
138
139
140
# File 'lib/langchain/vectorsearch/weaviate.rb', line 131

# Return documents similar to the vector
#
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer|String] The number of results to return (sent to the client as a String)
# @return [Hash] The search results
def similarity_search_by_vector(embedding:, k: 4)
  query_args = {
    class_name: index_name,
    # The vector is interpolated into the nearVector argument via its to_s form.
    near_vector: "{ vector: #{embedding} }",
    limit: k.to_s,
    fields: "__id content _additional { id }"
  }

  client.query.get(**query_args)
end

#update_texts(texts:, ids:) ⇒ Hash

Update a list of texts in the index

Parameters:

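  • texts (Array<String>)

    The list of texts to update

  • ids (Array)

    The "__id" values of the objects to update; ids[i] identifies the object that receives texts[i]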

Returns:

  • (Hash)

    The response from the server



48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
# File 'lib/langchain/vectorsearch/weaviate.rb', line 48

# Update a list of texts in the index
#
# For every text, the object whose "__id" property equals the matching id is
# looked up to obtain its Weaviate UUID, then the object is rewritten with the
# new content and a fresh embedding.
#
# @param texts [Array<String>, String] The list of texts to update (a single text is wrapped in an Array)
# @param ids [Array] The "__id" values of the objects to update; ids[i] identifies the object for texts[i]
# @return [Array] One entry per text: the value returned by client.objects.update for it
# @raise [ArgumentError] if no object is found for one of the ids
def update_texts(texts:, ids:)
  # Coerce once so both passes below see the same collection
  # (previously only the lookup pass tolerated a single String).
  texts = Array(texts)
  ids = Array(ids)

  # Retrieve the UUIDs of the objects to update.
  # All lookups happen before any write, so an unknown id aborts the call
  # without leaving the index partially updated.
  uuids = texts.each_index.map do |i|
    # Escape quotes and backslashes so the id cannot break out of the
    # GraphQL string literal it is embedded in.
    escaped_id = ids[i].to_s.gsub(/["\\]/) { |char| "\\#{char}" }

    record = client.query.get(
      class_name: index_name,
      fields: "_additional { id }",
      where: "{ path: [\"__id\"], operator: Equal, valueString: \"#{escaped_id}\" }"
    )

    # dig tolerates a nil or empty result instead of failing with NoMethodError on nil.
    uuid = record&.dig(0, "_additional", "id")
    raise ArgumentError, "no object found with __id #{ids[i].inspect}" unless uuid

    uuid
  end

  # Update the objects
  texts.each_with_index.map do |text, i|
    client.objects.update(
      class_name: index_name,
      id: uuids[i],
      properties: {
        __id: ids[i].to_s,
        content: text
      },
      vector: llm.embed(text: text).embedding
    )
  end
end