Class: Langchain::Vectorsearch::Milvus

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/milvus.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #similarity_search_with_hyde, #update_texts

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, api_key: nil) ⇒ Milvus

Wrapper around Milvus REST APIs.

Gem requirements:

gem "milvus", "~> 0.10.3"

Usage:

milvus = Langchain::Vectorsearch::Milvus.new(url:, index_name:, llm:, api_key:)
[View source]

14
15
16
17
18
19
20
21
22
23
24
# File 'lib/langchain/vectorsearch/milvus.rb', line 14

# Builds the Milvus REST client and records which collection this instance works on.
#
# @param url [String] Milvus server URL handed to ::Milvus::Client
# @param index_name [String] collection name used by the other methods of this class
# @param llm [Object] LLM client forwarded to the Base initializer
# @param api_key [String, nil] credential forwarded to ::Milvus::Client (nil when none is needed)
def initialize(url:, index_name:, llm:, api_key: nil)
  depends_on "milvus"

  @client = ::Milvus::Client.new(
    url: url,
    # Forward the credential: it used to be accepted by this method but never used.
    api_key: api_key,
    logger: Langchain.logger
  )
  @index_name = index_name

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:) ⇒ Object

[View source]

26
27
28
29
30
31
32
33
# File 'lib/langchain/vectorsearch/milvus.rb', line 26

# Embeds every text with the LLM and inserts the resulting rows into the collection.
#
# Each row carries the raw text under :content and its embedding under :vector.
#
# @param texts [Array<String>] texts to embed and store (anything responding to #map)
# @return [Object] whatever client.entities.insert returns
def add_texts(texts:)
  entities = client.entities
  rows = texts.map { |text| {content: text, vector: llm.embed(text: text).embedding} }

  entities.insert(collection_name: index_name, data: rows)
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question

[View source]

144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
# File 'lib/langchain/vectorsearch/milvus.rb', line 144

# Answers a question using the stored texts as context.
#
# Runs a similarity search for the question, joins the "content" of every hit
# with "\n---\n", builds a RAG prompt from that context and sends it to the LLM
# as a single user message.
#
# @param question [String] the question to ask
# @param k [Integer] number of search results to place in the context
# @yield forwarded unchanged to llm.chat
# @return [Object] the llm.chat response, with its context set to the joined search content
def ask(question:, k: 4, &block)
  hits = similarity_search(query: question, k: k).dig("data")
  context = hits.map { |hit| hit.dig("content") }.join("\n---\n")

  rag_prompt = generate_rag_prompt(question: question, context: context)

  llm.chat(messages: [{role: "user", content: rag_prompt}], &block).tap do |response|
    response.context = context
  end
end

#create_default_index ⇒ Boolean

Create the default index

Returns:

  • (Boolean)

    The response from the server

[View source]

84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
# File 'lib/langchain/vectorsearch/milvus.rb', line 84

# Creates the default index on the "vector" field of the collection.
#
# The index is named "vector_idx", uses the "L2" metric and the "AUTOINDEX" index type.
#
# @return [Object] whatever client.indexes.create returns
def create_default_index
  vector_index = {
    metricType: "L2",
    fieldName: "vector",
    indexName: "vector_idx",
    indexConfig: {index_type: "AUTOINDEX"}
  }

  client.indexes.create(collection_name: index_name, index_params: [vector_index])
end

#create_default_schema ⇒ Hash

Create default schema

Returns:

  • (Hash)

    The response from the server

[View source]

54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/langchain/vectorsearch/milvus.rb', line 54

# Creates the collection with the default three-field schema:
# an auto-generated Int64 primary key "id", a VarChar "content" field and a
# FloatVector "vector" field whose dimension comes from llm.default_dimensions.
#
# @return [Object] whatever client.collections.create returns
def create_default_schema
  id_field = {fieldName: "id", isPrimary: true, dataType: "Int64"}
  content_field = {
    fieldName: "content",
    isPrimary: false,
    dataType: "VarChar",
    elementTypeParams: {max_length: "32768"} # Largest allowed value
  }
  vector_field = {
    fieldName: "vector",
    isPrimary: false,
    dataType: "FloatVector",
    # The dimension is sent as a string, like max_length above.
    elementTypeParams: {dim: llm.default_dimensions.to_s}
  }

  client.collections.create(
    auto_id: true,
    collection_name: index_name,
    fields: [id_field, content_field, vector_field]
  )
end

#destroy_default_schema ⇒ Hash

Delete default schema

Returns:

  • (Hash)

    The response from the server

[View source]

108
109
110
# File 'lib/langchain/vectorsearch/milvus.rb', line 108

# Drops the collection named by index_name.
#
# @return [Object] whatever client.collections.drop returns
def destroy_default_schema
  collections = client.collections
  collections.drop(collection_name: index_name)
end

#get_default_schema ⇒ Hash

Get the default schema

Returns:

  • (Hash)

    The response from the server

[View source]

102
103
104
# File 'lib/langchain/vectorsearch/milvus.rb', line 102

# Fetches the description of the collection named by index_name.
#
# @return [Object] whatever client.collections.describe returns
def get_default_schema
  collections = client.collections
  collections.describe(collection_name: index_name)
end

#load_default_schema ⇒ Boolean

Load default schema into memory

Returns:

  • (Boolean)

    The response from the server

[View source]

114
115
116
# File 'lib/langchain/vectorsearch/milvus.rb', line 114

# Asks the server to load the collection named by index_name.
#
# @return [Object] whatever client.collections.load returns
def load_default_schema
  collections = client.collections
  collections.load(collection_name: index_name)
end

#remove_texts(ids:) ⇒ Boolean

Deletes a list of texts in the index

Parameters:

  • ids (Array<Integer>)

    The ids of texts to delete

Returns:

  • (Boolean)

    The response from the server

Raises:

  • (ArgumentError)
[View source]

41
42
43
44
45
46
47
48
# File 'lib/langchain/vectorsearch/milvus.rb', line 41

# Deletes the entities whose "id" is in the given list.
#
# The array is interpolated as-is into the filter expression, e.g. "id in [1, 2, 3]".
#
# @param ids [Array<Integer>] ids of the texts to delete
# @return [Object] whatever client.entities.delete returns
# @raise [ArgumentError] when ids is not an Array
def remove_texts(ids:)
  unless ids.is_a?(Array)
    raise ArgumentError, "ids must be an array"
  end

  id_filter = "id in #{ids}"
  client.entities.delete(collection_name: index_name, filter: id_filter)
end

#similarity_search(query:, k: 4) ⇒ Object

[View source]

118
119
120
121
122
123
124
125
# File 'lib/langchain/vectorsearch/milvus.rb', line 118

# Embeds the query text with the LLM and searches the collection with that vector.
#
# @param query [String] text to search for
# @param k [Integer] number of results to request
# @return [Object] whatever similarity_search_by_vector returns
def similarity_search(query:, k: 4)
  query_vector = llm.embed(text: query).embedding
  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Object

[View source]

127
128
129
130
131
132
133
134
135
136
137
# File 'lib/langchain/vectorsearch/milvus.rb', line 127

# Searches the collection's "vector" field for the entries nearest to an embedding.
#
# Calls load_default_schema before every search, then requests the "content",
# "id" and "vector" fields of each hit.
#
# @param embedding [Array<Float>] query vector
# @param k [Integer] maximum number of results (sent as the search limit)
# @return [Object] whatever client.entities.search returns
def similarity_search_by_vector(embedding:, k: 4)
  load_default_schema

  search_options = {
    collection_name: index_name,
    anns_field: "vector",
    data: [embedding],
    limit: k,
    output_fields: %w[content id vector]
  }
  client.entities.search(**search_options)
end