Class: Langchain::Vectorsearch::Elasticsearch
- Defined in:
- lib/langchain/vectorsearch/elasticsearch.rb
Constant Summary
Constants inherited from Base
Instance Attribute Summary collapse
-
#es_client ⇒ Object
The Elasticsearch client instance used for all index, bulk, and search requests.
-
#index_name ⇒ Object
Name of the Elasticsearch index this store writes to and manages.
-
#options ⇒ Object
Options hash passed to the Elasticsearch client (url, request_timeout, logger, merged with es_options).
Attributes inherited from Base
Instance Method Summary collapse
-
#add_texts(texts: []) ⇒ Elasticsearch::Response
Add a list of texts to the index.
-
#ask(question:, k: 4) {|String| ... } ⇒ String
Ask a question and return the answer.
-
#create_default_schema ⇒ Elasticsearch::Response
Create the index with the default schema.
- #default_query(query_vector) ⇒ Object
- #default_schema ⇒ Object
- #default_vector_settings ⇒ Object
-
#delete_default_schema ⇒ Elasticsearch::Response
Deletes the default schema.
-
#initialize(url:, index_name:, llm:, api_key: nil, es_options: {}) ⇒ Elasticsearch
constructor
A new instance of Elasticsearch.
-
#remove_texts(ids: []) ⇒ Elasticsearch::Response
Remove a list of texts from the index.
-
#similarity_search(text: "", k: 10, query: {}) ⇒ Elasticsearch::Response
Search for similar texts.
-
#similarity_search_by_vector(embedding: [], k: 10, query: {}) ⇒ Elasticsearch::Response
Search for similar texts by embedding.
-
#update_texts(texts: [], ids: []) ⇒ Elasticsearch::Response
Update a list of texts in the index, addressed by their ids.
- #vector_settings ⇒ Object
Methods inherited from Base
#add_data, #destroy_default_schema, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, #similarity_search_with_hyde
Methods included from DependencyHelper
Constructor Details
#initialize(url:, index_name:, llm:, api_key: nil, es_options: {}) ⇒ Elasticsearch
Returns a new instance of Elasticsearch.
34 35 36 37 38 39 40 41 42 43 44 45 46 47 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 34
#
# Builds the Elasticsearch client and remembers the index to operate on.
#
# @param url [String] URL of the Elasticsearch node
# @param index_name [String] name of the index used by this store
# @param llm [Object] LLM client, forwarded to the base class
# @param api_key [String, nil] optional API key; forwarded to the client when given
# @param es_options [Hash] extra client options; these take precedence over the defaults
def initialize(url:, index_name:, llm:, api_key: nil, es_options: {})
  require "elasticsearch"

  defaults = {
    url: url,
    request_timeout: 20,
    logger: Langchain.logger
  }
  # api_key used to be accepted and then dropped; pass it through when present.
  defaults[:api_key] = api_key if api_key

  # Caller-supplied options are merged last so they can override any default.
  @options = defaults.merge(es_options)

  @es_client = ::Elasticsearch::Client.new(**@options)
  @index_name = index_name

  super(llm: llm)
end
Instance Attribute Details
#es_client ⇒ Object
Wrapper around Elasticsearch vector search capabilities.
Setting up Elasticsearch:
-
Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
-
Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
-
Set the ELASTICSEARCH_URL environment variable
Gem requirements:
gem "elasticsearch", "~> 8.0.0"
Usage:
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
url: ENV["ELASTICSEARCH_URL"],
index_name: "docs",
llm: llm,
es_options: {
transport_options: {ssl: {verify: false}},
ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
}
)
es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")
32 33 34 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32
#
# Reader for the client object assigned to @es_client by the constructor.
#
# @return [Object] the current value of @es_client
def es_client
  @es_client
end
#index_name ⇒ Object
Wrapper around Elasticsearch vector search capabilities.
Setting up Elasticsearch:
-
Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
-
Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
-
Set the ELASTICSEARCH_URL environment variable
Gem requirements:
gem "elasticsearch", "~> 8.0.0"
Usage:
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
url: ENV["ELASTICSEARCH_URL"],
index_name: "docs",
llm: llm,
es_options: {
transport_options: {ssl: {verify: false}},
ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
}
)
es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")
32 33 34 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32
#
# Reader for the index name assigned to @index_name by the constructor.
#
# @return [Object] the current value of @index_name
def index_name
  @index_name
end
#options ⇒ Object
Wrapper around Elasticsearch vector search capabilities.
Setting up Elasticsearch:
-
Get Elasticsearch up and running with Docker: www.elastic.co/guide/en/elasticsearch/reference/current/docker.html
-
Copy the HTTP CA certificate SHA-256 fingerprint and set the ELASTICSEARCH_CA_FINGERPRINT environment variable
-
Set the ELASTICSEARCH_URL environment variable
Gem requirements:
gem "elasticsearch", "~> 8.0.0"
Usage:
llm = Langchain::LLM::OpenAI.new(api_key: ENV["OPENAI_API_KEY"])
es = Langchain::Vectorsearch::Elasticsearch.new(
url: ENV["ELASTICSEARCH_URL"],
index_name: "docs",
llm: llm,
es_options: {
transport_options: {ssl: {verify: false}},
ca_fingerprint: ENV["ELASTICSEARCH_CA_FINGERPRINT"]
}
)
es.create_default_schema
es.add_texts(texts: ["..."])
es.similarity_search(text: "...")
32 33 34 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 32
#
# Reader for the client options hash assigned to @options by the constructor.
#
# @return [Object] the current value of @options
def options
  @options
end
Instance Method Details
#add_texts(texts: []) ⇒ Elasticsearch::Response
Add a list of texts to the index
52 53 54 55 56 57 58 59 60 61 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 52
#
# Add a list of texts to the index.
#
# Each text is embedded with the LLM and sent as an `index` action (without
# an explicit id) in a single bulk request.
#
# @param texts [Array<String>] texts to embed and store
# @return [Object] whatever the client's +bulk+ call returns
def add_texts(texts: [])
  # The bulk body is a flat list alternating action line and document line.
  body = texts.flat_map do |text|
    [
      {index: {_index: index_name}},
      {input: text, input_vector: llm.embed(text: text).embedding}
    ]
  end

  es_client.bulk(body: body)
end
#ask(question:, k: 4) {|String| ... } ⇒ String
Ask a question and return the answer
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 146
#
# Ask a question and return the answer.
#
# Runs a similarity search for the question, joins the stored texts of the
# hits into a context string, builds a RAG prompt from it and sends that
# prompt to the LLM as a single user message.
#
# @param question [String] the question to answer
# @param k [Integer] number of search hits to use as context
# @yield [String] optional block forwarded to +llm.chat+
# @return [Object] the LLM chat response, with its +context+ set to the text used
def ask(question:, k: 4, &block)
  # The question is free text, so it goes in as `text:` (to be embedded);
  # `query:` is reserved for a ready-made Elasticsearch query Hash.
  search_results = similarity_search(text: question, k: k)

  # similarity_search returns the response body; the matching documents are
  # under hits.hits and the indexed text under _source.input.
  hits = search_results.dig("hits", "hits") || []
  context = hits.map { |hit| hit.dig("_source", "input") }.join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  messages = [{role: "user", content: prompt}]
  response = llm.chat(messages: messages, &block)

  response.context = context
  response
end
#create_default_schema ⇒ Elasticsearch::Response
Create the index with the default schema
91 92 93 94 95 96 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 91
#
# Create the index named +index_name+ using the mapping from +default_schema+.
#
# @return [Object] whatever the client's +indices.create+ call returns
def create_default_schema
  es_client.indices.create(index: index_name, body: default_schema)
end
#default_query(query_vector) ⇒ Object
127 128 129 130 131 132 133 134 135 136 137 138 139 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 127
#
# Build the default similarity query: a +script_score+ over all documents
# whose script is cosineSimilarity between the given vector and the
# 'input_vector' field, plus 1.0.
#
# @param query_vector [Object] vector passed to the script as params.query_vector
# @return [Hash] the query hash
def default_query(query_vector)
  scoring_script = {
    source: "cosineSimilarity(params.query_vector, 'input_vector') + 1.0",
    params: {query_vector: query_vector}
  }

  {script_score: {query: {match_all: {}}, script: scoring_script}}
end
#default_schema ⇒ Object
114 115 116 117 118 119 120 121 122 123 124 125 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 114
#
# Build the index mapping: a text field +input+ and a vector field
# +input_vector+ whose definition comes from +vector_settings+.
#
# @return [Hash] the mapping body for index creation
def default_schema
  field_definitions = {
    input: {type: "text"},
    input_vector: vector_settings
  }

  {mappings: {properties: field_definitions}}
end
#default_vector_settings ⇒ Object
106 107 108 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 106
#
# Default definition of the vector field: a dense_vector whose +dims+ is
# taken from +llm.default_dimensions+.
#
# @return [Hash] the field definition
def default_vector_settings
  dimensions = llm.default_dimensions
  {type: "dense_vector", dims: dimensions}
end
#delete_default_schema ⇒ Elasticsearch::Response
Deletes the default schema
100 101 102 103 104 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 100
#
# Delete the index named +index_name+.
#
# @return [Object] whatever the client's +indices.delete+ call returns
def delete_default_schema
  es_client.indices.delete(index: index_name)
end
#remove_texts(ids: []) ⇒ Elasticsearch::Response
Remove a list of texts from the index
81 82 83 84 85 86 87 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 81
#
# Remove documents from the index by id, using one bulk request with a
# +delete+ action per id.
#
# @param ids [Array] ids of the documents to delete
# @return [Object] whatever the client's +bulk+ call returns
def remove_texts(ids: [])
  delete_actions = ids.map { |doc_id| {delete: {_index: index_name, _id: doc_id}} }
  es_client.bulk(body: delete_actions)
end
#similarity_search(text: "", k: 10, query: {}) ⇒ Elasticsearch::Response
Search for similar texts
167 168 169 170 171 172 173 174 175 176 177 178 179 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 167
#
# Search the index for texts similar to +text+, or run a caller-supplied query.
#
# When +query+ is empty, +text+ is embedded with the LLM and wrapped in the
# default similarity query. A non-empty +query+ is sent as-is and +text+ is
# ignored.
#
# @param text [String] text to embed and search for
# @param k [Integer] maximum number of hits to request
# @param query [Hash] Elasticsearch query to use instead of the default one
# @return [Object] the +body+ of the client's search response
# @raise [RuntimeError] if both +text+ and +query+ are empty
def similarity_search(text: "", k: 10, query: {})
  if text.empty? && query.empty?
    raise "Either text or query should pass as an argument"
  end

  if query.empty?
    query_vector = llm.embed(text: text).embedding
    query = default_query(query_vector)
  end

  # Scope the request to this store's index; a search without `index:` is not
  # restricted to it.
  es_client.search(index: index_name, body: {query: query, size: k}).body
end
#similarity_search_by_vector(embedding: [], k: 10, query: {}) ⇒ Elasticsearch::Response
Search for similar texts by embedding
186 187 188 189 190 191 192 193 194 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 186
#
# Search the index for texts similar to a precomputed embedding, or run a
# caller-supplied query.
#
# When +query+ is empty, +embedding+ is wrapped in the default similarity
# query. A non-empty +query+ is sent as-is and +embedding+ is ignored.
#
# @param embedding [Array] query vector
# @param k [Integer] maximum number of hits to request
# @param query [Hash] Elasticsearch query to use instead of the default one
# @return [Object] the +body+ of the client's search response
# @raise [RuntimeError] if both +embedding+ and +query+ are empty
def similarity_search_by_vector(embedding: [], k: 10, query: {})
  if embedding.empty? && query.empty?
    raise "Either embedding or query should pass as an argument"
  end

  query = default_query(embedding) if query.empty?

  # Scope the request to this store's index; a search without `index:` is not
  # restricted to it.
  es_client.search(index: index_name, body: {query: query, size: k}).body
end
#update_texts(texts: [], ids: []) ⇒ Elasticsearch::Response
Update a list of texts in the index, addressed by their ids
67 68 69 70 71 72 73 74 75 76 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 67
#
# Update a list of texts in the index.
#
# Each text is embedded with the LLM and sent as an `index` action carrying
# the id found at the same position in +ids+, all in one bulk request.
#
# @param texts [Array<String>] new texts
# @param ids [Array] document ids, positionally matched to +texts+
# @return [Object] whatever the client's +bulk+ call returns
def update_texts(texts: [], ids: [])
  # The bulk body is a flat list alternating action line and document line.
  body = texts.each_with_index.flat_map do |text, i|
    [
      {index: {_index: index_name, _id: ids[i]}},
      {input: text, input_vector: llm.embed(text: text).embedding}
    ]
  end

  es_client.bulk(body: body)
end
#vector_settings ⇒ Object
110 111 112 |
# File 'lib/langchain/vectorsearch/elasticsearch.rb', line 110
#
# Definition of the vector field used in the index mapping.
#
# @return [Object] +options[:vector_settings]+ when set, otherwise
#   the result of +default_vector_settings+
def vector_settings
  options[:vector_settings] || default_vector_settings
end