Class: Langchain::Vectorsearch::Epsilla

Inherits:

Base

Object
Base
Langchain::Vectorsearch::Epsilla

show all

Defined in: lib/langchain/vectorsearch/epsilla.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

#add_texts(texts:, ids: nil) ⇒ Object

Add a list of texts to the database.
#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer.
#create_default_schema ⇒ Object

Create a table using the index_name passed in the constructor.
#destroy_default_schema ⇒ Object

Drop the table using the index_name passed in the constructor.
#initialize(url:, db_name:, db_path:, index_name:, llm:) ⇒ Epsilla constructor

Wrapper around Epsilla client library.
#similarity_search(query:, k: 4) ⇒ String

Search for similar texts.
#similarity_search_by_vector(embedding:, k: 4) ⇒ String

Search for entries by embedding.

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, #remove_texts, #similarity_search_with_hyde, #update_texts

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, db_name:, db_path:, index_name:, llm:) ⇒ `Epsilla`

Wrapper around Epsilla client library

Gem requirements:

gem "epsilla-ruby", "~> 0.0.3"

Usage:

epsilla = Langchain::Vectorsearch::Epsilla.new(url:, db_name:, db_path:, index_name:, llm:)

Initialize Epsilla client

Parameters:

url (String) —

URL to connect to the Epsilla db instance, protocol://host:port
db_name (String) —

The name of the database to use
db_path (String) —

The path to the database to use
index_name (String) —

The name of the Epsilla table to use
llm (Object) —

The LLM client to use

# File 'lib/langchain/vectorsearch/epsilla.rb', line 23

# Wrapper around Epsilla client library.
#
# Connects to the Epsilla instance at +url+, loads +db_name+ from +db_path+
# (tolerating an "already loaded" reply) and selects it for later calls.
#
# @param url [String] URL to connect to the Epsilla db instance, protocol://host:port
# @param db_name [String] The name of the database to use
# @param db_path [String] The path to the database to use
# @param index_name [String] The name of the Epsilla table to use
# @param llm [Object] The LLM client to use; must respond to default_dimensions
# @raise [RuntimeError] if the database cannot be loaded
# @raise [Timeout::Error] if loading the database takes longer than 5 seconds
def initialize(url:, db_name:, db_path:, index_name:, llm:)
  depends_on "epsilla-ruby", req: "epsilla"

  uri = URI.parse(url)
  protocol = uri.scheme
  host = uri.host
  port = uri.port

  @client = ::Epsilla::Client.new(protocol, host, port)

  Timeout.timeout(5) do
    status_code, response = @client.database.load_db(db_name, db_path)

    if status_code != 200
      # The error text may arrive as a parsed Hash (under "message") or as the
      # raw response body. Normalise it to a String so the legacy check below
      # neither raises NoMethodError when "message" is absent nor misses the
      # text when the body is an unparsed String.
      message = response.is_a?(Hash) ? response["message"] : response

      if status_code == 409 || (status_code == 500 && message.to_s.include?("already loaded"))
        # When db is already loaded, Epsilla may return HTTP 409 Conflict.
        # This behavior is changed in https://github.com/epsilla-cloud/vectordb/pull/95
        # Old behavior (HTTP 500) is preserved for backwards compatibility.
        # It does not prevent us from using the db.
        Langchain.logger.debug("#{self.class} - Database already loaded")
      else
        raise "Failed to load database: #{response}"
      end
    end
  end

  @client.database.use_db(db_name)

  @db_name = db_name
  @db_path = db_path
  @table_name = index_name

  @vector_dimensions = llm.default_dimensions

  super(llm: llm)
end

Instance Method Details

#add_texts(texts:, ids: nil) ⇒ `Object`

Add a list of texts to the database

Parameters:

texts (Array<String>) —

The list of texts to add
ids (Array<String>) (defaults to: nil) —

The unique ids to add to the index, in the same order as the texts; if nil, a random UUID is generated for each text

# File 'lib/langchain/vectorsearch/epsilla.rb', line 83

# Embed each text with the LLM and insert the resulting rows into the table.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array<String>, nil] Ids for the rows, in the same order as the
#   texts; when nil a random UUID is generated for each text
# @return [Object] The insert response body parsed as JSON
# @raise [RuntimeError] if the id count differs from the text count, or the
#   insert does not return status 200
def add_texts(texts:, ids: nil)
  if !ids.nil? && ids.length != texts.length
    raise "The number of ids must match the number of texts"
  end

  record_ids = ids.nil? ? texts.map { SecureRandom.uuid } : ids

  rows = texts.each_with_index.map do |doc, position|
    vector = llm.embed(text: doc).embedding
    {Doc: doc, Embedding: vector, ID: record_ids[position]}
  end

  code, body = @client.database.insert(@table_name, rows)
  raise "Failed to insert texts: #{body}" unless code == 200

  JSON.parse(body)
end

#ask(question:, k: 4) {|String| ... } ⇒ `String`

Ask a question and return the answer

Parameters:

question (String) —

The question to ask
k (Integer) (defaults to: 4) —

The number of results to have in context

Yields:

(String) —

Stream responses back one String at a time

Returns:

(String) —

The answer to the question

# File 'lib/langchain/vectorsearch/epsilla.rb', line 130

# Answer a question using the most similar stored texts as context.
#
# The search results are stringified and joined with "\n---\n", a RAG prompt
# is built from them, and that prompt is sent to the LLM as a single user
# message.
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Forwarded to llm.chat to stream the response
# @return [Object] The value returned by llm.chat, with its context attribute
#   set to the joined search results
def ask(question:, k: 4, &block)
  context = similarity_search(query: question, k: k).map(&:to_s).join("\n---\n")
  prompt = generate_rag_prompt(question: question, context: context)

  llm.chat(messages: [{role: "user", content: prompt}], &block).tap do |answer|
    answer.context = context
  end
end

#create_default_schema ⇒ `Object`

Create a table using the index_name passed in the constructor

# File 'lib/langchain/vectorsearch/epsilla.rb', line 61

# Create the table named by the index_name given to the constructor, with an
# ID primary key, a Doc string column and an Embedding vector column.
#
# @return [Object] The response returned by the client
# @raise [RuntimeError] if the client does not return status 200
def create_default_schema
  columns = [
    {"name" => "ID", "dataType" => "STRING", "primaryKey" => true},
    {"name" => "Doc", "dataType" => "STRING"},
    {"name" => "Embedding", "dataType" => "VECTOR_FLOAT", "dimensions" => @vector_dimensions}
  ]

  code, body = @client.database.create_table(@table_name, columns)
  return body if code == 200

  raise "Failed to create table: #{body}"
end

#destroy_default_schema ⇒ `Object`

Drop the table using the index_name passed in the constructor

# File 'lib/langchain/vectorsearch/epsilla.rb', line 73

# Drop the table named by the index_name given to the constructor.
#
# @return [Object] The response returned by the client
# @raise [RuntimeError] if the client does not return status 200
def destroy_default_schema
  code, body = @client.database.drop_table(@table_name)
  return body if code == 200

  raise "Failed to drop table: #{body}"
end

#similarity_search(query:, k: 4) ⇒ `Array<String>`

Search for similar texts

Parameters:

query (String) —

The text to search for
k (Integer) (defaults to: 4) —

The number of results to return

Returns:

(Array<String>) —

The "Doc" value of each matching entry returned by the server

# File 'lib/langchain/vectorsearch/epsilla.rb', line 104

# Embed the query text with the LLM and search the table by that vector.
#
# @param query [String] The text to search for
# @param k [Integer] The number of results to return
# @return [Object] Whatever similarity_search_by_vector returns for the
#   query's embedding
def similarity_search(query:, k: 4)
  query_vector = llm.embed(text: query).embedding
  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ `Array<String>`

Search for entries by embedding

Parameters:

embedding (Array<Float>) —

The embedding to search for
k (Integer) (defaults to: 4) —

The number of results to return

Returns:

(Array<String>) —

The "Doc" value of each matching entry returned by the server

# File 'lib/langchain/vectorsearch/epsilla.rb', line 117

# Query the table's "Embedding" field with the given vector and return the
# "Doc" value of every entry in the response's "result" list.
#
# @param embedding [Array<Float>] The embedding to search for
# @param k [Integer] The number of results to return
# @return [Array] The "Doc" value of each returned entry
# @raise [RuntimeError] if the client does not return status 200
def similarity_search_by_vector(embedding:, k: 4)
  code, body = @client.database.query(@table_name, "Embedding", embedding, ["Doc"], k, false)
  raise "Failed to do similarity search: #{body}" unless code == 200

  matches = JSON.parse(body)["result"]
  matches.map { |entry| entry["Doc"] }
end

Class: Langchain::Vectorsearch::Epsilla

Constant Summary

Constants inherited from Base

Instance Attribute Summary

Attributes inherited from Base

Instance Method Summary collapse

Methods inherited from Base

Methods included from DependencyHelper

Constructor Details

#initialize(url:, db_name:, db_path:, index_name:, llm:) ⇒ Epsilla

Instance Method Details

#add_texts(texts:, ids: nil) ⇒ Object

#ask(question:, k: 4) {|String| ... } ⇒ String

#create_default_schema ⇒ Object

#destroy_default_schema ⇒ Object

#similarity_search(query:, k: 4) ⇒ String

#similarity_search_by_vector(embedding:, k: 4) ⇒ String

#initialize(url:, db_name:, db_path:, index_name:, llm:) ⇒ `Epsilla`

#add_texts(texts:, ids: nil) ⇒ `Object`

#ask(question:, k: 4) {|String| ... } ⇒ `String`

#create_default_schema ⇒ `Object`

#destroy_default_schema ⇒ `Object`

#similarity_search(query:, k: 4) ⇒ `String`

#similarity_search_by_vector(embedding:, k: 4) ⇒ `String`