Class: Langchain::Vectorsearch::Pgvector

Inherits:

Base

Object
Base
Langchain::Vectorsearch::Pgvector

show all

Defined in:: lib/langchain/vectorsearch/pgvector.rb

Constant Summary collapse

OPERATORS = The operators supported by the PostgreSQL vector search adapter

{
  # Maps the public operator name to the distance name that ends up in
  # @operator (see #initialize) and is passed as `distance:` to
  # `nearest_neighbors` in #similarity_search_by_vector.
  "cosine_distance" => "cosine",
  "euclidean_distance" => "euclidean",
  "inner_product_distance" => "inner_product"
}

DEFAULT_OPERATOR =

"cosine_distance"

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary collapse

#db ⇒ Object readonly

Returns the value of attribute db.
#documents_table ⇒ Object readonly

Returns the value of attribute documents_table.
#namespace ⇒ Object readonly

Returns the value of attribute namespace.
#namespace_column ⇒ Object readonly

Returns the value of attribute namespace_column.
#operator ⇒ Object readonly

Returns the value of attribute operator.
#table_name ⇒ Object readonly

Returns the value of attribute table_name.

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

#add_texts(texts:, ids: nil, metadata: nil) ⇒ Array<Integer>

Add a list of texts to the index.
#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer.
#create_default_schema ⇒ Object

Create default schema.
#destroy_default_schema ⇒ Object

Destroy default schema.
#documents_model ⇒ Object
#initialize(url:, index_name:, llm:, namespace: nil) ⇒ Pgvector constructor

A new instance of Pgvector.
#remove_texts(ids:) ⇒ Integer

Remove a list of texts from the index.
#similarity_search(query:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index.
#similarity_search_by_vector(embedding:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index by the passed in vector.
#update_texts(texts:, ids:, metadata: nil) ⇒ Array<Integer>

Update a list of ids and corresponding texts to the index.
#upsert_texts(texts:, ids:, metadata: nil) ⇒ PG::Result

Upsert a list of texts to the index, returning the ids of the added or updated texts.

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, namespace: nil) ⇒ `Pgvector`

Returns a new instance of Pgvector.

Parameters:

url (String) —

The URL of the PostgreSQL database
index_name (String) —

The name of the table to use for the index
llm (Object) —

The LLM client to use
namespace (String) (defaults to: nil) —

The namespace to use for the index when inserting/querying

# File 'lib/langchain/vectorsearch/pgvector.rb', line 30

# Builds a Pgvector adapter bound to one table of a PostgreSQL database.
#
# @param url [String] The URL of the PostgreSQL database
# @param index_name [String] The name of the table to use for the index
# @param llm [Object] The LLM client to use
# @param namespace [String, nil] The namespace to use for the index when inserting/querying
def initialize(url:, index_name:, llm:, namespace: nil)
  # Both gems must be checked before the Sequel constant is touched below.
  %w[sequel pgvector].each { |gem_name| depends_on(gem_name) }

  @table_name = index_name
  @namespace = namespace
  @namespace_column = "namespace"
  # Resolve the default operator name to the distance name stored in OPERATORS.
  @operator = OPERATORS[DEFAULT_OPERATOR]
  @db = Sequel.connect(url)

  super(llm: llm)
end

Instance Attribute Details

#db ⇒ `Object` (readonly)

Returns the value of attribute db.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @db, which #initialize sets to the result of Sequel.connect(url).
def db
  @db
end

#documents_table ⇒ `Object` (readonly)

Returns the value of attribute documents_table.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @documents_table.
# NOTE(review): nothing in the code visible here assigns @documents_table, so
# this presumably returns nil -- confirm before relying on it.
def documents_table
  @documents_table
end

#namespace ⇒ `Object` (readonly)

Returns the value of attribute namespace.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace: the `namespace:` value given to #initialize (nil by
# default). It is written to each inserted row and used as the filter value in
# #similarity_search_by_vector.
def namespace
  @namespace
end

#namespace_column ⇒ `Object` (readonly)

Returns the value of attribute namespace_column.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace_column: the name of the column holding the namespace.
# #initialize always sets it to "namespace".
def namespace_column
  @namespace_column
end

#operator ⇒ `Object` (readonly)

Returns the value of attribute operator.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @operator: the distance name looked up in #initialize via
# OPERATORS[DEFAULT_OPERATOR], passed as `distance:` when searching.
def operator
  @operator
end

#table_name ⇒ `Object` (readonly)

Returns the value of attribute table_name.



24
25
26

# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @table_name: the `index_name:` value given to #initialize, used
# (after #to_sym) as the table for every insert, delete and schema statement.
def table_name
  @table_name
end

Instance Method Details

#add_texts(texts:, ids: nil, metadata: nil) ⇒ `Array<Integer>`

Add a list of texts to the index

Parameters:

texts (Array<String>) —

The texts to add to the index
ids (Array<String>) (defaults to: nil) —

The ids to add to the index, in the same order as the texts
metadata (Array<Hash>) (defaults to: nil) —

The metadata to associate with each text, in the same order as the texts

Returns:

(Array<Integer>) —

The ids of the added texts.

# File 'lib/langchain/vectorsearch/pgvector.rb', line 87

# Add a list of texts to the index.
#
# When `ids` is nil or empty the rows are inserted and the database assigns
# the primary keys; otherwise the call is delegated to #upsert_texts.
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array, nil] The ids to use, in the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text,
#   in the same order as the texts. Missing or nil entries are stored as {}.
# @return [Object] The result of `multi_insert(..., return: :primary_key)`
#   (presumably the new ids -- confirm against Sequel), or whatever
#   #upsert_texts returns when ids are given
def add_texts(texts:, ids: nil, metadata: nil)
  # Give every text its own metadata Hash. A missing list, a list shorter than
  # `texts`, or a nil entry would otherwise serialise as JSON `null` instead of
  # the `{}` the default schema uses for the metadata column.
  metadata ||= []
  metadata = texts.each_index.map { |index| metadata[index] || {} }

  if ids.nil? || ids.empty?
    data = texts.zip(metadata).map do |text, meta|
      {
        content: text,
        vectors: llm.embed(text: text).embedding.to_s,
        namespace: namespace,
        metadata: meta.to_json
      }
    end

    @db[table_name.to_sym].multi_insert(data, return: :primary_key)
  else
    upsert_texts(texts: texts, ids: ids, metadata: metadata)
  end
end

#ask(question:, k: 4) {|String| ... } ⇒ `String`

Ask a question and return the answer

Parameters:

question (String) —

The question to ask
k (Integer) (defaults to: 4) —

The number of results to have in context

Yields:

(String) —

Stream responses back one String at a time

Returns:

(String) —

The answer to the question

# File 'lib/langchain/vectorsearch/pgvector.rb', line 173

# Ask a question and return the answer.
#
# Looks up the `k` most similar documents, joins their contents into a single
# context string, builds a RAG prompt from it and sends that prompt to the LLM
# as a single user message.
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Forwarded to `llm.chat` (used to stream the response)
# @return [Object] The `llm.chat` response, with its `context` set to the
#   joined search results
def ask(question:, k: 4, &block)
  matches = similarity_search(query: question, k: k)
  # Each document's content is stringified and separated by a "---" line.
  context = matches.map { |match| match.content.to_s }.join("\n---\n")

  rag_prompt = generate_rag_prompt(question: question, context: context)
  response = llm.chat(messages: [{role: "user", content: rag_prompt}], &block)

  response.context = context
  response
end

#create_default_schema ⇒ `Object`

Create default schema

# File 'lib/langchain/vectorsearch/pgvector.rb', line 123

# Create the default schema: enable the `vector` extension, then create the
# index table (via `create_table?`) with id, content, vectors, namespace and
# metadata columns.
def create_default_schema
  db.run("CREATE EXTENSION IF NOT EXISTS vector")

  # Copied into locals before entering the block; presumably the block runs
  # with a different `self`, so instance state would not be reachable there
  # -- confirm against Sequel's create_table documentation.
  dimensions = llm.default_dimensions
  ns_column = @namespace_column

  db.create_table?(table_name.to_sym) do
    primary_key(:id)
    text(:content)
    column(:vectors, "vector(#{dimensions})")
    text(ns_column.to_sym, default: nil)
    jsonb(:metadata, default: "{}")
  end
end

#destroy_default_schema ⇒ `Object`

Destroy default schema



137
138
139

# File 'lib/langchain/vectorsearch/pgvector.rb', line 137

# Destroy the default schema by dropping the index table via `drop_table?`.
def destroy_default_schema
  target = table_name.to_sym
  db.drop_table?(target)
end

#documents_model ⇒ `Object`

# File 'lib/langchain/vectorsearch/pgvector.rb', line 45

# Builds an anonymous Sequel model class for the index table with the
# :pgvector plugin enabled on the :vectors column. A new class is created on
# every call.
def documents_model
  base_model = Sequel::Model(table_name.to_sym)
  Class.new(base_model) { plugin :pgvector, :vectors }
end

#remove_texts(ids:) ⇒ `Integer`

Remove a list of texts from the index

Parameters:

ids (Array<Integer>) —

The ids of the texts to remove from the index

Returns:

(Integer) —

The number of texts removed from the index



118
119
120

# File 'lib/langchain/vectorsearch/pgvector.rb', line 118

# Remove a list of texts from the index.
#
# @param ids [Array<Integer>] The ids of the texts to remove from the index
# @return [Object] The result of the dataset's `delete` (documented as the
#   number of texts removed)
def remove_texts(ids:)
  dataset = @db[table_name.to_sym]
  dataset.where(id: ids).delete
end

#similarity_search(query:, k: 4) ⇒ `Array<Hash>`

Search for similar texts in the index

Parameters:

query (String) —

The text to search for
k (Integer) (defaults to: 4) —

The number of top results to return

Returns:

(Array<Hash>) —

The results of the search

# File 'lib/langchain/vectorsearch/pgvector.rb', line 145

# Search for similar texts in the index.
#
# Embeds the query with the LLM and delegates to #similarity_search_by_vector.
#
# @param query [String] The text to search for
# @param k [Integer] The number of top results to return
# @return [Object] Whatever #similarity_search_by_vector returns
def similarity_search(query:, k: 4)
  query_vector = llm.embed(text: query).embedding
  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ `Array<Hash>`

Search for similar texts in the index by the passed in vector. You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.

Parameters:

embedding (Array<Float>) —

The vector to search for
k (Integer) (defaults to: 4) —

The number of top results to return

Returns:

(Array<Hash>) —

The results of the search

# File 'lib/langchain/vectorsearch/pgvector.rb', line 159

# Search for similar texts in the index by the passed in vector.
# The vector must come from the same LLM that produced the stored embeddings.
#
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer] The number of top results to return
# @return [Object] The value of the `db.transaction` block, i.e. the chained
#   query built on #documents_model
def similarity_search_by_vector(embedding:, k: 4)
  db.transaction do
    # Only content and metadata are selected, ordered by distance to `embedding`.
    nearest = documents_model
      .select(:content, :metadata)
      .nearest_neighbors(:vectors, embedding, distance: operator)

    # Cap at k rows and restrict to this instance's namespace.
    nearest.limit(k).where(namespace_column.to_sym => namespace)
  end
end

#update_texts(texts:, ids:, metadata: nil) ⇒ `Array<Integer>`

Update a list of ids and corresponding texts to the index

Parameters:

texts (Array<String>) —

The texts to add to the index
ids (Array<String>) —

The ids to add to the index, in the same order as the texts
metadata (Array<Hash>) (defaults to: nil) —

The metadata to associate with each text, in the same order as the texts

Returns:

(Array<Integer>) —

The ids of the updated texts.



111
112
113

# File 'lib/langchain/vectorsearch/pgvector.rb', line 111

# Update a list of ids and corresponding texts in the index.
# Thin wrapper: forwards every argument unchanged to #upsert_texts.
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array] The ids to update, in the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text,
#   in the same order as the texts
# @return [Object] Whatever #upsert_texts returns
def update_texts(texts:, ids:, metadata: nil)
  forwarded = {texts: texts, ids: ids, metadata: metadata}
  upsert_texts(**forwarded)
end

#upsert_texts(texts:, ids:, metadata: nil) ⇒ `PG::Result`

Upsert a list of texts to the index, returning the ids of the added or updated texts.

Parameters:

texts (Array<String>) —

The texts to add to the index
ids (Array<Integer>) —

The ids of the objects to add to the index, in the same order as the texts
metadata (Array<Hash>) (defaults to: nil) —

The metadata to associate with each text, in the same order as the texts

Returns:

(PG::Result) —

The response from the database, including the ids of the added or updated texts.

# File 'lib/langchain/vectorsearch/pgvector.rb', line 57

# Upsert a list of texts to the index.
#
# Rows are inserted with the given ids; when a row with the same id already
# exists, its content, vectors and metadata are overwritten with the new
# values (the namespace column is not part of the conflict update).
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<Integer>] The ids of the objects to add to the index, in
#   the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text,
#   in the same order as the texts. Missing or nil entries are stored as {}.
# @return [Object] The result of `multi_insert(..., return: :primary_key)`
#   (presumably the ids of the added or updated rows -- confirm against Sequel)
def upsert_texts(texts:, ids:, metadata: nil)
  # `zip` pads missing entries with nil, so an absent or short metadata list
  # is handled per row below.
  metadata ||= []

  # One row per text. The block returns a Hash, so plain `map` is the right
  # tool: `flat_map` has nothing to flatten here.
  data = texts.zip(ids, metadata).map do |text, id, meta|
    {
      id: id,
      content: text,
      vectors: llm.embed(text: text).embedding.to_s,
      namespace: namespace,
      # Fall back to {} so the column never receives JSON `null`.
      metadata: (meta || {}).to_json
    }
  end

  @db[table_name.to_sym]
    .insert_conflict(
      target: :id,
      update: {
        content: Sequel[:excluded][:content],
        vectors: Sequel[:excluded][:vectors],
        metadata: Sequel[:excluded][:metadata]
      }
    )
    .multi_insert(data, return: :primary_key)
end

Class: Langchain::Vectorsearch::Pgvector

Constant Summary collapse

Constants inherited from Base

Instance Attribute Summary collapse

Attributes inherited from Base

Instance Method Summary collapse

Methods inherited from Base

Methods included from DependencyHelper

Constructor Details

#initialize(url:, index_name:, llm:, namespace: nil) ⇒ Pgvector

Instance Attribute Details

#db ⇒ Object (readonly)

#documents_table ⇒ Object (readonly)

#namespace ⇒ Object (readonly)

#namespace_column ⇒ Object (readonly)

#operator ⇒ Object (readonly)

#table_name ⇒ Object (readonly)

Instance Method Details

#add_texts(texts:, ids: nil, metadata: nil) ⇒ Array<Integer>

#ask(question:, k: 4) {|String| ... } ⇒ String

#create_default_schema ⇒ Object

#destroy_default_schema ⇒ Object

#documents_model ⇒ Object

#remove_texts(ids:) ⇒ Integer

#similarity_search(query:, k: 4) ⇒ Array<Hash>

#similarity_search_by_vector(embedding:, k: 4) ⇒ Array<Hash>

#update_texts(texts:, ids:, metadata: nil) ⇒ Array<Integer>

#upsert_texts(texts:, ids:, metadata: nil) ⇒ PG::Result

#initialize(url:, index_name:, llm:, namespace: nil) ⇒ `Pgvector`

#db ⇒ `Object` (readonly)

#documents_table ⇒ `Object` (readonly)

#namespace ⇒ `Object` (readonly)

#namespace_column ⇒ `Object` (readonly)

#operator ⇒ `Object` (readonly)

#table_name ⇒ `Object` (readonly)

#add_texts(texts:, ids: nil, metadata: nil) ⇒ `Array<Integer>`

#ask(question:, k: 4) {|String| ... } ⇒ `String`

#create_default_schema ⇒ `Object`

#destroy_default_schema ⇒ `Object`

#documents_model ⇒ `Object`

#remove_texts(ids:) ⇒ `Integer`

#similarity_search(query:, k: 4) ⇒ `Array<Hash>`

#similarity_search_by_vector(embedding:, k: 4) ⇒ `Array<Hash>`

#update_texts(texts:, ids:, metadata: nil) ⇒ `Array<Integer>`

#upsert_texts(texts:, ids:, metadata: nil) ⇒ `PG::Result`