Class: Langchain::Vectorsearch::Pgvector

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/pgvector.rb

Constant Summary collapse

OPERATORS =

The operators supported by the PostgreSQL vector search adapter

{
  "cosine_distance" => "cosine",
  "euclidean_distance" => "euclidean",
  "inner_product_distance" => "inner_product"
}
DEFAULT_OPERATOR =
"cosine_distance"

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary collapse

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, namespace: nil) ⇒ Pgvector

Returns a new instance of Pgvector.

Parameters:

  • url (String)

    The URL of the PostgreSQL database

  • index_name (String)

    The name of the table to use for the index

  • llm (Object)

    The LLM client to use

  • namespace (String) (defaults to: nil)

    The namespace to use for the index when inserting/querying



30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/langchain/vectorsearch/pgvector.rb', line 30

# Connects to the database and records the table, namespace and distance settings.
#
# @param url [String] connection URL handed to Sequel.connect
# @param index_name [String] stored as the table name
# @param llm [Object] LLM client, forwarded to the superclass constructor
# @param namespace [String, nil] namespace value stored on the instance
def initialize(url:, index_name:, llm:, namespace: nil)
  # Dependency checks run first, ahead of the Sequel.connect call.
  depends_on("sequel")
  depends_on("pgvector")

  @db = Sequel.connect(url)

  # Table and namespace configuration.
  @table_name = index_name
  @namespace = namespace
  @namespace_column = "namespace"

  # Distance setting looked up from the default operator key.
  @operator = OPERATORS[DEFAULT_OPERATOR]

  super(llm: llm)
end

Instance Attribute Details

#dbObject (readonly)

Returns the value of attribute db.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @db, which #initialize assigns from Sequel.connect(url).
#
# @return [Object] the stored database handle
def db
  @db
end

#documents_tableObject (readonly)

Returns the value of attribute documents_table.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @documents_table.
#
# NOTE(review): the constructor shown for this class never assigns
# @documents_table, so this presumably returns nil unless it is set
# somewhere outside the visible source — confirm.
#
# @return [Object, nil] the stored value
def documents_table
  @documents_table
end

#namespaceObject (readonly)

Returns the value of attribute namespace.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace, which #initialize sets from its `namespace:` argument
# (nil by default).
#
# @return [String, nil] the stored namespace value
def namespace
  @namespace
end

#namespace_columnObject (readonly)

Returns the value of attribute namespace_column.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace_column, which #initialize sets to the literal "namespace".
#
# @return [String] name of the namespace column
def namespace_column
  @namespace_column
end

#operatorObject (readonly)

Returns the value of attribute operator.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @operator, which #initialize sets to OPERATORS[DEFAULT_OPERATOR].
#
# @return [String] the stored distance setting
def operator
  @operator
end

#table_nameObject (readonly)

Returns the value of attribute table_name.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @table_name, which #initialize sets from its `index_name:` argument.
#
# @return [String] the stored table name
def table_name
  @table_name
end

Instance Method Details

#add_texts(texts:, ids: nil) ⇒ Array<Integer>

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<String>) (defaults to: nil)

    The ids to add to the index, in the same order as the texts

Returns:

  • (Array<Integer>)

    The ids of the added texts.



73
74
75
76
77
78
79
80
81
82
83
# File 'lib/langchain/vectorsearch/pgvector.rb', line 73

# Stores texts in the table, embedding each one through the LLM client.
# When ids are supplied, the call is handed off to #upsert_texts instead.
#
# @param texts [Array<String>] texts to store
# @param ids [Array, nil] optional ids, in the same order as the texts
# @return [Object] result of multi_insert (requested with return: :primary_key),
#   or the result of #upsert_texts when ids are given
def add_texts(texts:, ids: nil)
  # Explicit ids mean the rows may already exist, so take the upsert path.
  return upsert_texts(texts: texts, ids: ids) unless ids.nil? || ids.empty?

  rows = texts.map do |text|
    embedding = llm.embed(text: text).embedding
    {content: text, vectors: embedding.to_s, namespace: namespace}
  end

  @db[table_name.to_sym].multi_insert(rows, return: :primary_key)
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
# File 'lib/langchain/vectorsearch/pgvector.rb', line 149

# Answers a question using the k nearest stored documents as context.
#
# @param question [String] the question to ask
# @param k [Integer] number of search results to place in the context
# @yield forwarded unchanged to llm.chat
# @return [Object] the llm.chat response, with its `context` set to the joined results
def ask(question:, k: 4, &block)
  # Each hit's content is stringified; hits are separated by a "---" line.
  context = similarity_search(query: question, k: k)
    .map { |doc| doc.content.to_s }
    .join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  llm.chat(messages: [{role: "user", content: prompt}], &block).tap do |response|
    response.context = context
  end
end

#create_default_schemaObject

Create default schema



101
102
103
104
105
106
107
108
109
110
111
# File 'lib/langchain/vectorsearch/pgvector.rb', line 101

# Enables the vector extension and creates the documents table via create_table?.
# Columns: an :id primary key, text :content, a :vectors column sized from
# llm.default_dimensions, and a text namespace column defaulting to nil.
def create_default_schema
  db.run("CREATE EXTENSION IF NOT EXISTS vector")

  # Values are copied into locals before the block, as the original does;
  # presumably the block is evaluated against the schema generator rather
  # than this object, so ivars and readers would not resolve there — confirm.
  ns_column = @namespace_column
  vector_type = "vector(#{llm.default_dimensions})"

  db.create_table?(table_name.to_sym) do
    primary_key :id
    text :content
    column :vectors, vector_type
    text ns_column.to_sym, default: nil
  end
end

#destroy_default_schemaObject

Destroy default schema



114
115
116
# File 'lib/langchain/vectorsearch/pgvector.rb', line 114

# Drops the documents table through drop_table?.
#
# @return [Object] whatever drop_table? returns
def destroy_default_schema
  table = table_name.to_sym
  db.drop_table?(table)
end

#documents_modelObject



45
46
47
48
49
# File 'lib/langchain/vectorsearch/pgvector.rb', line 45

# Builds a new anonymous Sequel model class for the documents table, with the
# pgvector plugin enabled on the :vectors column. A fresh class is created on
# every call.
#
# @return [Class] anonymous subclass of Sequel::Model(table_name.to_sym)
def documents_model
  base_model = Sequel::Model(table_name.to_sym)
  Class.new(base_model) { plugin(:pgvector, :vectors) }
end

#remove_texts(ids:) ⇒ Integer

Remove a list of texts from the index

Parameters:

  • ids (Array<Integer>)

    The ids of the texts to remove from the index

Returns:

  • (Integer)

    The number of texts removed from the index



96
97
98
# File 'lib/langchain/vectorsearch/pgvector.rb', line 96

# Deletes the rows whose id matches one of the given ids.
#
# @param ids [Array<Integer>] ids of the rows to delete
# @return [Object] whatever the dataset's delete returns
def remove_texts(ids:)
  documents = @db[table_name.to_sym]
  documents.where(id: ids).delete
end

#similarity_search(query:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index

Parameters:

  • query (String)

    The text to search for

  • k (Integer) (defaults to: 4)

    The number of top results to return

Returns:

  • (Array<Hash>)

    The results of the search



122
123
124
125
126
127
128
129
# File 'lib/langchain/vectorsearch/pgvector.rb', line 122

# Embeds the query text with the LLM client and delegates to
# #similarity_search_by_vector.
#
# @param query [String] text to search for
# @param k [Integer] number of results to request
# @return [Object] whatever #similarity_search_by_vector returns
def similarity_search(query:, k: 4)
  similarity_search_by_vector(embedding: llm.embed(text: query).embedding, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index by the passed in vector. You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.

Parameters:

  • embedding (Array<Float>)

    The vector to search for

  • k (Integer) (defaults to: 4)

    The number of top results to return

Returns:

  • (Array<Hash>)

    The results of the search



136
137
138
139
140
141
142
# File 'lib/langchain/vectorsearch/pgvector.rb', line 136

# Finds the k nearest stored vectors to the given embedding, restricted to rows
# whose namespace column equals this instance's namespace.
#
# NOTE(review): the chain is returned from inside db.transaction without being
# enumerated here; if it is a lazy dataset the query itself presumably runs
# after the transaction block has exited — confirm.
#
# @param embedding [Array<Float>] query vector
# @param k [Integer] number of results to request
# @return [Object] the result of the nearest_neighbors/limit/where chain
def similarity_search_by_vector(embedding:, k: 4)
  db.transaction do
    neighbors = documents_model.nearest_neighbors(:vectors, embedding, distance: operator)
    namespace_filter = {namespace_column.to_sym => namespace}
    neighbors.limit(k).where(namespace_filter)
  end
end

#update_texts(texts:, ids:) ⇒ Array<Integer>

Update a list of ids and corresponding texts to the index

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<String>)

    The ids to add to the index, in the same order as the texts

Returns:

  • (Array<Integer>)

    The ids of the updated texts.



89
90
91
# File 'lib/langchain/vectorsearch/pgvector.rb', line 89

# Updates stored texts by delegating to #upsert_texts with the same arguments.
#
# @param texts [Array<String>] replacement texts
# @param ids [Array] ids of the rows, in the same order as the texts
# @return [Object] whatever #upsert_texts returns
def update_texts(texts:, ids:)
  upsert_texts(ids: ids, texts: texts)
end

#upsert_texts(texts:, ids:) ⇒ PG::Result

Upsert a list of texts to the index.

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<Integer>)

    The ids of the objects to add to the index, in the same order as the texts

Returns:

  • (PG::Result)

    The response from the database, including the ids of the added or updated texts.



56
57
58
59
60
61
62
63
64
65
66
67
# File 'lib/langchain/vectorsearch/pgvector.rb', line 56

# Inserts one row per text under the paired id. When a row with that id
# already exists, its content and vectors are overwritten.
#
# @param texts [Array<String>] texts to embed and store
# @param ids [Array<Integer>] ids, paired positionally with the texts
# @return [Object] whatever multi_insert returns when called with
#   return: :primary_key (presumably the affected primary keys — confirm)
def upsert_texts(texts:, ids:)
  # Every block result is a single Hash, so plain map builds the row list;
  # there is nothing for flat_map to flatten.
  rows = texts.zip(ids).map do |text, id|
    {id: id, content: text, vectors: llm.embed(text: text).embedding.to_s, namespace: namespace}
  end

  # On an id conflict only content and vectors are replaced; namespace is not
  # part of the update set.
  @db[table_name.to_sym]
    .insert_conflict(
      target: :id,
      update: {content: Sequel[:excluded][:content], vectors: Sequel[:excluded][:vectors]}
    )
    .multi_insert(rows, return: :primary_key)
end