Class: Langchain::Vectorsearch::Pgvector

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/pgvector.rb

Constant Summary collapse

OPERATORS =

The operators supported by the PostgreSQL vector search adapter

{
  "cosine_distance" => "cosine",
  "euclidean_distance" => "euclidean",
  "inner_product_distance" => "inner_product"
}
DEFAULT_OPERATOR =
"cosine_distance"

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary collapse

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#add_data, #generate_hyde_prompt, #generate_rag_prompt, #get_default_schema, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(url:, index_name:, llm:, namespace: nil) ⇒ Pgvector

Returns a new instance of Pgvector.

Parameters:

  • url (String)

    The URL of the PostgreSQL database

  • index_name (String)

    The name of the table to use for the index

  • llm (Object)

    The LLM client to use

  • namespace (String) (defaults to: nil)

    The namespace to use for the index when inserting/querying



30
31
32
33
34
35
36
37
38
39
40
41
42
43
# File 'lib/langchain/vectorsearch/pgvector.rb', line 30

# Build a Pgvector adapter on top of a Sequel connection.
#
# @param url [String] The URL of the PostgreSQL database
# @param index_name [String] The name of the table to use for the index
# @param llm [Object] The LLM client to use
# @param namespace [String, nil] The namespace to use for the index when inserting/querying
def initialize(url:, index_name:, llm:, namespace: nil)
  # depends_on is invoked once per gem, in this order, before Sequel is used.
  %w[sequel pgvector].each { |gem_name| depends_on gem_name }

  @db = Sequel.connect(url)
  @table_name = index_name

  # Name of the column that holds the namespace, and the namespace value itself.
  @namespace_column = "namespace"
  @namespace = namespace

  # DEFAULT_OPERATOR is "cosine_distance", which OPERATORS maps to "cosine".
  @operator = OPERATORS[DEFAULT_OPERATOR]

  super(llm: llm)
end

Instance Attribute Details

#dbObject (readonly)

Returns the value of attribute db.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @db, which the constructor sets to the result of Sequel.connect(url).
def db = @db

#documents_tableObject (readonly)

Returns the value of attribute documents_table.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @documents_table. The constructor shown for this class never
# assigns it, so this yields nil unless it is set somewhere else.
def documents_table = @documents_table

#namespaceObject (readonly)

Returns the value of attribute namespace.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace: the optional namespace given to the constructor (nil by default).
def namespace = @namespace

#namespace_columnObject (readonly)

Returns the value of attribute namespace_column.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @namespace_column; the constructor sets it to "namespace".
def namespace_column = @namespace_column

#operatorObject (readonly)

Returns the value of attribute operator.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @operator; the constructor sets it to OPERATORS[DEFAULT_OPERATOR].
def operator = @operator

#table_nameObject (readonly)

Returns the value of attribute table_name.



24
25
26
# File 'lib/langchain/vectorsearch/pgvector.rb', line 24

# Reader for @table_name: the index_name given to the constructor.
def table_name = @table_name

Instance Method Details

#add_texts(texts:, ids: nil, metadata: nil) ⇒ Array<Integer>

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<String>) (defaults to: nil)

    The ids to add to the index, in the same order as the texts

  • metadata (Array<Hash>) (defaults to: nil)

    The metadata to associate with each text, in the same order as the texts

Returns:

  • (Array<Integer>)

    The ids of the added texts.



87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
# File 'lib/langchain/vectorsearch/pgvector.rb', line 87

# Add a list of texts to the index.
#
# When +ids+ is nil or empty, one row per text is inserted. Otherwise the call
# is handed to #upsert_texts so the supplied ids are used.
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<String>, nil] The ids to add to the index, in the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text, in the same order as the texts
# @return [Array<Integer>] The ids of the added texts (on the upsert path, whatever #upsert_texts returns)
def add_texts(texts:, ids: nil, metadata: nil)
  # No metadata given: pair every text with an empty hash, serialized below as "{}".
  metadata = Array.new(texts.size, {}) if metadata.nil?

  if ids.nil? || ids.empty?
    data = texts.zip(metadata).map do |text, meta|
      {
        content: text,
        # The embedding is stored in its string form.
        vectors: llm.embed(text: text).embedding.to_s,
        namespace: namespace,
        metadata: meta.to_json
      }
    end

    @db[table_name.to_sym].multi_insert(data, return: :primary_key)
  else
    upsert_texts(texts: texts, ids: ids, metadata: metadata)
  end
end

#ask(question:, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • k (Integer) (defaults to: 4)

    The number of results to have in context

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
# File 'lib/langchain/vectorsearch/pgvector.rb', line 173

# Ask a question and return the answer.
#
# The top +k+ search results are stringified and joined into one context,
# which is placed in a RAG prompt and sent to the LLM as a single user message.
#
# @param question [String] The question to ask
# @param k [Integer] The number of results to have in context
# @yield [String] Stream responses back one String at a time
# @return [Object] The LLM chat response, with its +context+ set to the joined search results
def ask(question:, k: 4, &block)
  context = similarity_search(query: question, k: k)
    .map { |hit| hit.content.to_s }
    .join("\n---\n")

  messages = [{role: "user", content: generate_rag_prompt(question: question, context: context)}]

  # Attach the retrieved context to the response before handing it back.
  llm.chat(messages: messages, &block).tap { |reply| reply.context = context }
end

#create_default_schemaObject

Create default schema



123
124
125
126
127
128
129
130
131
132
133
134
# File 'lib/langchain/vectorsearch/pgvector.rb', line 123

# Create the default schema: enable the vector extension, then create the
# index table if it does not exist yet.
def create_default_schema
  db.run("CREATE EXTENSION IF NOT EXISTS vector")

  # Captured as locals because the block below uses bare column calls;
  # presumably it is not evaluated with this object as self — confirm against Sequel.
  ns_column = @namespace_column
  dimensions = llm.default_dimensions

  db.create_table?(table_name.to_sym) do
    primary_key(:id)
    text(:content)
    # Vector width follows the LLM's default embedding dimensions.
    column(:vectors, "vector(#{dimensions})")
    text(ns_column.to_sym, default: nil)
    jsonb(:metadata, default: "{}")
  end
end

#destroy_default_schemaObject

Destroy default schema



137
138
139
# File 'lib/langchain/vectorsearch/pgvector.rb', line 137

# Destroy the default schema by calling drop_table? for the index table.
def destroy_default_schema
  connection = db
  connection.drop_table?(table_name.to_sym)
end

#documents_modelObject



45
46
47
48
49
# File 'lib/langchain/vectorsearch/pgvector.rb', line 45

# Build an anonymous Sequel model class for the index table with the pgvector
# plugin enabled on the :vectors column. A new class is created on every call.
def documents_model
  table_model = Sequel::Model(table_name.to_sym)
  Class.new(table_model) { plugin :pgvector, :vectors }
end

#remove_texts(ids:) ⇒ Integer

Remove a list of texts from the index

Parameters:

  • ids (Array<Integer>)

    The ids of the texts to remove from the index

Returns:

  • (Integer)

    The number of texts removed from the index



118
119
120
# File 'lib/langchain/vectorsearch/pgvector.rb', line 118

# Remove a list of texts from the index.
#
# @param ids [Array<Integer>] The ids of the texts to remove from the index
# @return [Integer] The number of texts removed from the index
def remove_texts(ids:)
  index_rows = @db[table_name.to_sym]
  index_rows.where(id: ids).delete
end

#similarity_search(query:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index

Parameters:

  • query (String)

    The text to search for

  • k (Integer) (defaults to: 4)

    The number of top results to return

Returns:

  • (Array<Hash>)

    The results of the search



145
146
147
148
149
150
151
152
# File 'lib/langchain/vectorsearch/pgvector.rb', line 145

# Search for similar texts in the index.
#
# The query is embedded with the configured LLM and the resulting vector is
# passed to #similarity_search_by_vector.
#
# @param query [String] The text to search for
# @param k [Integer] The number of top results to return
# @return [Array<Hash>] The results of the search
def similarity_search(query:, k: 4)
  query_vector = llm.embed(text: query).embedding
  similarity_search_by_vector(embedding: query_vector, k: k)
end

#similarity_search_by_vector(embedding:, k: 4) ⇒ Array<Hash>

Search for similar texts in the index by the passed in vector. You must generate your own vector using the same LLM that generated the embeddings stored in the Vectorsearch DB.

Parameters:

  • embedding (Array<Float>)

    The vector to search for

  • k (Integer) (defaults to: 4)

    The number of top results to return

Returns:

  • (Array<Hash>)

    The results of the search



159
160
161
162
163
164
165
166
# File 'lib/langchain/vectorsearch/pgvector.rb', line 159

# Search for similar texts in the index by the passed in vector.
# You must generate your own vector using the same LLM that generated the
# embeddings stored in the Vectorsearch DB.
#
# @param embedding [Array<Float>] The vector to search for
# @param k [Integer] The number of top results to return
# @return [Array<Hash>] The results of the search (the value db.transaction returns
#   for the block — presumably the query built below; confirm against Sequel)
def similarity_search_by_vector(embedding:, k: 4)
  db.transaction do
    # Select content and metadata only, ordered by distance to the given vector.
    neighbours = documents_model
      .select(:content, :metadata)
      .nearest_neighbors(:vectors, embedding, distance: operator)

    # Cap the result at k rows and restrict it to this adapter's namespace.
    neighbours.limit(k).where(namespace_column.to_sym => namespace)
  end
end

#update_texts(texts:, ids:, metadata: nil) ⇒ Array<Integer>

Update a list of ids and corresponding texts to the index

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<String>)

    The ids to add to the index, in the same order as the texts

  • metadata (Array<Hash>) (defaults to: nil)

    The metadata to associate with each text, in the same order as the texts

Returns:

  • (Array<Integer>)

    The ids of the updated texts.



111
112
113
# File 'lib/langchain/vectorsearch/pgvector.rb', line 111

# Update a list of ids and corresponding texts in the index.
#
# This is a thin wrapper: every argument is forwarded unchanged to #upsert_texts.
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<String>] The ids to add to the index, in the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text, in the same order as the texts
# @return [Array<Integer>] The ids of the updated texts (whatever #upsert_texts returns)
def update_texts(texts:, ids:, metadata: nil)
  upsert_texts(texts: texts, ids: ids, metadata: metadata)
end

#upsert_texts(texts:, ids:, metadata: nil) ⇒ PG::Result

Upsert a list of texts to the index.

Parameters:

  • texts (Array<String>)

    The texts to add to the index

  • ids (Array<Integer>)

    The ids of the objects to add to the index, in the same order as the texts

  • metadata (Array<Hash>) (defaults to: nil)

    The metadata to associate with each text, in the same order as the texts

Returns:

  • (PG::Result)

    The response from the database including the ids of the added or updated texts.



57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
# File 'lib/langchain/vectorsearch/pgvector.rb', line 57

# Upsert a list of texts to the index.
#
# Each text is embedded and written under its id. When a row with the same id
# already exists, its content, vectors and metadata are overwritten; the
# namespace of the existing row is not part of the conflict update.
#
# @param texts [Array<String>] The texts to add to the index
# @param ids [Array<Integer>] The ids of the objects to add to the index, in the same order as the texts
# @param metadata [Array<Hash>, nil] The metadata to associate with each text, in the same order as the texts
# @return [Object] The result of multi_insert called with return: :primary_key
#   (presumably the ids of the added or updated texts — confirm against Sequel)
def upsert_texts(texts:, ids:, metadata: nil)
  # No metadata given: pair every text with an empty hash, serialized below as "{}".
  metadata = Array.new(texts.size, {}) if metadata.nil?

  data = texts.zip(ids, metadata).map do |text, id, meta|
    {
      id: id,
      content: text,
      # The embedding is stored in its string form.
      vectors: llm.embed(text: text).embedding.to_s,
      namespace: namespace,
      metadata: meta.to_json
    }
  end

  @db[table_name.to_sym]
    .insert_conflict(
      target: :id,
      # On an id collision, take the incoming row's values for these columns.
      update: {
        content: Sequel[:excluded][:content],
        vectors: Sequel[:excluded][:vectors],
        metadata: Sequel[:excluded][:metadata]
      }
    )
    .multi_insert(data, return: :primary_key)
end