Class: Langchain::Vectorsearch::Pinecone

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/vectorsearch/pinecone.rb

Constant Summary

Constants inherited from Base

Base::DEFAULT_METRIC

Instance Attribute Summary

Attributes inherited from Base

#client, #index_name, #llm

Instance Method Summary collapse

Methods inherited from Base

#generate_hyde_prompt, #generate_rag_prompt, #remove_texts, #similarity_search_with_hyde

Methods included from DependencyHelper

#depends_on

Constructor Details

#initialize(environment:, api_key:, index_name:, llm:, base_uri: nil) ⇒ Pinecone

Initialize the Pinecone client

Parameters:

  • environment (String)

    The environment to use

  • api_key (String)

    The API key to use

  • index_name (String)

    The name of the index to use

  • llm (Object)

    The LLM client to use

  • base_uri (String) (defaults to: nil)

    Optional override for the Pinecone API base URI



20
21
22
23
24
25
26
27
28
29
30
31
32
33
# File 'lib/langchain/vectorsearch/pinecone.rb', line 20

# Initialize the Pinecone vector search client.
#
# @param environment [String] The Pinecone environment to use
# @param api_key [String] The Pinecone API key to authenticate with
# @param index_name [String] The name of the index to operate on
# @param llm [Object] The LLM client used to generate embeddings
# @param base_uri [String, nil] Optional override for the Pinecone API base URI
def initialize(environment:, api_key:, index_name:, llm:, base_uri: nil)
  depends_on "pinecone"

  ::Pinecone.configure do |pinecone_config|
    pinecone_config.api_key = api_key
    pinecone_config.environment = environment
    # Only override the base URI when one was explicitly supplied
    pinecone_config.base_uri = base_uri if base_uri
  end

  @index_name = index_name
  @client = ::Pinecone::Client.new

  super(llm: llm)
end

Instance Method Details

#add_data(paths:, namespace: "", options: {}, chunker: Langchain::Chunker::Text) ⇒ Object

Raises:

  • (ArgumentError)


68
69
70
71
72
73
74
75
76
77
78
79
80
81
# File 'lib/langchain/vectorsearch/pinecone.rb', line 68

# Load every file at the given paths, chunk each one, and add the
# resulting texts to the index.
#
# @param paths [Array<String>, String] Path(s) of the file(s) to load
# @param namespace [String] The namespace to add the texts to
# @param options [Hash] Options passed through to Langchain::Loader
# @param chunker [Class] The chunker class used to split loaded documents
# @raise [ArgumentError] When no paths are provided
def add_data(paths:, namespace: "", options: {}, chunker: Langchain::Chunker::Text)
  raise ArgumentError, "Paths must be provided" if Array(paths).empty?

  texts = Array(paths).flatten.flat_map do |path|
    chunks = Langchain::Loader.new(path, options, chunker: chunker)&.load&.chunks
    chunks.map { |chunk| chunk.text }
  end

  add_texts(texts: texts, namespace: namespace)
end

#add_texts(texts:, ids: [], namespace: "", metadata: nil) ⇒ Hash

Add a list of texts to the index

Parameters:

  • texts (Array<String>)

    The list of texts to add

  • ids (Array<Integer>) (defaults to: [])

    The list of IDs to add

  • namespace (String) (defaults to: "")

    The namespace to add the texts to

  • metadata (Hash) (defaults to: nil)

    The metadata to use for the texts

Returns:

  • (Hash)

    The response from the server



54
55
56
57
58
59
60
61
62
63
64
65
66
# File 'lib/langchain/vectorsearch/pinecone.rb', line 54

# Add a list of texts to the index.
#
# @param texts [Array<String>] The list of texts to add
# @param ids [Array<Integer>] The list of IDs to add (UUIDs generated when absent)
# @param namespace [String] The namespace to add the texts to
# @param metadata [Hash, nil] The metadata to use for the texts
# @return [Hash] The response from the server
def add_texts(texts:, ids: [], namespace: "", metadata: nil)
  vectors = texts.map.with_index do |text, i|
    {
      id: ids[i] ? ids[i].to_s : SecureRandom.uuid,
      # Fall back to storing the raw text when no explicit metadata is given
      metadata: metadata || {content: text},
      values: llm.embed(text: text).embedding
    }
  end

  index = client.index(index_name)

  index.upsert(vectors: vectors, namespace: namespace)
end

#ask(question:, namespace: "", filter: nil, k: 4) {|String| ... } ⇒ String

Ask a question and return the answer

Parameters:

  • question (String)

    The question to ask

  • namespace (String) (defaults to: "")

    The namespace to search in

  • k (Integer) (defaults to: 4)

    The number of results to have in context

  • filter (String) (defaults to: nil)

    The filter to use

Yields:

  • (String)

    Stream responses back one String at a time

Returns:

  • (String)

    The answer to the question



174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
# File 'lib/langchain/vectorsearch/pinecone.rb', line 174

# Ask a question and return the answer, grounded in similar documents
# fetched from the index.
#
# @param question [String] The question to ask
# @param namespace [String] The namespace to search in
# @param filter [String, nil] The filter to use
# @param k [Integer] The number of results to include as context
# @yield [String] Stream responses back one String at a time
# @return [String] The answer to the question
def ask(question:, namespace: "", filter: nil, k: 4, &block)
  results = similarity_search(query: question, namespace: namespace, filter: filter, k: k)

  context = results
    .map { |result| result.dig("metadata").to_s }
    .join("\n---\n")

  prompt = generate_rag_prompt(question: question, context: context)

  response = llm.chat(messages: [{role: "user", content: prompt}], &block)
  response.context = context
  response
end

#create_default_schemaHash

Create the index with the default schema

Returns:

  • (Hash)

    The response from the server



103
104
105
106
107
108
109
# File 'lib/langchain/vectorsearch/pinecone.rb', line 103

# Create the index with the default schema.
#
# @return [Hash] The response from the server
def create_default_schema
  client.create_index(
    name: index_name,
    dimension: llm.default_dimensions,
    metric: DEFAULT_METRIC
  )
end

#destroy_default_schemaHash

Delete the index

Returns:

  • (Hash)

    The response from the server



113
114
115
# File 'lib/langchain/vectorsearch/pinecone.rb', line 113

# Delete the index from Pinecone.
#
# @return [Hash] The response from the server
def destroy_default_schema
  client.delete_index(index_name)
end

#find(ids: [], namespace: "") ⇒ Hash

Find records by ids

Parameters:

  • ids (Array<Integer>) (defaults to: [])

    The ids to find

  • namespace (String) (defaults to: "")

    The namespace to search through

Returns:

  • (Hash)

    The response from the server

Raises:

  • (ArgumentError)


39
40
41
42
43
44
45
46
# File 'lib/langchain/vectorsearch/pinecone.rb', line 39

# Find records by their ids.
#
# @param ids [Array<Integer>] The ids to find
# @param namespace [String] The namespace to search through
# @return [Hash] The response from the server
# @raise [ArgumentError] When no ids are provided
def find(ids: [], namespace: "")
  raise ArgumentError, "Ids must be provided" if Array(ids).empty?

  index = client.index(index_name)
  index.fetch(ids: ids, namespace: namespace)
end

#get_default_schemaPinecone::Vector

Get the default schema

Returns:

  • (Pinecone::Vector)

    The default schema



119
120
121
# File 'lib/langchain/vectorsearch/pinecone.rb', line 119

# Get the default schema.
#
# Delegates to the #index helper (defined elsewhere in this class —
# not visible in this excerpt).
#
# @return [Pinecone::Vector] The default schema
def get_default_schema
  index
end

#similarity_search(query:, k: 4, namespace: "", filter: nil) ⇒ Array

Search for similar texts

Parameters:

  • query (String)

    The text to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

  • namespace (String) (defaults to: "")

    The namespace to search in

  • filter (String) (defaults to: nil)

    The filter to use

Returns:

  • (Array)

    The list of results



129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
# File 'lib/langchain/vectorsearch/pinecone.rb', line 129

# Search for texts similar to the given query string.
#
# @param query [String] The text to search for
# @param k [Integer] The number of results to return
# @param namespace [String] The namespace to search in
# @param filter [String, nil] The filter to use
# @return [Array] The list of results
def similarity_search(query:, k: 4, namespace: "", filter: nil)
  # Embed the query, then delegate to the vector-based search
  similarity_search_by_vector(
    embedding: llm.embed(text: query).embedding,
    k: k,
    namespace: namespace,
    filter: filter
  )
end

#similarity_search_by_vector(embedding:, k: 4, namespace: "", filter: nil) ⇒ Array

Search for similar texts by embedding

Parameters:

  • embedding (Array<Float>)

    The embedding to search for

  • k (Integer) (defaults to: 4)

    The number of results to return

  • namespace (String) (defaults to: "")

    The namespace to search in

  • filter (String) (defaults to: nil)

    The filter to use

Returns:

  • (Array)

    The list of results



151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
# File 'lib/langchain/vectorsearch/pinecone.rb', line 151

# Search for similar texts by embedding vector.
#
# @param embedding [Array<Float>] The embedding to search for
# @param k [Integer] The number of results to return
# @param namespace [String] The namespace to search in
# @param filter [String, nil] The filter to use
# @return [Array] The list of results
def similarity_search_by_vector(embedding:, k: 4, namespace: "", filter: nil)
  # compact drops the filter key entirely when no filter was given
  query_params = {
    vector: embedding,
    namespace: namespace,
    filter: filter,
    top_k: k,
    include_values: true,
    include_metadata: true
  }.compact

  client.index(index_name).query(query_params).dig("matches")
end

#update_texts(texts:, ids:, namespace: "", metadata: nil) ⇒ Array

Update a list of texts in the index

Parameters:

  • texts (Array<String>)

    The list of texts to update

  • ids (Array<Integer>)

    The list of IDs to update

  • namespace (String) (defaults to: "")

    The namespace to update the texts in

  • metadata (Hash) (defaults to: nil)

    The metadata to use for the texts

Returns:

  • (Array)

    The response from the server



89
90
91
92
93
94
95
96
97
98
99
# File 'lib/langchain/vectorsearch/pinecone.rb', line 89

# Update a list of texts in the index, re-embedding each one.
#
# @param texts [Array<String>] The list of texts to update
# @param ids [Array<Integer>] The list of IDs to update
# @param namespace [String] The namespace to update the texts in
# @param metadata [Hash, nil] The metadata to use for the texts
# @return [Array] The responses from the server
def update_texts(texts:, ids:, namespace: "", metadata: nil)
  texts.map.with_index do |text, i|
    # Pinecone::Vector#update ignores an argument when it is empty
    index.update(
      namespace: namespace,
      id: ids[i].to_s,
      values: llm.embed(text: text).embedding,
      set_metadata: metadata
    )
  end
end