Class: Ollama::Documents

Inherits:
Object
  • Object
show all
Includes:
Utils::Math, Utils::Width
Defined in:
lib/ollama/documents.rb,
lib/ollama/documents.rb

Defined Under Namespace

Modules: Splitters Classes: MemoryCache, Record, RedisCache

Instance Attribute Summary collapse

Instance Method Summary collapse

Methods included from Utils::Width

truncate, width, wrap

Methods included from Utils::Math

#cosine_similarity, #norm

Constructor Details

#initialize(ollama:, model:, model_options: nil, collection: :default, cache: MemoryCache, redis_url: nil) ⇒ Documents

Returns a new instance of Documents.



31
32
33
34
# File 'lib/ollama/documents.rb', line 31

# Creates a document store bound to an Ollama client and an embedding model.
#
# @param ollama [Object] client, stored and exposed through #ollama
# @param model [Object] embedding model identifier, exposed through #model
# @param model_options [Object, nil] stored as-is for later embedding requests
# @param collection [Object] collection name, exposed through #collection
# @param cache [Object] cache backend handed to connect_cache
# @param redis_url [String, nil] stored in @redis_url
def initialize(ollama:, model:, model_options: nil, collection: :default, cache: MemoryCache, redis_url: nil)
  @ollama, @model, @model_options, @collection = ollama, model, model_options, collection
  # Assign the URL before connecting. A parallel assignment evaluates its
  # whole right-hand side first, so connect_cache would otherwise run while
  # @redis_url is still nil.
  # NOTE(review): connect_cache is defined outside this view — confirm that
  # it reads @redis_url; if it does not, this ordering is simply harmless.
  @redis_url = redis_url
  @cache     = connect_cache(cache)
end

Instance Attribute Details

#collection ⇒ Object

Returns the value of attribute collection.



36
37
38
# File 'lib/ollama/documents.rb', line 36

# @return [Object] the collection held in @collection
def collection = @collection

#model ⇒ Object (readonly)

Returns the value of attribute model.



36
37
38
# File 'lib/ollama/documents.rb', line 36

# @return [Object] the model held in @model
def model = @model

#ollama ⇒ Object (readonly)

Returns the value of attribute ollama.



36
37
38
# File 'lib/ollama/documents.rb', line 36

# @return [Object] the client held in @ollama
def ollama = @ollama

Instance Method Details

#[](text) ⇒ Object



71
72
73
# File 'lib/ollama/documents.rb', line 71

# Looks up the cache entry stored for +text+.
#
# @param text [Object] value turned into a cache key via #key
# @return [Object] whatever the cache returns for that key
def [](text)
  cache_key = key(text)
  @cache[cache_key]
end

#[]=(text, record) ⇒ Object



75
76
77
# File 'lib/ollama/documents.rb', line 75

# Stores +record+ in the cache under the key derived from +text+.
#
# @param text [Object] value turned into a cache key via #key
# @param record [Object] entry to store
def []=(text, record)
  cache_key = key(text)
  @cache[cache_key] = record
end

#add(inputs, batch_size: 10, source: nil, tags: []) ⇒ Object Also known as: <<



43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
# File 'lib/ollama/documents.rb', line 43

# Embeds the given inputs and stores one record per new text in the cache.
# Also available as #<<.
#
# @param inputs [Object, Array] one input or several; an element that
#   responds to +read+ is read, anything else is converted with +to_s+
# @param batch_size [Integer] number of texts sent per embedding request
# @param source [String, nil] stored on every record; its basename, with any
#   "?..." suffix removed, is added to the tags
# @param tags [Object] tags attached to every record created by this call
# @return [self]
def add(inputs, batch_size: 10, source: nil, tags: [])
  tags = Ollama::Utils::Tags.new(tags)
  source and tags.add File.basename(source).gsub(/\?.*/, '')
  # Build a fresh array: Array(inputs) hands back the caller's own array when
  # one is passed, so in-place map!/reject! would rewrite the caller's data.
  texts = Array(inputs).map { |input| input.respond_to?(:read) ? input.read : input.to_s }
  # A repeated text maps to the same cache key, so embed it only once.
  texts = texts.uniq.reject { |text| exist?(text) }
  texts.empty? and return self
  batches = texts.each_slice(batch_size).
    with_infobar(
      label: "Add #{truncate(tags.to_s, percentage: 25)}",
      total: texts.size
    )
  batches.each do |batch|
    embeddings = fetch_embeddings(model:, options: @model_options, input: batch)
    batch.zip(embeddings) do |text, embedding|
      norm       = norm(embedding)
      self[text] = Record[text:, embedding:, norm:, source:, tags: tags.to_a]
    end
    infobar.progress by: batch.size
  end
  infobar.newline
  self
end

#clear(tags: nil) ⇒ Object



91
92
93
94
95
96
97
98
99
100
101
102
103
# File 'lib/ollama/documents.rb', line 91

# Removes entries from the cache.
#
# Without +tags+ the whole cache is cleared; with +tags+ only entries whose
# record shares at least one tag with the given ones are deleted.
#
# @param tags [Object, nil] a tag or list of tags selecting what to delete
# @return [self]
def clear(tags: nil)
  unless tags
    @cache.clear
    return self
  end
  wanted = Ollama::Utils::Tags.new(Array(tags)).to_a
  @cache.each do |prefixed_key, record|
    next if (wanted & record.tags).empty?
    @cache.delete(@cache.unpre(prefixed_key))
  end
  self
end

#collections ⇒ Object



139
140
141
142
143
144
145
146
147
148
149
# File 'lib/ollama/documents.rb', line 139

# Lists the collection names known to the current cache backend.
#
# A MemoryCache only knows its own collection. For a RedisCache the names are
# extracted from the entries of a cache opened with the class-wide prefix.
# Any other backend yields an empty list.
#
# @return [Array] collection names
def collections
  case @cache
  in MemoryCache
    [ @collection ]
  in RedisCache
    prefix  = '%s-' % self.class
    entries = Documents::RedisCache.new(prefix:, url: @redis_url)
    names   = entries.map { |entry| entry[/#{prefix}(.*)-/, 1] }
    names.uniq
  else
    []
  end
end

#delete(text) ⇒ Object



83
84
85
# File 'lib/ollama/documents.rb', line 83

# Removes the cache entry stored for +text+.
#
# @param text [Object] value turned into a cache key via #key
# @return [Object] whatever the cache's delete returns
def delete(text) = @cache.delete(key(text))

#exist?(text) ⇒ Boolean

Returns:

  • (Boolean)


79
80
81
# File 'lib/ollama/documents.rb', line 79

# Tells whether the cache already holds an entry for +text+.
#
# @param text [Object] value turned into a cache key via #key
# @return [Boolean]
def exist?(text) = @cache.key?(key(text))

#find(string, tags: nil, prompt: nil) ⇒ Object



105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
# File 'lib/ollama/documents.rb', line 105

# Returns the cached records ordered from most to least similar to +string+.
#
# Each returned record has its +key+ and +similarity+ attributes set as a
# side effect of the ranking.
#
# @param string [Object] query, converted to a vector via convert_to_vector
# @param tags [Object, nil] when given, only records sharing at least one of
#   these tags are considered
# @param prompt [Object, nil] passed through to convert_to_vector
# @return [Array] records, best match first; empty when nothing is cached
def find(string, tags: nil, prompt: nil)
  query_vector = convert_to_vector(string, prompt:)
  query_norm   = norm(query_vector)
  candidates   = @cache
  if tags
    wanted     = Ollama::Utils::Tags.new(tags).to_a
    candidates = candidates.select { |_key, record| !(wanted & record.tags).empty? }
  end
  ranked = candidates.sort_by do |cache_key, record|
    record.key        = cache_key
    # The assigned similarity is also the block's value, i.e. the sort key.
    record.similarity = cosine_similarity(
      a: query_vector,
      b: record.embedding,
      a_norm: query_norm,
      b_norm: record.norm,
    )
  end
  # sort_by orders ascending, so reverse to put the best match first.
  ranked.map(&:last).reverse
end

#find_where(string, text_size: nil, text_count: nil, **opts) ⇒ Object



125
126
127
128
129
130
131
132
133
134
135
136
137
# File 'lib/ollama/documents.rb', line 125

# Runs #find and keeps the leading results that fit the given budgets.
#
# Records are taken in ranking order until adding the next one would exceed
# +text_size+ (running total of record text sizes) or +text_count+ (number of
# records). A limit that is not given is not enforced.
#
# @param string [Object] query passed to #find
# @param text_size [Integer, nil] upper bound on the summed text size
# @param text_count [Integer, nil] upper bound on the number of records
# @param opts [Hash] remaining keywords, forwarded to #find
# @return [Array] the leading records that fit
def find_where(string, text_size: nil, text_count: nil, **opts)
  chars_used = 0
  taken      = 0
  find(string, **opts).take_while do |record|
    over_size  = text_size && (chars_used += record.text.size) > text_size
    over_count = !over_size && text_count && (taken += 1) > text_count
    !(over_size || over_count)
  end
end

#size ⇒ Object



87
88
89
# File 'lib/ollama/documents.rb', line 87

# @return [Object] the size reported by the cache
def size = @cache.size

#tags ⇒ Object



151
152
153
# File 'lib/ollama/documents.rb', line 151

# Collects the tags of every cached record into one Tags object.
#
# @return [Ollama::Utils::Tags] result of merging each record's tags into a
#   new, initially empty Tags instance
def tags
  seed = Ollama::Utils::Tags.new
  @cache.reduce(seed) do |collected, (_key, record)|
    collected.merge(record.tags)
  end
end