Class: Langchain::Chunker::RecursiveText

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/chunker/recursive_text.rb

Overview

Recursive text chunker. Preferentially splits on separators.

Usage:

Langchain::Chunker::RecursiveText.new(text).chunks

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"]) ⇒ RecursiveText

Returns a new instance of RecursiveText.

Parameters:

  • text (String)
  • chunk_size (Integer) (defaults to: 1000)
  • chunk_overlap (Integer) (defaults to: 200)
  • separators (Array<String>) (defaults to: ["\n\n"])


18
19
20
21
22
23
# File 'lib/langchain/chunker/recursive_text.rb', line 18

# Records the text and the splitting options on the instance. No splitting
# happens here; the values are only stored for later use.
#
# @param text [String] the text to be chunked
# @param chunk_size [Integer] stored as-is (defaults to 1000)
# @param chunk_overlap [Integer] stored as-is (defaults to 200)
# @param separators [Array<String>] stored as-is (defaults to ["\n\n"])
def initialize(text, chunk_size: 1000, chunk_overlap: 200, separators: ["\n\n"])
  @text, @chunk_size, @chunk_overlap = text, chunk_size, chunk_overlap
  @separators = separators
end

Instance Attribute Details

#chunk_overlap ⇒ Object (readonly)

Returns the value of attribute chunk_overlap.



12
13
14
# File 'lib/langchain/chunker/recursive_text.rb', line 12

# Reader for the +chunk_overlap+ attribute.
#
# @return [Integer] the value currently held in @chunk_overlap
def chunk_overlap = @chunk_overlap

#chunk_size ⇒ Object (readonly)

Returns the value of attribute chunk_size.



12
13
14
# File 'lib/langchain/chunker/recursive_text.rb', line 12

# Reader for the +chunk_size+ attribute.
#
# @return [Integer] the value currently held in @chunk_size
def chunk_size = @chunk_size

#separators ⇒ Object (readonly)

Returns the value of attribute separators.



12
13
14
# File 'lib/langchain/chunker/recursive_text.rb', line 12

# Reader for the +separators+ attribute.
#
# @return [Array<String>] the object currently held in @separators
#   (the same instance, not a copy)
def separators = @separators

#text ⇒ Object (readonly)

Returns the value of attribute text.



12
13
14
# File 'lib/langchain/chunker/recursive_text.rb', line 12

# Reader for the +text+ attribute.
#
# @return [String] the object currently held in @text
def text = @text

Instance Method Details

#chunks ⇒ Array<Langchain::Chunk>

Returns:

  • (Array<Langchain::Chunk>)



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/langchain/chunker/recursive_text.rb', line 26

# Splits +text+ with a Baran::RecursiveCharacterTextSplitter configured from
# this object's chunk_size, chunk_overlap and separators, then wraps every
# piece the splitter returns in a Langchain::Chunk.
#
# @return [Array<Langchain::Chunk>] one chunk per piece returned by the splitter
def chunks
  pieces = Baran::RecursiveCharacterTextSplitter
    .new(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separators: separators)
    .chunks(text)

  # Only the :text entry of each piece is carried over into the chunk.
  pieces.map { |piece| Langchain::Chunk.new(text: piece[:text]) }
end