Class: Langchain::Chunker::Text

Inherits:
Base
  • Object
show all
Defined in:
lib/langchain/chunker/text.rb

Overview

Simple text chunker

Usage:

Langchain::Chunker::Text.new(text).chunks

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, chunk_size: 1000, chunk_overlap: 200, separator: "\n\n") ⇒ Text

Returns a new instance of Text.

Parameters:

  • text (String)
  • chunk_size (Integer) (defaults to: 1000)
  • chunk_overlap (Integer) (defaults to: 200)
  • separator (String) (defaults to: "\n\n")


18
19
20
21
22
23
# File 'lib/langchain/chunker/text.rb', line 18

# Stores the text and the splitting options on the new instance.
#
# @param text [String] the text to be chunked
# @param chunk_size [Integer] forwarded to the splitter as +chunk_size:+
# @param chunk_overlap [Integer] forwarded to the splitter as +chunk_overlap:+
# @param separator [String] forwarded to the splitter as +separator:+
def initialize(text, chunk_size: 1000, chunk_overlap: 200, separator: "\n\n")
  # Single parallel assignment; instance variables are still set left to right.
  @text, @chunk_size, @chunk_overlap, @separator = text, chunk_size, chunk_overlap, separator
end

Instance Attribute Details

#chunk_overlap ⇒ Object (readonly)

Returns the value of attribute chunk_overlap.



12
13
14
# File 'lib/langchain/chunker/text.rb', line 12

# Reader for the +chunk_overlap+ option.
#
# @return [Object] the current value of +@chunk_overlap+
def chunk_overlap = @chunk_overlap

#chunk_size ⇒ Object (readonly)

Returns the value of attribute chunk_size.



12
13
14
# File 'lib/langchain/chunker/text.rb', line 12

# Reader for the +chunk_size+ option.
#
# @return [Object] the current value of +@chunk_size+
def chunk_size = @chunk_size

#separator ⇒ Object (readonly)

Returns the value of attribute separator.



12
13
14
# File 'lib/langchain/chunker/text.rb', line 12

# Reader for the +separator+ option.
#
# @return [Object] the current value of +@separator+
def separator = @separator

#text ⇒ Object (readonly)

Returns the value of attribute text.



12
13
14
# File 'lib/langchain/chunker/text.rb', line 12

# Reader for the text to be chunked.
#
# @return [Object] the current value of +@text+
def text = @text

Instance Method Details

#chunks ⇒ Array<Langchain::Chunk>

Returns:

  • (Array<Langchain::Chunk>)



26
27
28
29
30
31
32
33
34
35
36
# File 'lib/langchain/chunker/text.rb', line 26

# Splits +text+ with a Baran::CharacterTextSplitter configured from this
# object's +chunk_size+, +chunk_overlap+ and +separator+, then wraps the
# +:text+ entry of every piece the splitter returns in a Langchain::Chunk.
#
# @return [Array<Langchain::Chunk>] one chunk per piece returned by the splitter
def chunks
  pieces = Baran::CharacterTextSplitter
    .new(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separator: separator)
    .chunks(text)

  pieces.map { |piece| Langchain::Chunk.new(text: piece[:text]) }
end