Class: BxBuilderChain::Chunker::Text

Inherits:
Object
  • Object
show all
Defined in:
lib/bx_builder_chain/chunker/text.rb

Overview

Simple text chunker

Usage:

BxBuilderChain::Chunker::Text.new(text).chunks

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(text, chunk_size: 1024, chunk_overlap: 64, separator: "\n\n") ⇒ Text

Returns a new instance of Text.

Parameters:

  • text (String)
  • chunk_size (Integer) (defaults to: 1024)
  • chunk_overlap (Integer) (defaults to: 64)
  • separator (String) (defaults to: "\n\n")


20
21
22
23
24
25
# File 'lib/bx_builder_chain/chunker/text.rb', line 20

# Captures the text to be chunked together with the splitter settings.
# Nothing is split here; the stored values are only read back later
# through the attribute readers.
#
# @param text [String] the text to split
# @param chunk_size [Integer] chunk size setting (defaults to 1024)
# @param chunk_overlap [Integer] chunk overlap setting (defaults to 64)
# @param separator [String] separator setting (defaults to a blank line, "\n\n")
def initialize(text, chunk_size: 1024, chunk_overlap: 64, separator: "\n\n")
  @text = text
  # The two numeric settings travel together, so assign them as a pair.
  @chunk_size, @chunk_overlap = chunk_size, chunk_overlap
  @separator = separator
end

Instance Attribute Details

#chunk_overlap ⇒ Object (readonly)

Returns the value of attribute chunk_overlap.



14
15
16
# File 'lib/bx_builder_chain/chunker/text.rb', line 14

# Read-only accessor for the chunk overlap setting.
#
# @return [Object] the current value of @chunk_overlap
def chunk_overlap = @chunk_overlap

#chunk_size ⇒ Object (readonly)

Returns the value of attribute chunk_size.



14
15
16
# File 'lib/bx_builder_chain/chunker/text.rb', line 14

# Read-only accessor for the chunk size setting.
#
# @return [Object] the current value of @chunk_size
def chunk_size = @chunk_size

#separator ⇒ Object (readonly)

Returns the value of attribute separator.



14
15
16
# File 'lib/bx_builder_chain/chunker/text.rb', line 14

# Read-only accessor for the separator setting.
#
# @return [Object] the current value of @separator
def separator = @separator

#text ⇒ Object (readonly)

Returns the value of attribute text.



14
15
16
# File 'lib/bx_builder_chain/chunker/text.rb', line 14

# Read-only accessor for the text to be chunked.
#
# @return [Object] the current value of @text
def text = @text

Instance Method Details

#chunks ⇒ Array<String>

Returns:

  • (Array<String>)


28
29
30
31
32
33
34
35
# File 'lib/bx_builder_chain/chunker/text.rb', line 28

# Builds a Baran::CharacterTextSplitter from this instance's chunk_size,
# chunk_overlap and separator, then hands it the stored text. The
# splitter's result is returned untouched.
#
# NOTE(review): documented as Array<String>, but the element type is
# whatever Baran::CharacterTextSplitter#chunks produces — confirm against
# the Baran gem before relying on plain strings.
#
# @return [Array<String>]
def chunks
  Baran::CharacterTextSplitter
    .new(chunk_size: chunk_size, chunk_overlap: chunk_overlap, separator: separator)
    .chunks(text)
end