Class: Roseflow::Text::RecursiveCharacterSplitter

Inherits:
Splitter
  • Object
show all
Defined in:
lib/roseflow/text/recursive_character_splitter.rb

Direct Known Subclasses

WordSplitter

Constant Summary collapse

# Default separator list, ordered from coarsest to finest: blank line
# (paragraph break), single newline, single space, and finally the empty
# string. Used by #initialize when no explicit separators are given.
# NOTE(review): presumably consulted in this order when picking a
# separator for a text; the selection logic is not shown here -- confirm.
SEPARATORS =
["\n\n", "\n", " ", ""]

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(separators = nil, **kwargs) ⇒ RecursiveCharacterSplitter

Returns a new instance of RecursiveCharacterSplitter.



10
11
12
13
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 10

# Builds a splitter with an optional custom separator list.
#
# @param separators [Object, nil] separators to use instead of the
#   defaults; presumably an Array of Strings shaped like SEPARATORS
#   (TODO confirm). A nil or false value selects SEPARATORS.
# @param kwargs [Hash] remaining keyword options, forwarded untouched to
#   the superclass initializer.
def initialize(separators = nil, **kwargs)
  super(**kwargs)
  @separators = separators
  # Fall back to the class-level defaults when nothing usable was passed.
  @separators ||= SEPARATORS
end

Instance Attribute Details

#chunk_overlap ⇒ Object (readonly)

Returns the value of attribute chunk_overlap.



15
16
17
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 15

# Read-only accessor for @chunk_overlap.
#
# #split passes this value to Array#last, so it is used there as a count
# of trailing segments carried over from the previous chunk (not as a
# number of characters).
# NOTE(review): the assignment of @chunk_overlap is not visible here;
# presumably it happens in the superclass initializer -- confirm.
#
# @return [Object] the current value of @chunk_overlap
def chunk_overlap
  @chunk_overlap
end

#chunk_size ⇒ Object (readonly)

Returns the value of attribute chunk_size.



15
16
17
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 15

# Read-only accessor for @chunk_size.
#
# #split compares its running size total against this value to decide
# when to start a new chunk.
# NOTE(review): the assignment of @chunk_size is not visible here;
# presumably it happens in the superclass initializer -- confirm.
#
# @return [Object] the current value of @chunk_size
def chunk_size
  @chunk_size
end

Instance Method Details

#split(text) ⇒ Object



17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 17

# Splits +text+ into chunks whose joined length stays within +chunk_size+
# characters whenever the individual segments allow it.
#
# The text is cut on the separator returned by +find_separator+ (defined
# outside this method). The resulting segments are packed greedily, in
# order, and every chunk is rendered by joining its segments with a single
# space. When a segment does not fit, a new chunk is started with the last
# +chunk_overlap+ segments of the previous chunk followed by that segment.
#
# Segments are never cut, so a chunk can still exceed +chunk_size+ when a
# single segment (or the carried-over segments plus the new one) is longer
# than the limit.
#
# @param text [String] the text to split
# @return [Array<String>] the chunks in order; empty when +text+ yields
#   no segments
def split(text)
  segments = text.split(find_separator(text))
  chunks = []
  # Exact length of the current chunk once joined with single spaces.
  current_size = 0

  segments.each do |segment|
    current = chunks.last

    if current.nil?
      # First segment always opens the first chunk, so no empty
      # placeholder chunk is ever emitted.
      chunks << [segment]
      current_size = segment.size
    elsif current_size + 1 + segment.size > chunk_size
      # The "+ 1" is the joining space the segment would need.
      carried = [*current.last(chunk_overlap), segment]
      current_size = carried.sum(&:size) + carried.size - 1
      chunks << carried
    else
      current_size += 1 + segment.size
      current << segment
    end
  end

  chunks.map { |chunk| chunk.join(" ") }
end