Class: Roseflow::Text::RecursiveCharacterSplitter
- Defined in:
- lib/roseflow/text/recursive_character_splitter.rb
Direct Known Subclasses
Constant Summary
- SEPARATORS =
["\n\n", "\n", " ", ""]
Instance Attribute Summary
-
#chunk_overlap ⇒ Object
readonly
Returns the value of attribute chunk_overlap.
-
#chunk_size ⇒ Object
readonly
Returns the value of attribute chunk_size.
Instance Method Summary
-
#initialize(separators = nil, **kwargs) ⇒ RecursiveCharacterSplitter
constructor
A new instance of RecursiveCharacterSplitter.
- #split(text) ⇒ Object
Constructor Details
#initialize(separators = nil, **kwargs) ⇒ RecursiveCharacterSplitter
Returns a new instance of RecursiveCharacterSplitter.
10 11 12 13 |
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 10

# Creates a splitter, handing every keyword option to the superclass
# initializer and remembering which separators to try.
#
# @param separators [Array<String>, nil] separators to use; when nil (or
#   otherwise falsy) the class-level SEPARATORS list is used instead
# @param kwargs [Hash] keyword options forwarded untouched to +super+
def initialize(separators = nil, **kwargs)
  super(**kwargs)

  # Fall back to the default list only when the caller supplied nothing.
  @separators = separators || SEPARATORS
end
Instance Attribute Details
#chunk_overlap ⇒ Object (readonly)
Returns the value of attribute chunk_overlap.
15 16 17 |
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 15

# Reader for the +@chunk_overlap+ instance variable.
#
# @return [Object] the stored value of +@chunk_overlap+
def chunk_overlap
  @chunk_overlap
end
#chunk_size ⇒ Object (readonly)
Returns the value of attribute chunk_size.
15 16 17 |
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 15

# Reader for the +@chunk_size+ instance variable.
#
# @return [Object] the stored value of +@chunk_size+
def chunk_size
  @chunk_size
end
Instance Method Details
#split(text) ⇒ Object
17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/roseflow/text/recursive_character_splitter.rb', line 17

# Splits +text+ on the separator returned by +find_separator(text)+ and
# regroups the resulting segments into chunks joined with a single space.
#
# A segment is appended to the current chunk as long as the joined chunk
# stays within +chunk_size+ characters. Otherwise the current chunk is
# closed and a new one is started, seeded with the last +chunk_overlap+
# *segments* (not characters) of the chunk just closed.
#
# A chunk can still be longer than +chunk_size+ when a single segment, or
# the overlap plus the incoming segment, is itself longer than the limit;
# segments are never cut.
#
# @param text [String] the text to split
# @return [Array<String>] the chunks in order; empty when +text+ produces
#   no segments. No empty chunk is ever emitted.
def split(text)
  segments = text.split(find_separator(text))
  chunks = []
  current = []
  current_size = 0 # always equals current.join(" ").size

  segments.each do |segment|
    # Length of the chunk if this segment were appended: every segment after
    # the first costs exactly one extra character for the joining space.
    grown_size = current.empty? ? segment.size : current_size + 1 + segment.size

    # An empty chunk always accepts the segment, so an oversized first
    # segment never leaves an empty chunk behind.
    if current.empty? || grown_size <= chunk_size
      current << segment
      current_size = grown_size
    else
      chunks << current
      current = current.last(chunk_overlap) + [segment]
      current_size = current.sum(&:size) + current.size - 1
    end
  end

  chunks << current unless current.empty?
  chunks.map { |chunk| chunk.join(" ") }
end