Class: Documentrix::Documents::Splitters::Character
- Inherits:
-
Object
- Object
- Documentrix::Documents::Splitters::Character
- Defined in:
- lib/documentrix/documents/splitters/character.rb
Constant Summary collapse
- DEFAULT_SEPARATOR =
/(?:\r?\n){2,}/
Instance Method Summary collapse
-
#initialize(separator: DEFAULT_SEPARATOR, include_separator: false, combining_string: "\n\n", chunk_size: 4096) ⇒ Character
constructor
A new instance of Character.
- #split(text) ⇒ Object
Constructor Details
#initialize(separator: DEFAULT_SEPARATOR, include_separator: false, combining_string: "\n\n", chunk_size: 4096) ⇒ Character
Returns a new instance of Character.
5 6 7 8 9 10 11 |
# File 'lib/documentrix/documents/splitters/character.rb', line 5

# Creates a new character splitter.
#
# @param separator [Regexp, String] pattern the text is split on.
# @param include_separator [Boolean] when true, the separator is wrapped in
#   a capture group so that String#split also yields the matched separator
#   text, which #split then re-attaches to the preceding piece.
# @param combining_string [String] string appended after each piece when
#   pieces are packed into a chunk.
# @param chunk_size [Integer] size threshold (in characters) used when
#   packing pieces into chunks.
def initialize(separator: DEFAULT_SEPARATOR, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
  @separator         = separator
  @include_separator = include_separator
  @combining_string  = combining_string
  @chunk_size        = chunk_size
  return unless include_separator

  # Regexp.union returns a Regexp unchanged and escapes a String, so a
  # String separator such as "." keeps matching literally (as it would in
  # String#split) instead of being reinterpreted as a regex metacharacter.
  @separator = Regexp.new("(#{Regexp.union(@separator)})")
end
Instance Method Details
#split(text) ⇒ Object
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 |
# File 'lib/documentrix/documents/splitters/character.rb', line 13

# Splits +text+ on the configured separator and packs the resulting pieces
# into chunks.
#
# Pieces are appended to the current chunk, each followed by the combining
# string, for as long as the chunk's current size plus the piece's size
# stays below the chunk size. A piece that does not fit closes the current
# chunk and opens a new one. Note that the combining string itself is not
# counted when testing whether a piece fits, and a single piece larger than
# the chunk size is emitted as its own chunk.
#
# @param text [String] the text to split.
# @return [Array<String>] the chunks, in order; empty when +text+ yields no
#   pieces.
def split(text)
  pieces = []
  text.split(@separator) do |piece|
    # With include_separator the pattern has a capture group, so split also
    # yields the matched separators; glue each onto the piece before it.
    if @include_separator && piece.match?(@separator)
      pieces.last&.concat(piece)
    else
      pieces.push(piece)
    end
  end

  chunks = []
  current = +''
  pieces.each do |piece|
    if current.size + piece.size < @chunk_size
      current << piece << @combining_string
    else
      chunks << current unless current.empty?
      # Terminate the opening piece with the combining string as well;
      # otherwise the next piece that fits would be glued directly onto it.
      current = piece + @combining_string
    end
  end
  chunks << current unless current.empty?
  chunks
end