Class: Documentrix::Documents::Splitters::RecursiveCharacter
- Inherits:
-
Object
- Object
- Documentrix::Documents::Splitters::RecursiveCharacter
- Defined in:
- lib/documentrix/documents/splitters/character.rb
Constant Summary collapse
- DEFAULT_SEPARATORS =
[ /(?:\r?\n){2,}/, /\r?\n/, /\b/, //, ].freeze
Instance Method Summary collapse
-
#initialize(separators: DEFAULT_SEPARATORS, include_separator: false, combining_string: "\n\n", chunk_size: 4096) ⇒ RecursiveCharacter
constructor
A new instance of RecursiveCharacter.
- #split(text, separators: @separators) ⇒ Object
Constructor Details
#initialize(separators: DEFAULT_SEPARATORS, include_separator: false, combining_string: "\n\n", chunk_size: 4096) ⇒ RecursiveCharacter
Returns a new instance of RecursiveCharacter.
45 46 47 48 49 50 |
# File 'lib/documentrix/documents/splitters/character.rb', line 45

# Builds a recursive splitter configuration.
#
# @param separators [Array] ordered separators to try, one per recursion
#   level; must not be empty
# @param include_separator [Boolean] stored and handed to every Character
#   splitter created by #split
# @param combining_string [String] stored and handed to every Character
#   splitter created by #split
# @param chunk_size [Integer] size above which #split re-splits a piece
#   using the next separator
# @raise [ArgumentError] if +separators+ is empty
def initialize(separators: DEFAULT_SEPARATORS, include_separator: false, combining_string: "\n\n", chunk_size: 4096)
  raise ArgumentError, "non-empty array of separators required" if separators.empty?

  @separators        = separators
  @include_separator = include_separator
  @combining_string  = combining_string
  @chunk_size        = chunk_size
end
Instance Method Details
#split(text, separators: @separators) ⇒ Object
52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 |
# File 'lib/documentrix/documents/splitters/character.rb', line 52

# Splits +text+ with the first separator and recursively re-splits any
# piece that is still larger than the configured chunk size, using the
# remaining separators.
#
# @param text [String] the text to split
# @param separators [Array] separators still available at this recursion
#   level; defaults to the ones given to the constructor
# @return [Array] the resulting pieces; +[text]+ itself when no separators
#   are left or when the Character splitter produced nothing
def split(text, separators: @separators)
  # Nothing left to split with: hand the text back unchanged.
  return [ text ] if separators.empty?

  separator = separators.first
  remaining = separators.drop(1) # fresh array, the caller's list is untouched

  splitter = Character.new(
    separator:,
    include_separator: @include_separator,
    combining_string:  @combining_string,
    chunk_size:        @chunk_size
  )
  pieces = splitter.split(text)
  return [ text ] if pieces.count.zero?

  # Pieces that are small enough are kept as they are; oversized ones are
  # split again one level deeper with the next separator.
  pieces.flat_map do |piece|
    piece.size > @chunk_size ? split(piece, separators: remaining) : [ piece ]
  end
end