Class: StringSplitter

Inherits:
Object
  • Object
show all
Defined in:
lib/string_splitter.rb,
lib/string_splitter/version.rb

Overview

This class extends the functionality of String#split by:

- providing full control over which splits are accepted or rejected

- adding support for splitting from right-to-left

- encapsulating splitting options/preferences in the splitter rather
  than trying to cram them into overloaded method parameters

These enhancements allow splits to handle many cases that otherwise require bigger guns, e.g. regex matching or parsing.

Implementation-wise, we split the string with a scanner which works in a similar way to String#split and parse the resulting tokens into an array of Split objects with the following fields:

- captures:  separator substrings captured by parentheses in the delimiter pattern
- count:     the number of splits
- index:     the 0-based index of the split in the array
- lhs:       the string to the left of the separator (back to the previous split candidate)
- position:  the 1-based index of the split in the array (alias: pos)
- rhs:       the string to the right of the separator (up to the next split candidate)
- rindex:    the 0-based index of the split relative to the end of the array
- rposition: the 1-based index of the split relative to the end of the array (alias: rpos)
- separator: the string matched by the delimiter pattern/string

Constant Summary collapse

ACCEPT_ALL =

terminology: the delimiter is what we provide and the separators are what we get back (if we capture them). e.g. for:

ss.split("foo:bar::baz", /(\W+)/)

the delimiter is /(\W+)/ and the separators are “:” and “::”

->(_split) { true }
DEFAULT_DELIMITER =
/\s+/.freeze
REMOVE =
[].freeze
Split =
Value.new(:captures, :count, :index, :lhs, :rhs, :separator) do
  # 1-based counterpart of +index+: the first split has position 1.
  def position
    index + 1
  end

  alias_method :pos, :position

  # 0-based index counted from the end of the array, so the last split has
  # rindex 0. For count = 5:
  #
  #   index:   0  1  2  3  4
  #   rindex:  4  3  2  1  0
  def rindex
    count - 1 - index
  end

  # 1-based position counted from the end of the array, so the last split
  # has rposition 1. For count = 5:
  #
  #   position:   1  2  3  4  5
  #   rposition:  5  4  3  2  1
  def rposition
    count - index
  end

  alias_method :rpos, :rposition
end
VERSION =
'0.7.0'

Instance Attribute Summary collapse

Instance Method Summary collapse

Constructor Details

#initialize(default_delimiter: DEFAULT_DELIMITER, include_captures: true, remove_empty: false, remove_empty_fields: remove_empty, spread_captures: true) ⇒ StringSplitter

Returns a new instance of StringSplitter.



91
92
93
94
95
96
97
98
99
100
101
102
# File 'lib/string_splitter.rb', line 91

# Creates a splitter and records its options in instance variables; each one
# is exposed through a reader of the same name.
#
# @param default_delimiter [Regexp, String] delimiter used by #split and
#   #rsplit when the caller does not pass one (defaults to DEFAULT_DELIMITER)
# @param include_captures [Boolean] stored as given; its effect is implemented
#   outside this method
# @param remove_empty [Boolean] legacy spelling of +remove_empty_fields+; only
#   used as that option's default
# @param remove_empty_fields [Boolean] stored as given; falls back to
#   +remove_empty+ when omitted
# @param spread_captures [Boolean] stored as given; its effect is implemented
#   outside this method
def initialize(
  default_delimiter: DEFAULT_DELIMITER,
  include_captures: true,
  remove_empty: false, # TODO remove this
  remove_empty_fields: remove_empty,
  spread_captures: true
)
  # paired assignments keep the instance variables in their original order
  @default_delimiter, @include_captures = default_delimiter, include_captures
  @remove_empty_fields, @spread_captures = remove_empty_fields, spread_captures
end

Instance Attribute Details

#default_delimiter ⇒ Object (readonly)

Returns the value of attribute default_delimiter.



104
105
106
# File 'lib/string_splitter.rb', line 104

# Reader for the delimiter stored by the constructor. #split and #rsplit use
# the same instance variable as the default for their delimiter argument.
def default_delimiter
  @default_delimiter
end

#include_captures ⇒ Object (readonly)

Returns the value of attribute include_captures.



104
105
106
# File 'lib/string_splitter.rb', line 104

# Reader for the include_captures option stored by the constructor
# (true unless overridden there).
def include_captures
  @include_captures
end

#remove_empty_fields ⇒ Object (readonly) Also known as: remove_empty

Returns the value of attribute remove_empty_fields.



104
105
106
# File 'lib/string_splitter.rb', line 104

# Reader for the remove_empty_fields option stored by the constructor
# (false unless overridden there); also available as remove_empty.
def remove_empty_fields
  @remove_empty_fields
end

#spread_captures ⇒ Object (readonly)

Returns the value of attribute spread_captures.



104
105
106
# File 'lib/string_splitter.rb', line 104

# Reader for the spread_captures option stored by the constructor
# (true unless overridden there).
def spread_captures
  @spread_captures
end

Instance Method Details

#rsplit(string, delimiter = @default_delimiter, at: nil, except: nil, select: at, reject: except, &block) ⇒ Object



150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
# File 'lib/string_splitter.rb', line 150

# Splits +string+ from right to left. The candidate splits supplied by +init+
# are visited last-to-first, so each Split's +index+ counts from the right,
# and the accept predicate supplied by +init+ decides which of them actually
# divide the string.
#
# @param string [String] the text to split
# @param delimiter [Regexp, String] what to split on; defaults to
#   @default_delimiter
# @param at [Object, nil] alias for +select+
# @param except [Object, nil] alias for +reject+
# @param select [Object, nil] forwarded to +init+ (defaults to +at+)
# @param reject [Object, nil] forwarded to +init+ (defaults to +except+)
# @yield optional block, forwarded to +init+
# @return [Object] whatever +render+ produces for the collected fields, or
#   +init+'s result unchanged when it supplies no splits
def rsplit(
  string,
  delimiter = @default_delimiter,
  at: nil, # alias for select
  except: nil, # alias for reject
  select: at,
  reject: except,
  &block
)
  fields, candidates, total, accept = init(
    string: string,
    delimiter: delimiter,
    select: select,
    reject: reject,
    block: block
  )

  # nothing to split on: hand back init's result untouched
  return fields unless candidates

  candidates.reverse_each.with_index do |attrs, nth|
    candidate = Split.with(attrs.merge({ count: total, index: nth }))

    # seed the output with the text after the rightmost separator
    fields.unshift(candidate.rhs) if fields.empty?

    unless accept.call(candidate)
      # rejected: glue the lhs and its separator back onto the first field
      fields[0] = candidate.lhs + candidate.separator + fields.first
      next
    end

    # accepted: the fields become [lhs, captures, *fields]
    fields.unshift(candidate.lhs, candidate.captures)
  end

  render(fields)
end

#split(string, delimiter = @default_delimiter, at: nil, except: nil, select: at, reject: except, &block) ⇒ Object Also known as: lsplit



114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
# File 'lib/string_splitter.rb', line 114

# Splits +string+ from left to right. The candidate splits supplied by +init+
# are visited first-to-last, and the accept predicate supplied by +init+
# decides which of them actually divide the string.
#
# @param string [String] the text to split
# @param delimiter [Regexp, String] what to split on; defaults to
#   @default_delimiter
# @param at [Object, nil] alias for +select+
# @param except [Object, nil] alias for +reject+
# @param select [Object, nil] forwarded to +init+ (defaults to +at+)
# @param reject [Object, nil] forwarded to +init+ (defaults to +except+)
# @yield optional block, forwarded to +init+
# @return [Object] whatever +render+ produces for the collected fields, or
#   +init+'s result unchanged when it supplies no splits
def split(
  string,
  delimiter = @default_delimiter,
  at: nil, # alias for select
  except: nil, # alias for reject
  select: at,
  reject: except,
  &block
)
  fields, candidates, total, accept = init(
    string: string,
    delimiter: delimiter,
    select: select,
    reject: reject,
    block: block
  )

  # nothing to split on: hand back init's result untouched
  return fields unless candidates

  candidates.each.with_index do |attrs, nth|
    candidate = Split.with(attrs.merge({ count: total, index: nth }))

    # seed the output with the text before the leftmost separator
    fields.push(candidate.lhs) if fields.empty?

    unless accept.call(candidate)
      # rejected: glue the separator and the rhs back onto the last field
      fields[-1] = fields.last + candidate.separator + candidate.rhs
      next
    end

    # accepted: the fields become [*fields, captures, rhs]
    fields.push(candidate.captures, candidate.rhs)
  end

  render(fields)
end