Method: Enumerable#slice_before

Defined in:
enum.c

#slice_before(pattern) ⇒ Object #slice_before {|elt| ... } ⇒ Object

With argument pattern, returns an enumerator that uses the pattern to partition elements into arrays (“slices”). An element begins a new slice if element === pattern (or if it is the first element).

a = %w[foo bar fop for baz fob fog bam foy]
e = a.slice_before(/ba/) # => #<Enumerator: ...>
e.each {|array| p array }

Output:

["foo"]
["bar", "fop", "for"]
["baz", "fob", "fog"]
["bam", "foy"]

With a block, returns an enumerator that uses the block to partition elements into arrays. An element begins a new slice if its block return is a truthy value (or if it is the first element):

e = (1..20).slice_before {|i| i % 4 == 2 } # => #<Enumerator: ...>
e.each {|array| p array }

Output:

[1]
[2, 3, 4, 5]
[6, 7, 8, 9]
[10, 11, 12, 13]
[14, 15, 16, 17]
[18, 19, 20]

Other methods of the Enumerator class and Enumerable module, such as to_a, map, etc., are also usable.

For example, iteration over ChangeLog entries can be implemented as follows:

# iterate over ChangeLog entries.
open("ChangeLog") { |f|
  f.slice_before(/\A\S/).each { |e| pp e }
}

# same as above.  block is used instead of pattern argument.
open("ChangeLog") { |f|
  f.slice_before { |line| /\A\S/ === line }.each { |e| pp e }
}

“svn proplist -R” produces multiline output for each file. They can be chunked as follows:

IO.popen([{"LC_ALL"=>"C"}, "svn", "proplist", "-R"]) { |f|
  f.lines.slice_before(/\AProp/).each { |lines| p lines }
}
#=> ["Properties on '.':\n", "  svn:ignore\n", "  svk:merge\n"]
#   ["Properties on 'goruby.c':\n", "  svn:eol-style\n"]
#   ["Properties on 'complex.c':\n", "  svn:mime-type\n", "  svn:eol-style\n"]
#   ["Properties on 'regparse.c':\n", "  svn:eol-style\n"]
#   ...

If the block needs to maintain state over multiple elements, local variables can be used. For example, three or more consecutive increasing numbers can be squashed as follows (see chunk_while for a better way):

a = [0, 2, 3, 4, 6, 7, 9]
prev = a[0]
p a.slice_before { |e|
  prev, prev2 = e, prev
  prev2 + 1 != e
}.map { |es|
  es.length <= 2 ? es.join(",") : "#{es.first}-#{es.last}"
}.join(",")
#=> "0,2-4,6,7,9"

However local variables should be used carefully if the result enumerator is enumerated twice or more. The local variables should be initialized for each enumeration. Enumerator.new can be used to do it.

# Word wrapping.  This assumes all characters have same width.
def wordwrap(words, maxwidth)
  Enumerator.new {|y|
    # cols is initialized in Enumerator.new.
    cols = 0
    words.slice_before { |w|
      cols += 1 if cols != 0
      cols += w.length
      if maxwidth < cols
        cols = w.length
        true
      else
        false
      end
    }.each {|ws| y.yield ws }
  }
end
text = (1..20).to_a.join(" ")
enum = wordwrap(text.split(/\s+/), 10)
puts "-"*10
enum.each { |ws| puts ws.join(" ") } # first enumeration.
puts "-"*10
enum.each { |ws| puts ws.join(" ") } # second enumeration generates same result as the first.
puts "-"*10
#=> ----------
#   1 2 3 4 5
#   6 7 8 9 10
#   11 12 13
#   14 15 16
#   17 18 19
#   20
#   ----------
#   1 2 3 4 5
#   6 7 8 9 10
#   11 12 13
#   14 15 16
#   17 18 19
#   20
#   ----------

mbox contains series of mails which start with Unix From line. So each mail can be extracted by slice before Unix From line.

# parse mbox
open("mbox") { |f|
  f.slice_before { |line|
    line.start_with? "From "
  }.each { |mail|
    unix_from = mail.shift
    i = mail.index("\n")
    header = mail[0...i]
    body = mail[(i+1)..-1]
    body.pop if body.last == "\n"
    fields = header.slice_before { |line| !" \t".include?(line[0]) }.to_a
    p unix_from
    pp fields
    pp body
  }
}

# split mails in mbox (slice before Unix From line after an empty line)
open("mbox") { |f|
  emp = true
  f.slice_before { |line|
    prevemp = emp
    emp = line == "\n"
    prevemp && line.start_with?("From ")
  }.each { |mail|
    mail.pop if mail.last == "\n"
    pp mail
  }
}

Overloads:

  • #slice_before {|elt| ... } ⇒ Object

    Yields:

    • (elt)


4222
4223
4224
4225
4226
4227
4228
4229
4230
4231
4232
4233
4234
4235
4236
4237
4238
4239
4240
4241
4242
# File 'enum.c', line 4222

static VALUE
enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
{
    VALUE enumerator;

    if (rb_block_given_p()) {
        if (argc != 0)
            rb_error_arity(argc, 0, 0);
        enumerator = rb_obj_alloc(rb_cEnumerator);
        rb_ivar_set(enumerator, id_slicebefore_sep_pred, rb_block_proc());
    }
    else {
        VALUE sep_pat;
        rb_scan_args(argc, argv, "1", &sep_pat);
        enumerator = rb_obj_alloc(rb_cEnumerator);
        rb_ivar_set(enumerator, id_slicebefore_sep_pat, sep_pat);
    }
    rb_ivar_set(enumerator, id_slicebefore_enumerable, enumerable);
    rb_block_call(enumerator, idInitialize, 0, 0, slicebefore_i, enumerator);
    return enumerator;
}