Method: Enumerable#slice_before
- Defined in:
- enum.c
#slice_before(pattern) ⇒ Object #slice_before {|elt| ... } ⇒ Object
With argument pattern, returns an enumerator that uses the pattern to partition elements into arrays (“slices”). An element begins a new slice if pattern === element (or if it is the first element):
# Start a new slice at every word that matches /ba/.
a = %w[foo bar fop for baz fob fog bam foy]
e = a.slice_before(/ba/) # => #<Enumerator: ...>
e.each do |slice|
  p slice
end
Output:
["foo"]
["bar", "fop", "for"]
["baz", "fob", "fog"]
["bam", "foy"]
With a block, returns an enumerator that uses the block to partition elements into arrays. An element begins a new slice if its block return is a truthy value (or if it is the first element):
# Start a new slice at every integer whose remainder modulo 4 is 2.
e = (1..20).slice_before { |i| i.modulo(4) == 2 } # => #<Enumerator: ...>
e.each do |slice|
  p slice
end
Output:
[1]
[2, 3, 4, 5]
[6, 7, 8, 9]
[10, 11, 12, 13]
[14, 15, 16, 17]
[18, 19, 20]
Other methods of the Enumerator class and Enumerable module, such as to_a, map, etc., are also usable.
For example, iteration over ChangeLog entries can be implemented as follows:
# Iterate over ChangeLog entries: a line whose first character is not
# whitespace begins a new entry.
open("ChangeLog") do |f|
  entries = f.slice_before(/\A\S/)
  entries.each { |entry| pp entry }
end

# Same as above, with the boundary test written as a block instead of a
# pattern argument.
open("ChangeLog") do |f|
  entries = f.slice_before { |line| /\A\S/ === line }
  entries.each { |entry| pp entry }
end
“svn proplist -R” produces multiline output for each file. They can be chunked as follows:
# Run `svn proplist -R` with LC_ALL=C and group its output per file: every
# line starting with "Prop" (the "Properties on '...':" header) begins a new
# slice.
IO.popen([{"LC_ALL"=>"C"}, "svn", "proplist", "-R"]) { |f|
  # IO#lines was removed in Ruby 3.0; IO#each_line without a block returns
  # the equivalent enumerator over the lines.
  f.each_line.slice_before(/\AProp/).each { |lines| p lines }
}
#=> ["Properties on '.':\n", " svn:ignore\n", " svk:merge\n"]
# ["Properties on 'goruby.c':\n", " svn:eol-style\n"]
# ["Properties on 'complex.c':\n", " svn:mime-type\n", " svn:eol-style\n"]
# ["Properties on 'regparse.c':\n", " svn:eol-style\n"]
# ...
If the block needs to maintain state over multiple elements, local variables can be used. For example, three or more consecutive increasing numbers can be squashed as follows (see chunk_while for a better way):
a = [0, 2, 3, 4, 6, 7, 9]
# prev remembers the previously seen element across block invocations.
prev = a[0]
# A new slice begins whenever the current element is not the successor of
# the previous one.
runs = a.slice_before do |e|
  prev2 = prev
  prev = e
  prev2 + 1 != e
end
# Runs of three or more are written as "first-last"; shorter runs are listed.
labels = runs.map do |es|
  if es.length <= 2
    es.join(",")
  else
    "#{es.first}-#{es.last}"
  end
end
p labels.join(",")
#=> "0,2-4,6,7,9"
However, local variables must be used carefully if the resulting enumerator is enumerated two or more times: they need to be initialized anew for each enumeration. Wrapping the code in Enumerator.new achieves this.
# Word wrapping. This assumes all characters have same width.
#
# Returns an Enumerator that yields arrays of words, one array per output
# line. A word starts a new line when appending it (plus one separating
# column) would make the line wider than maxwidth.
def wordwrap(words, maxwidth)
  Enumerator.new do |yielder|
    # The running width is set up inside Enumerator.new so that it starts
    # from zero again on every enumeration.
    width = 0
    lines = words.slice_before do |word|
      width += 1 unless width.zero? # separator before every word but the first
      width += word.length
      overflow = maxwidth < width
      width = word.length if overflow # the word opens a fresh line
      overflow
    end
    lines.each { |line| yielder.yield line }
  end
end
text = (1..20).to_a.join(" ")
enum = wordwrap(text.split(/\s+/), 10)
rule = "-" * 10
puts rule
# Enumerate twice: the second enumeration generates the same result as the
# first.
2.times do
  enum.each { |ws| puts ws.join(" ") }
  puts rule
end
#=> ----------
# 1 2 3 4 5
# 6 7 8 9 10
# 11 12 13
# 14 15 16
# 17 18 19
# 20
# ----------
# 1 2 3 4 5
# 6 7 8 9 10
# 11 12 13
# 14 15 16
# 17 18 19
# 20
# ----------
An mbox file contains a series of mails, each of which starts with a Unix From line. Each mail can therefore be extracted by slicing before every Unix From line.
# parse mbox
open("mbox") do |f|
  # Every line starting with "From " begins a new mail.
  mails = f.slice_before { |line| line.start_with?("From ") }
  mails.each do |mail|
    unix_from = mail.shift
    # The first empty line separates the header from the body. A mail without
    # one is treated as header-only instead of failing on a nil index.
    i = mail.index("\n") || mail.length
    header = mail[0...i]
    body = mail[(i + 1)..-1] || []
    body.pop if body.last == "\n"
    # A header line that does not start with a space or tab begins a new
    # field; lines that do are kept in the same slice as the preceding line.
    fields = header.slice_before { |line| !" \t".include?(line[0]) }.to_a
    p unix_from
    pp fields
    pp body
  end
end
# split mails in mbox (slice before Unix From line after an empty line)
open("mbox") do |f|
  # True while the previously read line was empty; starts out true so that a
  # "From " line at the very beginning of the file qualifies.
  after_blank = true
  mails = f.slice_before do |line|
    starts_mail = after_blank && line.start_with?("From ")
    after_blank = (line == "\n")
    starts_mail
  end
  mails.each do |mail|
    # Drop the trailing empty line, if any.
    mail.pop if mail.last == "\n"
    pp mail
  end
end
4222 4223 4224 4225 4226 4227 4228 4229 4230 4231 4232 4233 4234 4235 4236 4237 4238 4239 4240 4241 4242 |
# File 'enum.c', line 4222
/*
 * C implementation behind Enumerable#slice_before.
 *
 * Allocates an Enumerator, records on it either the given block or the given
 * separator pattern together with the receiver, then invokes the
 * enumerator's initialize with slicebefore_i as its block.
 * Returns the new enumerator.
 */
static VALUE
enum_slice_before(int argc, VALUE *argv, VALUE enumerable)
{
    VALUE enumerator;

    if (rb_block_given_p()) {
        /* Block form: any positional argument is an arity error. */
        if (argc != 0)
            rb_error_arity(argc, 0, 0);
        enumerator = rb_obj_alloc(rb_cEnumerator);
        /* Keep the block as the separator predicate. */
        rb_ivar_set(enumerator, id_slicebefore_sep_pred, rb_block_proc());
    }
    else {
        VALUE sep_pat;
        /* Pattern form: the "1" spec asks rb_scan_args for one required argument. */
        rb_scan_args(argc, argv, "1", &sep_pat);
        enumerator = rb_obj_alloc(rb_cEnumerator);
        rb_ivar_set(enumerator, id_slicebefore_sep_pat, sep_pat);
    }
    /* Remember the receiver that the slices will be drawn from. */
    rb_ivar_set(enumerator, id_slicebefore_enumerable, enumerable);
    /* Call enumerator.initialize, passing slicebefore_i as the block function. */
    rb_block_call(enumerator, idInitialize, 0, 0, slicebefore_i, enumerator);
    return enumerator;
} |