% svn diff --diff-cmd diff -x '-u -p'
Index: enum.c
===================================================================
--- enum.c	(revision 45900)
+++ enum.c	(working copy)
@@ -3083,6 +3083,206 @@ enum_slice_before(int argc, VALUE *argv,
     return enumerator;
 }
 
+
+/* Iteration state handed from slicebetween_i to slicebetween_ii. */
+struct slicebetween_arg {
+    VALUE pat1;      /* tested with === against the previous element; may be nil */
+    VALUE pat2;      /* tested with === against the current element; may be nil */
+    VALUE pred;      /* called with (previous, current); may be nil */
+    VALUE prev_elt;  /* element seen just before the current one */
+    VALUE prev_elts; /* chunk being accumulated; nil until the first element */
+    VALUE yielder;   /* receives each finished chunk via << */
+};
+
+/*
+ * Block function run once per element of the receiver.
+ *
+ * The first element opens a chunk.  For each later element the configured
+ * tests run in order: pat1 === previous element, pat2 === current element,
+ * then pred.call(previous, current).  A failing test skips the rest.  When
+ * every configured test passes, the current chunk is sent to the yielder
+ * and a new chunk starts with the current element; otherwise the element
+ * is appended to the current chunk.  Always returns nil.
+ */
+static VALUE
+slicebetween_ii(RB_BLOCK_CALL_FUNC_ARGLIST(i, _memo))
+{
+    /* Re-derive memo from _memo after every call back into Ruby code. */
+#define UPDATE_MEMO ((memo = MEMO_FOR(struct slicebetween_arg, _memo)), 1)
+    struct slicebetween_arg *memo;
+    int split_p;
+    UPDATE_MEMO;
+
+    ENUM_WANT_SVALUE();
+
+    if (NIL_P(memo->prev_elts)) {
+        /* The first element */
+        memo->prev_elt = i;
+        memo->prev_elts = rb_ary_new3(1, i);
+    }
+    else {
+        split_p = 1; /* stays true only while every configured test passes */
+        if (!NIL_P(memo->pat1)) {
+            split_p = RTEST(rb_funcall(memo->pat1, id_eqq, 1, memo->prev_elt));
+            UPDATE_MEMO;
+        }
+        if (split_p && !NIL_P(memo->pat2)) {
+            split_p = RTEST(rb_funcall(memo->pat2, id_eqq, 1, i));
+            UPDATE_MEMO;
+        }
+        if (split_p && !NIL_P(memo->pred)) {
+            split_p = RTEST(rb_funcall(memo->pred, id_call, 2, memo->prev_elt, i));
+            UPDATE_MEMO;
+        }
+
+        if (split_p) {
+            rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
+            UPDATE_MEMO;
+            memo->prev_elts = rb_ary_new3(1, i);
+        }
+        else {
+            rb_ary_push(memo->prev_elts, i);
+        }
+
+        memo->prev_elt = i;
+    }
+
+    return Qnil;
+#undef UPDATE_MEMO
+}
+
+/*
+ * Enumerator body: reads the patterns, block and source enumerable stored
+ * on the enumerator, iterates the source with slicebetween_ii, and then
+ * sends the last pending chunk to the yielder.  Nothing is sent when the
+ * source yielded no elements.  Always returns nil.
+ */
+static VALUE
+slicebetween_i(RB_BLOCK_CALL_FUNC_ARGLIST(yielder, enumerator))
+{
+    VALUE enumerable;
+    VALUE arg;
+    struct slicebetween_arg *memo = NEW_MEMO_FOR(struct slicebetween_arg, arg);
+
+    enumerable = rb_ivar_get(enumerator, rb_intern("slicebetween_enum"));
+    memo->pat1 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat1"));
+    memo->pat2 = rb_ivar_get(enumerator, rb_intern("slicebetween_pat2"));
+    memo->pred = rb_attr_get(enumerator, rb_intern("slicebetween_pred"));
+    memo->prev_elt = Qnil;
+    memo->prev_elts = Qnil;
+    memo->yielder = yielder;
+
+    rb_block_call(enumerable, id_each, 0, 0, slicebetween_ii, arg);
+    memo = MEMO_FOR(struct slicebetween_arg, arg);
+    if (!NIL_P(memo->prev_elts))
+        rb_funcall(memo->yielder, id_lshift, 1, memo->prev_elts);
+    return Qnil;
+}
+
+/*
+ *  call-seq:
+ *     enum.slice_between(pattern_before, pattern_after=nil) -> an_enumerator
+ *     enum.slice_between {|elt_before, elt_after| bool } -> an_enumerator
+ *
+ *  Creates an enumerator that yields the chunked elements.
+ *  The boundaries between chunks are defined by _pattern_before_ and _pattern_after_, or the block.
+ *
+ *  Either patterns (at least one of _pattern_before_ and _pattern_after_ non-nil) or a block must be given, but not both.
+ *  ArgumentError is raised otherwise.
+ *
+ *  This method splits each chunk using adjacent elements, _elt_before_ and _elt_after_,
+ *  in the receiver enumerator.
+ *  If the patterns are given,
+ *  this method splits chunks between _elt_before_ and _elt_after_ where
+ *  _pattern_before_ === _elt_before_ and
+ *  _pattern_after_ === _elt_after_.
+ *  (Arguments not given are ignored in this test.)
+ *  If the block is given,
+ *  this method splits chunks between _elt_before_ and _elt_after_ where
+ *  the block returns true.
+ *
+ *  For each split opportunity, _pattern_after_ test follows _pattern_before_ test.
+ *  If _pattern_before_ is failed, _pattern_after_ is not tested.
+ *
+ *  _pattern_before_ is not tested for the last element.
+ *  _pattern_after_ is not tested for the first element.
+ *  The block is called the length of the receiver enumerator minus one times.
+ *
+ *  The result enumerator yields the chunked elements as an array.
+ *  So +each+ method can be called as follows:
+ *
+ *    enum.slice_between(pattern_before, pattern_after).each { |ary| ... }
+ *    enum.slice_between { |elt_before, elt_after| bool }.each { |ary| ... }
+ *
+ *  Other methods of the Enumerator class and Enumerable module,
+ *  such as map, etc., are also usable.
+ *
+ *  For example, one-by-one increasing subsequence can be chunked as follows:
+ *
+ *    a = [1,2,4,9,10,11,12,15,16,19,20,21]
+ *    b = a.slice_between {|i, j| i+1 != j }
+ *    p b.to_a #=> [[1, 2], [4], [9, 10, 11, 12], [15, 16], [19, 20, 21]]
+ *    c = b.map {|a| a.length < 3 ? a : "#{a.first}-#{a.last}" }
+ *    p c #=> [[1, 2], [4], "9-12", [15, 16], "19-21"]
+ *    d = c.join(",")
+ *    p d #=> "1,2,4,9-12,15,16,19-21"
+ *
+ *  Increasing subsequence can be chunked as follows:
+ *
+ *    a = [0, 9, 2, 2, 3, 2, 7, 5, 9, 5]
+ *    p a.slice_between {|i, j| i > j }.to_a
+ *    #=> [[0, 9], [2, 2, 3], [2, 7], [5, 9], [5]]
+ *
+ *  Adjacent evens and odds can be chunked as follows:
+ *  (Enumerable#chunk is another way to do it.)
+ *
+ *    a = [7, 5, 9, 2, 0, 7, 9, 4, 2, 0]
+ *    p a.slice_between {|i, j| i.even? != j.even? }.to_a
+ *    #=> [[7, 5, 9], [2, 0], [7, 9], [4, 2, 0]]
+ *
+ *  Mbox contains series of mails which start with Unix From line and end
+ *  with an empty line.
+ *  So each mail can be extracted by slice after an empty line before Unix From line.
+ *
+ *    # split mails in mbox (slice before Unix From line after an empty line)
+ *    open("mbox") { |f|
+ *      f.slice_between("\n", /\AFrom /).each { |mail|
+ *        mail.pop if mail.last == "\n"
+ *        pp mail
+ *      }
+ *    }
+ *
+ */
+static VALUE
+enum_slice_between(int argc, VALUE *argv, VALUE enumerable)
+{
+    VALUE enumerator;
+    VALUE pat1, pat2, pred = Qnil;
+
+    rb_scan_args(argc, argv, "02", &pat1, &pat2);
+    if (rb_block_given_p()) {
+        pred = rb_block_proc();
+    }
+
+    if (NIL_P(pat1) && NIL_P(pat2) && NIL_P(pred)) {
+        rb_raise(rb_eArgError, "no pattern/block given");
+    }
+
+    if ((!NIL_P(pat1) || !NIL_P(pat2)) && !NIL_P(pred)) {
+        rb_raise(rb_eArgError, "both pattern and block are given");
+    }
+
+    /* Stash the arguments on the enumerator; slicebetween_i reads them back. */
+    enumerator = rb_obj_alloc(rb_cEnumerator);
+    rb_ivar_set(enumerator, rb_intern("slicebetween_pat1"), pat1);
+    rb_ivar_set(enumerator, rb_intern("slicebetween_pat2"), pat2);
+    rb_ivar_set(enumerator, rb_intern("slicebetween_pred"), pred);
+    rb_ivar_set(enumerator, rb_intern("slicebetween_enum"), enumerable);
+
+    rb_block_call(enumerator, idInitialize, 0, 0, slicebetween_i, enumerator);
+    return enumerator;
+}
+
 /*
  *  The Enumerable mixin provides collection classes with
  *  several traversal and searching methods, and with the ability to
@@ -3151,6 +3351,7 @@ Init_Enumerable(void)
     rb_define_method(rb_mEnumerable, "cycle", enum_cycle, -1);
     rb_define_method(rb_mEnumerable, "chunk", enum_chunk, -1);
     rb_define_method(rb_mEnumerable, "slice_before", enum_slice_before, -1);
+    rb_define_method(rb_mEnumerable, "slice_between", enum_slice_between, -1);
 
     id_next = rb_intern("next");
     id_call = rb_intern("call");
Index: enumerator.c
===================================================================
--- enumerator.c	(revision 45900)
+++ enumerator.c	(working copy)
@@ -2036,6 +2036,7 @@ InitVM_Enumerator(void)
     rb_define_method(rb_cLazy, "lazy", lazy_lazy, 0);
     rb_define_method(rb_cLazy, "chunk", lazy_super, -1);
     rb_define_method(rb_cLazy, "slice_before", lazy_super, -1);
+    rb_define_method(rb_cLazy, "slice_between", lazy_super, -1);
 
     rb_define_alias(rb_cLazy, "force", "to_a");
 
Index: test/ruby/test_enum.rb
===================================================================
--- test/ruby/test_enum.rb	(revision 45900)
+++ test/ruby/test_enum.rb	(working copy)
@@ -531,6 +531,72 @@ class TestEnumerable < Test::Unit::TestC
     assert_not_warn{ss.slice_before(/\A...\z/).to_a}
   end
 
+  def test_slice_between0
+    assert_raise(ArgumentError) { [].slice_between }
+  end
+
+  def test_slice_between1
+    e = [].slice_between {|a, b| flunk "should not be called" }
+    assert_equal([], e.to_a)
+
+    e = [1,2].slice_between(1)
+    assert_equal([[1], [2]], e.to_a)
+
+    e = [1,2].slice_between(3)
+    assert_equal([[1, 2]], e.to_a)
+
+    e = [1,2].slice_between(nil, 2)
+    assert_equal([[1], [2]], e.to_a)
+
+    e = [1,2].slice_between(nil, 3)
+    assert_equal([[1, 2]], e.to_a)
+
+    e = [1,2].slice_between {|a,b| true }
+    assert_equal([[1], [2]], e.to_a)
+
+    e = [1,2].slice_between {|a,b| false }
+    assert_equal([[1, 2]], e.to_a)
+  end
+
+  def test_slice_between2
+    e = [1,2].slice_between(1, 2)
+    assert_equal([[1], [2]], e.to_a)
+
+    e = [1,2].slice_between(3, 2)
+    assert_equal([[1, 2]], e.to_a)
+
+    e = [1,2].slice_between(1, 3)
+    assert_equal([[1, 2]], e.to_a)
+  end
+
+  def test_slice_between_both_pattern_and_block
+    assert_raise(ArgumentError) { [].slice_between(1, 2) {|a, b| true } }
+    assert_raise(ArgumentError) { [].slice_between(1) {|a, b| true } }
+    assert_raise(ArgumentError) { [].slice_between(nil, 2) {|a, b| true } }
+  end
+
+  def test_slice_between_contiguously_increasing_integers
+    e = [1,4,9,10,11,12,15,16,19,20,21].slice_between {|i, j| i+1 != j }
+    assert_equal([[1], [4], [9,10,11,12], [15,16], [19,20,21]], e.to_a)
+  end
+
+  def test_slice_between_mails
+    mail1 = ["From foo\n",
+             "\n",
+             "Body start\n",
+             "From x to b, baz\n",
+             "Body end\n",
+             "\n"]
+    mail2 = ["From foo\n",
+             "\n",
+             "Body start\n",
+             "Another text\n",
+             "Body end\n",
+             "\n"]
+    e = (mail1 + mail2).slice_between("\n", /\AFrom /)
+    assert_equal([mail1, mail2], e.to_a)
+  end
+
   def test_detect
     @obj = ('a'..'z')
     assert_equal('c', @obj.detect {|x| x == 'c' })
Index: test/ruby/test_lazy_enumerator.rb
===================================================================
--- test/ruby/test_lazy_enumerator.rb	(revision 45900)
+++ test/ruby/test_lazy_enumerator.rb	(working copy)
@@ -470,6 +470,7 @@ EOS
     bug7507 = '[ruby-core:51510]'
     {
       slice_before: //,
+      slice_between: //,
       with_index: nil,
       cycle: nil,
       each_with_object: 42,