Feature #7368 » patch2.diff
| string.c | ||
|---|---|---|
|     return rb_str_split_m(1, &sep, str); | ||
| } | ||
| static VALUE rb_str_valid_encoding_p(VALUE str); | ||
| /* | ||
|  * Build a new string from the byte range [sub, subend) of str and yield | ||
|  * it to the current block. | ||
|  * | ||
|  * The new string is created with rb_str_new5() using str as the template, | ||
|  * then passed through OBJ_INFECT() and rb_enc_cr_str_copy_for_substr() | ||
|  * with str as the source before being yielded. | ||
|  * | ||
|  * The pointer and length of str are captured before the yield and handed | ||
|  * to str_mod_check() afterwards -- presumably so that a block which | ||
|  * modifies str is detected (confirm against str_mod_check's definition). | ||
|  */ | ||
| static void | ||
| line_yield(VALUE str, const char *sub, const char *subend) | ||
| { | ||
|     const long len_before = RSTRING_LEN(str); | ||
|     const char *const ptr_before = RSTRING_PTR(str); | ||
|     VALUE piece; | ||
|     piece = rb_str_new5(str, sub, subend - sub); | ||
|     OBJ_INFECT(piece, str); | ||
|     rb_enc_cr_str_copy_for_substr(piece, str); | ||
|     rb_yield(piece); | ||
|     /* compare against the state captured before the block ran */ | ||
|     str_mod_check(str, ptr_before, len_before); | ||
| } | ||
| /* | ||
|  * Yield each line of str to the current block, locating the separator | ||
|  * rs with rb_memsearch(). | ||
|  * | ||
|  * str      - string to split; its buffer is scanned in place | ||
|  * rs       - record separator; an empty rs selects paragraph mode | ||
|  * newline  - code point treated as a newline in paragraph mode | ||
|  * enc      - encoding used for searching and character-boundary checks | ||
|  * | ||
|  * In paragraph mode the separator becomes "\n\n" (transcoded to enc when | ||
|  * enc is not ASCII-compatible), and every further `newline` code point | ||
|  * directly after a hit is folded into the same line. | ||
|  * | ||
|  * A hit only counts when it starts on a character boundary, i.e. when | ||
|  * rb_enc_right_char_head() returns the hit itself; otherwise the search | ||
|  * resumes at the adjusted position.  Text after the last separator is | ||
|  * yielded as a final line. | ||
|  * | ||
|  * NOTE(review): the name suggests str and rs are expected to be validly | ||
|  * encoded; that precondition is not checked here -- confirm at the caller. | ||
|  */ | ||
| static void | ||
| str_each_line_valid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc) | ||
| { | ||
|     int n, rspara = 0; | ||
|     long pos, rslen; | ||
|     const char *pend, *sub, *search_start, *hit; | ||
|     const char *adjusted, *rsptr; | ||
|     search_start = sub = RSTRING_PTR(str); | ||
|     pend = RSTRING_END(str); | ||
|     rslen = RSTRING_LEN(rs); | ||
|     if (rslen == 0) { | ||
| 	/* paragraph mode: search for a blank line instead */ | ||
| 	rspara = 1; | ||
| 	rs = rb_usascii_str_new("\n\n", 2); | ||
| 	if (!rb_enc_asciicompat(enc)) | ||
| 	    rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil); | ||
| 	rslen = RSTRING_LEN(rs); | ||
|     } | ||
|     rsptr = RSTRING_PTR(rs); | ||
|     while (search_start < pend) { | ||
| 	const char *subend; | ||
| 	pos = rb_memsearch(rsptr, rslen, search_start, pend - search_start, enc); | ||
| 	if (pos < 0) break; | ||
| 	hit = search_start + pos; | ||
| 	adjusted = rb_enc_right_char_head(sub, hit, pend, enc); | ||
| 	if (hit != adjusted) { | ||
| 	    /* match began inside a multibyte character; skip past it */ | ||
| 	    search_start = adjusted; | ||
| 	    continue; | ||
| 	} | ||
| 	subend = hit + rslen; | ||
| 	if (rspara) { | ||
| 	    /* | ||
| 	     * Swallow the remaining newlines of the blank-line run. | ||
| 	     * The bound is tested before every decode: the separator may | ||
| 	     * end exactly at pend, and rb_enc_codepoint_len() raises | ||
| 	     * ArgumentError when handed an empty range. | ||
| 	     */ | ||
| 	    while (subend < pend && | ||
| 		   rb_enc_codepoint_len(subend, pend, &n, enc) == newline) | ||
| 		subend += n; | ||
| 	} | ||
| 	line_yield(str, sub, subend); | ||
| 	search_start = sub = subend; | ||
|     } | ||
|     /* whatever follows the last separator is the final line */ | ||
|     if (sub < pend) line_yield(str, sub, pend); | ||
|     /* | ||
|      * rsptr may point into a string allocated above that is otherwise | ||
|      * unreferenced; keep it alive across the yields. | ||
|      */ | ||
|     RB_GC_GUARD(rs); | ||
| } | ||
| /* | ||
|  * Yield each line of str to the current block by decoding one code point | ||
|  * at a time and comparing it against `newline`. | ||
|  * | ||
|  * str      - string to split; its buffer is scanned in place | ||
|  * rs       - record separator; an empty rs selects paragraph mode | ||
|  * newline  - code point that may start a separator | ||
|  * enc      - encoding used to decode code points | ||
|  * | ||
|  * `sub` marks the start of the line being collected and `subend` is the | ||
|  * scan cursor.  A separator is recognised at the cursor when the decoded | ||
|  * code point equals `newline` and either rs is at most one byte long or | ||
|  * the bytes at the cursor compare equal to rs. | ||
|  * | ||
|  * In paragraph mode (empty rs) a single newline is not a separator; a | ||
|  * run of two or more newlines is, and the whole run is kept at the end | ||
|  * of the yielded line. | ||
|  * | ||
|  * NOTE(review): decoding uses rb_enc_codepoint_len(), so malformed bytes | ||
|  * are handled however that function handles them -- confirm this is the | ||
|  * intended behaviour for the "invalid" path. | ||
|  */ | ||
| static void | ||
| str_each_line_invalid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc) | ||
| { | ||
|     int n; | ||
|     long rslen; | ||
|     const char *sub, *subend, *pend, *rsptr; | ||
|     sub = subend = RSTRING_PTR(str); | ||
|     pend = RSTRING_END(str); | ||
|     rsptr = RSTRING_PTR(rs); | ||
|     rslen = RSTRING_LEN(rs); | ||
|     /* iterate on the scan cursor, not on the line start */ | ||
|     while (subend < pend) { | ||
| 	unsigned int c = rb_enc_codepoint_len(subend, pend, &n, enc); | ||
|       again: | ||
| 	if (rslen == 0 && c == newline) { | ||
| 	    subend += n; | ||
| 	    /* a lone newline does not end a paragraph: re-examine the next char */ | ||
| 	    if (subend < pend && (c = rb_enc_codepoint_len(subend, pend, &n, enc)) != newline) { | ||
| 		goto again; | ||
| 	    } | ||
| 	    while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) { | ||
| 		subend += n; | ||
| 	    } | ||
| 	    /* step back onto the last newline of the run */ | ||
| 	    subend -= n; | ||
| 	} | ||
| 	if (c == newline && | ||
| 	    (rslen <= 1 || | ||
| 	     (pend - subend >= rslen && memcmp(rsptr, subend, rslen) == 0))) { | ||
| 	    /* the line ends just past the separator; the cursor itself stays put */ | ||
| 	    const char *next = subend + (rslen ? rslen : n); | ||
| 	    line_yield(str, sub, next); | ||
| 	    sub = next; | ||
| 	} | ||
| 	subend += n; | ||
|     } | ||
|     /* trailing text without a separator forms the final line */ | ||
|     if (sub < pend) line_yield(str, sub, pend); | ||
| } | ||
| /* | ||
|  *  call-seq: | ||
| ... | ... | |
|     rb_encoding *enc; | ||
|     VALUE rs; | ||
|     unsigned int newline; | ||
|     const char *p, *pend, *s, *ptr; | ||
|     long len, rslen; | ||
|     VALUE line; | ||
|     int n; | ||
|     VALUE orig = str; | ||
|     if (argc == 0) { | ||
|     if (argc == 0) | ||
| 	rs = rb_rs; | ||
|     } | ||
|     else { | ||
|     else | ||
| 	rb_scan_args(argc, argv, "01", &rs); | ||
|     } | ||
|     RETURN_ENUMERATOR(str, argc, argv); | ||
|     if (NIL_P(rs)) { | ||
| 	rb_yield(str); | ||
| 	return orig; | ||
|     } | ||
|     str = rb_str_new4(str); | ||
|     ptr = p = s = RSTRING_PTR(str); | ||
|     pend = p + RSTRING_LEN(str); | ||
|     len = RSTRING_LEN(str); | ||
|     StringValue(rs); | ||
|     if (rs == rb_default_rs) { | ||
| 	enc = rb_enc_get(str); | ||
| 	while (p < pend) { | ||
| 	    char *p0; | ||
| 	    p = memchr(p, '\n', pend - p); | ||
| 	    if (!p) break; | ||
| 	    p0 = rb_enc_left_char_head(s, p, pend, enc); | ||
| 	    if (!rb_enc_is_newline(p0, pend, enc)) { | ||
| 		p++; | ||
| 		continue; | ||
| 	    } | ||
| 	    p = p0 + rb_enc_mbclen(p0, pend, enc); | ||
| 	    line = rb_str_new5(str, s, p - s); | ||
| 	    OBJ_INFECT(line, str); | ||
| 	    rb_enc_cr_str_copy_for_substr(line, str); | ||
| 	    rb_yield(line); | ||
| 	    str_mod_check(str, ptr, len); | ||
| 	    s = p; | ||
| 	} | ||
| 	goto finish; | ||
|     } | ||
|     str = rb_str_new4(str); | ||
|     enc = rb_enc_check(str, rs); | ||
|     rslen = RSTRING_LEN(rs); | ||
|     if (rslen == 0) { | ||
| 	newline = '\n'; | ||
|     if (rs == rb_rs) { | ||
| 	enc = rb_enc_get(str); | ||
| 	rs = rb_str_encode(rb_rs, rb_enc_from_encoding(enc), 0, Qnil); | ||
|     } | ||
|     else { | ||
| 	newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); | ||
| 	enc = rb_enc_check(str, rs); | ||
|     } | ||
|     while (p < pend) { | ||
| 	unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc); | ||
|       again: | ||
| 	if (rslen == 0 && c == newline) { | ||
| 	    p += n; | ||
| 	    if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) { | ||
| 		goto again; | ||
| 	    } | ||
| 	    while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) { | ||
| 		p += n; | ||
| 	    } | ||
| 	    p -= n; | ||
| 	} | ||
| 	if (c == newline && | ||
| 	    (rslen <= 1 || | ||
| 	     (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) { | ||
| 	    line = rb_str_new5(str, s, p - s + (rslen ? rslen : n)); | ||
| 	    OBJ_INFECT(line, str); | ||
| 	    rb_enc_cr_str_copy_for_substr(line, str); | ||
| 	    rb_yield(line); | ||
| 	    str_mod_check(str, ptr, len); | ||
| 	    s = p + (rslen ? rslen : n); | ||
| 	} | ||
| 	p += n; | ||
|     } | ||
|     if (RSTRING_LEN(rs) == 0) | ||
| 	newline = '\n'; | ||
|     else | ||
| 	newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc); | ||
|   finish: | ||
|     if (s != pend) { | ||
| 	line = rb_str_new5(str, s, pend - s); | ||
| 	OBJ_INFECT(line, str); | ||
| 	rb_enc_cr_str_copy_for_substr(line, str); | ||
| 	rb_yield(line); | ||
| 	RB_GC_GUARD(str); | ||
|     } | ||
|     if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs)) | ||
| 	str_each_line_valid(str, rs, newline, enc); | ||
|     else | ||
| 	str_each_line_invalid(str, rs, newline, enc); | ||
|     return orig; | ||
| } | ||