Project

General

Profile

Feature #7368 » patch2.diff

Glass_saga (Masaki Matsushita), 11/20/2012 10:01 PM

View differences:

string.c
return rb_str_split_m(1, &sep, str);
}
static VALUE rb_str_valid_encoding_p(VALUE str);
/*
 * Yield the byte range [sub, subend) of +str+ to the current block as a
 * new string built with rb_str_new5().
 *
 * The new string is passed through OBJ_INFECT() and
 * rb_enc_cr_str_copy_for_substr() against +str+ before being yielded.
 * The pointer and length of +str+ are captured before the yield and handed
 * to str_mod_check() afterwards -- presumably so that a block which
 * modifies +str+ during iteration is detected (confirm against
 * str_mod_check's definition).
 */
static void
line_yield(VALUE str, const char *sub, const char *subend)
{
    const char *orig_ptr = RSTRING_PTR(str);
    long orig_len = RSTRING_LEN(str);
    VALUE piece = rb_str_new5(str, sub, subend - sub);

    OBJ_INFECT(piece, str);
    rb_enc_cr_str_copy_for_substr(piece, str);
    rb_yield(piece);
    str_mod_check(str, orig_ptr, orig_len);
}
/*
 * Yield each line of +str+, using rb_memsearch() to locate the separator
 * +rs+.  The caller selects this variant when both +str+ and +rs+ report a
 * valid encoding.
 *
 * str     - string being iterated; its buffer is scanned in place.
 * rs      - record separator.  An empty separator selects paragraph mode:
 *           the search string becomes "\n\n" (transcoded to +enc+ when
 *           +enc+ is not ASCII compatible) and any further +newline+
 *           characters following a match are absorbed into the same line.
 * newline - code point treated as a newline in paragraph mode.
 * enc     - encoding used for searching and character-boundary checks.
 *
 * Every piece is yielded through line_yield(); text left after the last
 * separator is yielded as a final line.
 */
static void
str_each_line_valid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
{
    int n, rspara = 0;
    long pos, rslen;
    const char *pend, *sub, *subend, *search_start, *hit;
    const char *adjusted, *rsptr;

    search_start = sub = subend = RSTRING_PTR(str);
    pend = RSTRING_END(str);
    rslen = RSTRING_LEN(rs);
    if (rslen == 0) {
        /* paragraph mode: search for a blank line instead of +rs+ */
        rspara = 1;
        rs = rb_usascii_str_new("\n\n", 2);
        if (!rb_enc_asciicompat(enc)) {
            rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
        }
        rslen = RSTRING_LEN(rs);
    }
    /*
     * NOTE(review): rsptr points into +rs+, which may be the temporary
     * created above; consider guarding it against GC for the duration of
     * the loop.
     */
    rsptr = RSTRING_PTR(rs);

    while (search_start < pend) {
        pos = rb_memsearch(rsptr, rslen, search_start, pend - search_start, enc);
        if (pos < 0) break;
        hit = search_start + pos;
        /*
         * A byte-level match only counts when it starts on a character
         * boundary; otherwise resume the search at the next character head.
         */
        adjusted = rb_enc_right_char_head(sub, hit, pend, enc);
        if (hit == adjusted) {
            subend = hit + rslen;
            /*
             * Absorb extra newlines after the paragraph break.  The
             * subend < pend guard matters: rb_enc_codepoint_len() must not
             * be handed an empty range, which is what happens when the
             * separator ends exactly at the end of the string.
             */
            if (rspara && subend < pend) {
                rb_enc_codepoint_len(subend, pend, &n, enc);
                while (subend < pend && rb_enc_codepoint(subend, pend, enc) == newline) {
                    subend += n;
                }
            }
            line_yield(str, sub, subend);
            search_start = sub = subend;
        }
        else {
            search_start = adjusted;
        }
    }
    /* trailing text without a terminating separator */
    if (subend < pend) line_yield(str, subend, pend);
}
/*
 * Yield each line of +str+ by walking it one character at a time.  The
 * caller selects this variant when +str+ or +rs+ does not report a valid
 * encoding.
 *
 * str     - string being iterated; its buffer is scanned in place.
 * rs      - record separator.  An empty separator selects paragraph mode,
 *           where a run of two or more +newline+ characters ends a line.
 * newline - code point that can start a separator match.
 * enc     - encoding used to decode characters.
 *
 * Two cursors are kept: +sub+ marks the start of the line being collected
 * and +scan+ is the character currently examined.  After a match the scan
 * cursor advances by one character only (not by the separator length),
 * and text left after the last separator is yielded from +sub+.
 */
static void
str_each_line_invalid(VALUE str, VALUE rs, unsigned int newline, rb_encoding *enc)
{
    int n;
    long rslen;
    const char *sub, *scan, *pend, *rsptr;

    sub = scan = RSTRING_PTR(str);
    pend = RSTRING_END(str);
    rsptr = RSTRING_PTR(rs);
    rslen = RSTRING_LEN(rs);

    while (scan < pend) {
        /* decode at the scan cursor, not at the start of the line */
        unsigned int c = rb_enc_codepoint_len(scan, pend, &n, enc);

      again:
        if (rslen == 0 && c == newline) {
            /* paragraph mode: a lone newline is not a break */
            scan += n;
            if (scan < pend && (c = rb_enc_codepoint_len(scan, pend, &n, enc)) != newline) {
                goto again;
            }
            /* swallow the whole run of newlines ... */
            while (scan < pend && rb_enc_codepoint(scan, pend, enc) == newline) {
                scan += n;
            }
            /* ... and step back onto its last character */
            scan -= n;
        }
        if (c == newline &&
            (rslen <= 1 ||
             (pend - scan >= rslen && memcmp(rsptr, scan, rslen) == 0))) {
            const char *line_end = scan + (rslen ? rslen : n);

            line_yield(str, sub, line_end);
            sub = line_end;
        }
        scan += n;
    }
    /* trailing text without a terminating separator */
    if (sub < pend) line_yield(str, sub, pend);
}
/*
* call-seq:
......
rb_encoding *enc;
VALUE rs;
unsigned int newline;
const char *p, *pend, *s, *ptr;
long len, rslen;
VALUE line;
int n;
VALUE orig = str;
if (argc == 0) {
if (argc == 0)
rs = rb_rs;
}
else {
else
rb_scan_args(argc, argv, "01", &rs);
}
RETURN_ENUMERATOR(str, argc, argv);
if (NIL_P(rs)) {
rb_yield(str);
return orig;
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
pend = p + RSTRING_LEN(str);
len = RSTRING_LEN(str);
StringValue(rs);
if (rs == rb_default_rs) {
enc = rb_enc_get(str);
while (p < pend) {
char *p0;
p = memchr(p, '\n', pend - p);
if (!p) break;
p0 = rb_enc_left_char_head(s, p, pend, enc);
if (!rb_enc_is_newline(p0, pend, enc)) {
p++;
continue;
}
p = p0 + rb_enc_mbclen(p0, pend, enc);
line = rb_str_new5(str, s, p - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
str_mod_check(str, ptr, len);
s = p;
}
goto finish;
}
str = rb_str_new4(str);
enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
if (rs == rb_rs) {
enc = rb_enc_get(str);
rs = rb_str_encode(rb_rs, rb_enc_from_encoding(enc), 0, Qnil);
}
else {
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
enc = rb_enc_check(str, rs);
}
while (p < pend) {
unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
again:
if (rslen == 0 && c == newline) {
p += n;
if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
goto again;
}
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
p += n;
}
p -= n;
}
if (c == newline &&
(rslen <= 1 ||
(pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
str_mod_check(str, ptr, len);
s = p + (rslen ? rslen : n);
}
p += n;
}
if (RSTRING_LEN(rs) == 0)
newline = '\n';
else
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
finish:
if (s != pend) {
line = rb_str_new5(str, s, pend - s);
OBJ_INFECT(line, str);
rb_enc_cr_str_copy_for_substr(line, str);
rb_yield(line);
RB_GC_GUARD(str);
}
if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs))
str_each_line_valid(str, rs, newline, enc);
else
str_each_line_invalid(str, rs, newline, enc);
return orig;
}
(2-2/3)