Project

General

Profile

Bug #8698 » patch.diff

Glass_saga (Masaki Matsushita), 07/28/2013 05:14 PM

View differences:

string.c
rb_str_enumerate_lines(int argc, VALUE *argv, VALUE str, int wantarray)
{
rb_encoding *enc;
VALUE rs;
unsigned int newline;
const char *p, *pend, *s, *ptr;
long len, rslen;
VALUE line;
int n;
VALUE orig = str;
VALUE line, rs, orig = str;
const char *ptr, *pend, *subptr, *subend, *rsptr, *hit, *adjusted;
long pos, len, rslen;
int paragraph_mode = 0;
VALUE UNINITIALIZED_VAR(ary);
if (argc == 0) {
if (argc == 0)
rs = rb_rs;
}
else {
else
rb_scan_args(argc, argv, "01", &rs);
}
if (rb_block_given_p()) {
if (wantarray) {
......
return orig;
}
}
str = rb_str_new4(str);
ptr = p = s = RSTRING_PTR(str);
pend = p + RSTRING_LEN(str);
ptr = subptr = RSTRING_PTR(str);
pend = RSTRING_END(str);
len = RSTRING_LEN(str);
StringValue(rs);
if (rs == rb_default_rs) {
enc = rb_enc_get(str);
while (p < pend) {
char *p0;
rslen = RSTRING_LEN(rs);
p = memchr(p, '\n', pend - p);
if (!p) break;
p0 = rb_enc_left_char_head(s, p, pend, enc);
if (!rb_enc_is_newline(p0, pend, enc)) {
p++;
continue;
}
p = p0 + rb_enc_mbclen(p0, pend, enc);
line = rb_str_subseq(str, s - ptr, p - s);
if (wantarray)
rb_ary_push(ary, line);
else
rb_yield(line);
str_mod_check(str, ptr, len);
s = p;
}
goto finish;
}
if (rs == rb_default_rs)
enc = rb_enc_get(str);
else
enc = rb_enc_check(str, rs);
enc = rb_enc_check(str, rs);
rslen = RSTRING_LEN(rs);
if (rslen == 0) {
newline = '\n';
rsptr = "\n\n";
rslen = 2;
paragraph_mode = 1;
}
else {
newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
rsptr = RSTRING_PTR(rs);
}
while (p < pend) {
unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
if ((rs == rb_default_rs || paragraph_mode) && !rb_enc_asciicompat(enc)) {
rs = rb_str_new(rsptr, rslen);
rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
rsptr = RSTRING_PTR(rs);
rslen = RSTRING_LEN(rs);
}
again:
if (rslen == 0 && c == newline) {
p += n;
if (p < pend && (c = rb_enc_codepoint_len(p, pend, &n, enc)) != newline) {
goto again;
}
while (p < pend && rb_enc_codepoint(p, pend, enc) == newline) {
p += n;
while (subptr < pend) {
pos = rb_memsearch(rsptr, rslen, subptr, pend - subptr, enc);
if (pos < 0) break;
hit = subptr + pos;
adjusted = rb_enc_right_char_head(subptr, hit, pend, enc);
if (hit != adjusted) {
subptr = adjusted;
continue;
}
subend = hit + rslen;
if (paragraph_mode) {
while (subend < pend && rb_enc_is_newline(subend, pend, enc)) {
subend += rb_enc_mbclen(subend, pend, enc);
}
p -= n;
}
if (c == newline &&
(rslen <= 1 ||
(pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
const char *pp = p + (rslen ? rslen : n);
line = rb_str_subseq(str, s - ptr, pp - s);
if (wantarray)
rb_ary_push(ary, line);
else
rb_yield(line);
line = rb_str_subseq(str, subptr - ptr, subend - subptr);
if (wantarray) {
rb_ary_push(ary, line);
}
else {
rb_yield(line);
str_mod_check(str, ptr, len);
s = pp;
}
p += n;
subptr = subend;
}
finish:
if (s != pend) {
line = rb_str_subseq(str, s - ptr, pend - s);
if (subptr != pend) {
line = rb_str_subseq(str, subptr - ptr, pend - subptr);
if (wantarray)
rb_ary_push(ary, line);
else
test/ruby/test_m17n_comb.rb
def test_str_each_line
combination(STRINGS, STRINGS) {|s1, s2|
if !s1.valid_encoding? || !s2.valid_encoding?
assert_raise(ArgumentError, Encoding::CompatibilityError) { s1.each_line(s2) {} }
next
end
if !s1.ascii_only? && !s2.ascii_only? && s1.encoding != s2.encoding
assert_raise(Encoding::CompatibilityError) { s1.each_line(s2) {} }
next
end
lines = []
enccall(s1, :each_line, s2) {|line|
assert(line.valid_encoding?)
assert_equal(s1.encoding, line.encoding)
lines << line
}
    (1-1/1)