diff --git a/configure.in b/configure.in
index 9a679de..4bba5eb 100644
--- a/configure.in
+++ b/configure.in
@@ -1426,7 +1426,7 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
 	      mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
 	      pread sendfile shutdown sigaltstack dl_iterate_phdr\
-	      dup3 pipe2 posix_memalign memalign)
+	      dup3 pipe2 posix_memalign memalign memmem)
 
 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
@@ -2726,6 +2726,39 @@ if test "${universal_binary-no}" = yes ; then
 	    AC_MSG_ERROR([failed])
 	])])
 fi
+AC_CACHE_CHECK(for broken memmem, rb_cv_broken_memmem,
+  [AC_TRY_RUN([
+#include <string.h>
+
+int
+main()
+{
+  char *str = "hogefugafoobar";
+  char *rs = "foo";
+  char *empty = "";
+  char *p;
+
+  p = memmem(str, strlen(str), rs, strlen(rs));
+  if (p == str+8) {
+    p = memmem(str, strlen(str), empty, strlen(empty));
+    if (p == str)
+      return 0;
+    else
+      return 1;
+  }
+  else {
+    return 1;
+  }
+}
+	],
+	rb_cv_broken_memmem=no,
+	rb_cv_broken_memmem=yes,
+	rb_cv_broken_memmem=yes)
+])
+
+if test "$rb_cv_broken_memmem" = yes; then
+  AC_DEFINE(BROKEN_MEMMEM, 1)
+fi
 
 CPPFLAGS="$CPPFLAGS "'$(DEFS)'
 test -z "$CPPFLAGS" || CPPFLAGS="$CPPFLAGS "; CPPFLAGS="$CPPFLAGS"'${cppflags}'
diff --git a/string.c b/string.c
index d06a996..9c9cebd 100644
--- a/string.c
+++ b/string.c
@@ -5967,6 +5967,24 @@ rb_str_split(VALUE str, const char *sep0)
     return rb_str_split_m(1, &sep, str);
 }
 
+static VALUE rb_str_valid_encoding_p(VALUE str);
+
+/*
+ * Builds a new string from the +sublen+ bytes at +substr+ (via rb_str_new5
+ * on +str+), applies OBJ_INFECT and rb_enc_cr_str_copy_for_substr from
+ * +str+, and yields it to the block.  Returns substr + sublen.
+ */
+static const char *
+line_yield(VALUE str, const char *substr, long sublen)
+{
+    VALUE line = rb_str_new5(str, substr, sublen);
+
+    OBJ_INFECT(line, str);
+    rb_enc_cr_str_copy_for_substr(line, str);
+    rb_yield(line);
+
+    return substr + sublen;
+}
 
 /*
  *  call-seq:
@@ -6046,10 +6064,7 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
 		continue;
 	    }
 	    p = p0 + rb_enc_mbclen(p0, pend, enc);
-	    line = rb_str_new5(str, s, p - s);
-	    OBJ_INFECT(line, str);
-	    rb_enc_cr_str_copy_for_substr(line, str);
-	    rb_yield(line);
+	    line_yield(str, s, p - s);
 	    str_mod_check(str, ptr, len);
 	    s = p;
 	}
@@ -6058,12 +6073,66 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
 
     enc = rb_enc_check(str, rs);
     rslen = RSTRING_LEN(rs);
-    if (rslen == 0) {
+
+    if (rslen == 0)
 	newline = '\n';
+
+#if defined(HAVE_MEMMEM) && !defined(BROKEN_MEMMEM)
+    /*
+     * Fast path: when both strings pass rb_str_valid_encoding_p, locate
+     * each separator with memmem() and then confirm that the hit starts
+     * on a character boundary.
+     */
+    if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs)) {
+	int rspara = 0;
+	long sublen, rest = len;
+	const char *e, *pp, *rsptr, *ss = s;
+
+	if (rslen == 0) {
+	    /* empty separator: search for two consecutive newlines */
+	    rspara = 1;
+	    rs = rb_usascii_str_new("\n\n", 2);
+	    if (!rb_enc_asciicompat(enc))
+		rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
+	    rslen = RSTRING_LEN(rs);
+	}
+
+	rsptr = RSTRING_PTR(rs);
+
+	while ((e = memmem(ss, rest, rsptr, rslen)) != NULL) {
+	    pp = rb_enc_left_char_head(ss, e, ss+rest, enc);
+	    if (pp != e) {
+		/*
+		 * The bytes matched in the middle of a character.  Resume
+		 * the search at the next character boundary and leave s
+		 * alone so the pending line keeps growing.
+		 */
+		sublen = (pp - ss) + rb_enc_mbclen(pp, pend, enc);
+	    }
+	    else {
+		if (rspara) {
+		    p = e;
+		    rb_enc_codepoint_len(p, pend, &n, enc);
+		    while (p < pend && rb_enc_codepoint(p, pend, enc) == newline)
+			p += n;
+		    sublen = p-ss;
+		}
+		else {
+		    sublen = e-ss+rslen;
+		}
+		/* ss may be ahead of s after a rejected hit; measure from s */
+		s = line_yield(str, s, (ss + sublen) - s);
+		str_mod_check(str, ptr, len);
+	    }
+	    ss += sublen;
+	    rest -= sublen;
+	}
+	RB_GC_GUARD(rs);
+	goto finish;
     }
-    else {
+#endif
+    if (rslen != 0)
 	newline = rb_enc_codepoint(RSTRING_PTR(rs), RSTRING_END(rs), enc);
-    }
 
     while (p < pend) {
 	unsigned int c = rb_enc_codepoint_len(p, pend, &n, enc);
@@ -6082,12 +6151,8 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
 	if (c == newline &&
 	    (rslen <= 1 ||
 	     (pend - p >= rslen && memcmp(RSTRING_PTR(rs), p, rslen) == 0))) {
-	    line = rb_str_new5(str, s, p - s + (rslen ? rslen : n));
-	    OBJ_INFECT(line, str);
-	    rb_enc_cr_str_copy_for_substr(line, str);
-	    rb_yield(line);
+	    s = line_yield(str, s, p - s + (rslen ? rslen : n));
 	    str_mod_check(str, ptr, len);
-	    s = p + (rslen ? rslen : n);
 	}
 	p += n;
     }