diff --git a/configure.in b/configure.in
index 9a679de..84dc37d 100644
--- a/configure.in
+++ b/configure.in
@@ -1426,7 +1426,7 @@ AC_CHECK_FUNCS(fmod killpg wait4 waitpid fork spawnv syscall __syscall chroot ge
 	      setuid setgid daemon select_large_fdset setenv unsetenv\
 	      mktime timegm gmtime_r clock_gettime gettimeofday poll ppoll\
 	      pread sendfile shutdown sigaltstack dl_iterate_phdr\
-	      dup3 pipe2 posix_memalign memalign)
+	      dup3 pipe2 posix_memalign memalign memmem)
 
 AC_CACHE_CHECK(for unsetenv returns a value, rb_cv_unsetenv_return_value,
   [AC_TRY_COMPILE([
@@ -2726,6 +2726,32 @@ if test "${universal_binary-no}" = yes ; then
 	    AC_MSG_ERROR([failed])
 	])])
 fi
+AC_CACHE_CHECK(for broken memmem, rb_cv_broken_memmem,
+  [AC_TRY_RUN([
+#include <string.h>
+
+int
+main()
+{
+  char *str = "hogefugafoobar";
+  char *rs = "foo";
+  char *p;
+
+  p = memmem(str, strlen(str), rs, strlen(rs));
+  if (p == str+8)  /* "foo" begins at byte offset 8 of str */
+    return 0;
+  else
+    return 1;
+}
+	],
+	rb_cv_broken_memmem=no,
+	rb_cv_broken_memmem=yes,
+	rb_cv_broken_memmem=yes)
+])
+
+if test "$rb_cv_broken_memmem" = yes; then
+  AC_DEFINE(BROKEN_MEMMEM, 1)
+fi
 
 CPPFLAGS="$CPPFLAGS "'$(DEFS)'
 test -z "$CPPFLAGS" || CPPFLAGS="$CPPFLAGS "; CPPFLAGS="$CPPFLAGS"'${cppflags}'
diff --git a/string.c b/string.c
index 6355070..41b5b97 100644
--- a/string.c
+++ b/string.c
@@ -5979,6 +5979,7 @@ rb_str_split(VALUE str, const char *sep0)
     return rb_str_split_m(1, &sep, str);
 }
 
+static VALUE rb_str_valid_encoding_p(VALUE str);  /* used by rb_str_each_line below */
 
 /*
  *  call-seq:
@@ -6070,6 +6071,45 @@ rb_str_each_line(int argc, VALUE *argv, VALUE str)
 
     enc = rb_enc_check(str, rs);
     rslen = RSTRING_LEN(rs);
+#if defined(HAVE_MEMMEM) && !defined(BROKEN_MEMMEM)
+    if (rb_str_valid_encoding_p(str) && rb_str_valid_encoding_p(rs)) {
+	int rspara = 0;
+	long sublen, rest = len;
+	const char *e, *rsptr;
+
+	if (rslen == 0) {  /* empty separator: search for "\n\n" instead */
+	    rspara = 1;
+	    newline = '\n';
+	    rs = rb_usascii_str_new("\n\n", 2);
+	    if (!rb_enc_asciicompat(enc))
+		rs = rb_str_encode(rs, rb_enc_from_encoding(enc), 0, Qnil);
+	    rslen = RSTRING_LEN(rs);
+	}
+
+	rsptr = RSTRING_PTR(rs);
+
+	while ((e = memmem(s, rest, rsptr, rslen)) != NULL) {
+	    if (rspara) {
+		p = e;
+		rb_enc_codepoint_len(p, pend, &n, enc);
+		while (p < pend && rb_enc_codepoint(p, pend, enc) == newline)
+		    p += n;  /* extend the line over the whole run of newlines */
+		sublen = p-s;
+	    }
+	    else {
+		sublen = e-s+rslen;  /* yielded line includes the separator */
+	    }
+	    line = rb_str_new5(str, s, sublen);
+	    OBJ_INFECT(line, str);
+	    rb_enc_cr_str_copy_for_substr(line, str);
+	    rb_yield(line);
+	    str_mod_check(str, ptr, len);
+	    s += sublen;
+	    rest -= sublen;
+	}
+	goto finish;  /* the finish label lies outside the hunks shown here */
+    }
+#endif
     if (rslen == 0) {
 	newline = '\n';
     }