diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 123f76a..688b4cf 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -316,7 +316,7 @@ void rb_econv_binmode(rb_econv_t *ec); #define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) -#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_UNIVERSAL_NEWLINE_DECORATOR +#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_CRLF_NEWLINE_DECORATOR #else #define ECONV_DEFAULT_NEWLINE_DECORATOR 0 #endif diff --git a/io.c b/io.c index ab3f35f..89c57ad 100644 --- a/io.c +++ b/io.c @@ -376,12 +376,45 @@ rb_cloexec_fcntl_dupfd(int fd, int minfd) /* Windows */ # define DEFAULT_TEXTMODE FMODE_TEXTMODE # define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR +/* + * CRLF conversion is the default newline decorator in this environment. + * When CRLF conversion is the only conversion needed, the converter-free + * read/write path is used and the OS text mode does it, for IO speed. + * When an encoding conversion is needed, the encoding conversion path + * is used and the universal newline decorator becomes the default. 
+ */ +#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || ((fptr)->encs.ecflags & ~ECONV_CRLF_NEWLINE_DECORATOR)) +#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK|(ECONV_NEWLINE_DECORATOR_MASK & ~ECONV_CRLF_NEWLINE_DECORATOR)))) +#define SET_BINARY_MODE(fptr) setmode((fptr)->fd, O_BINARY) /* no trailing ';': call sites supply it */ + +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) do {\ + if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {\ + if (((fptr)->mode & FMODE_READABLE) &&\ + !((fptr)->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {\ + setmode((fptr)->fd, O_BINARY);\ + }\ + else {\ + setmode((fptr)->fd, O_TEXT);\ + }\ + }\ +} while(0) + +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) do {\ + if ((enc2) && ((ecflags) & ECONV_DEFAULT_NEWLINE_DECORATOR)) {\ + (ecflags) &= ~ECONV_DEFAULT_NEWLINE_DECORATOR;\ + (ecflags) |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;\ + }\ +} while(0) + #else /* Unix */ # define DEFAULT_TEXTMODE 0 -#endif #define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr)) #define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK))) +#define SET_BINARY_MODE(fptr) ((void)0) /* no-op in this branch */ +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) ((void)0) /* no-op in this branch */ +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) ((void)0) /* no-op in this branch */ +#endif #if !defined HAVE_SHUTDOWN && !defined shutdown #define shutdown(a,b) 0 @@ -1055,6 +1088,7 @@ do_writeconv(VALUE str, rb_io_t *fptr) { if (NEED_WRITECONV(fptr)) { VALUE common_encoding = Qnil; + SET_BINARY_MODE(fptr); make_writeconv(fptr); @@ -1082,6 +1116,21 @@ do_writeconv(VALUE str, rb_io_t *fptr) str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT); } } +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + else if 
(NEED_NEWLINE_DECORATOR_ON_WRITE(fptr)) { + if ((fptr->mode & FMODE_READABLE) && + !(fptr->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) { + setmode(fptr->fd, O_BINARY); + } + else { + setmode(fptr->fd, O_TEXT); + } + if (!rb_enc_asciicompat(rb_enc_get(str))) { + rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s", + rb_enc_name(rb_enc_get(str))); + } + } +#endif return str; } @@ -1965,6 +2014,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str) int cr; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); io_setstrbuf(&str,0); make_readconv(fptr, 0); while (1) { @@ -1986,6 +2036,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str) } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); bytes = 0; pos = 0; @@ -2408,6 +2459,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) long limit = *lp; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { const char *p, *e; @@ -2450,6 +2502,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) return EOF; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { long pending = READ_DATA_PENDING_COUNT(fptr); if (pending > 0) { @@ -2488,6 +2541,7 @@ swallow(rb_io_t *fptr, int term) if (NEED_READCONV(fptr)) { rb_encoding *enc = io_read_encoding(fptr); int needconv = rb_enc_mbminlen(enc) != 1; + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { size_t cnt; @@ -2511,6 +2565,7 @@ swallow(rb_io_t *fptr, int term) return FALSE; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { size_t cnt; while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) { @@ -2647,6 +2702,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) } else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) && rb_enc_asciicompat(enc = io_read_encoding(fptr))) { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); return rb_io_getline_fast(fptr, enc, io); } else { @@ -2656,6 +2712,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) int rspara = 0; int extra_limit = 16; + 
SET_BINARY_MODE(fptr); enc = io_read_encoding(fptr); if (!NIL_P(rs)) { @@ -3004,6 +3061,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) VALUE str = Qnil; rb_encoding *read_enc = io_read_encoding(fptr); + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); while (1) { @@ -3048,6 +3106,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) return str; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); if (io_fillbuf(fptr) < 0) { return Qnil; } @@ -3162,6 +3221,7 @@ rb_io_each_codepoint(VALUE io) READ_CHECK(fptr); if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); for (;;) { make_readconv(fptr, 0); for (;;) { @@ -3202,6 +3262,7 @@ rb_io_each_codepoint(VALUE io) rb_yield(UINT2NUM(c)); } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); enc = io_input_encoding(fptr); for (;;) { if (io_fillbuf(fptr) < 0) { @@ -3405,6 +3466,7 @@ rb_io_ungetc(VALUE io, VALUE c) SafeStringValue(c); } if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); len = RSTRING_LEN(c); #if SIZEOF_LONG > SIZEOF_INT if (len > INT_MAX) @@ -3424,6 +3486,7 @@ rb_io_ungetc(VALUE io, VALUE c) MEMMOVE(fptr->cbuf.ptr+fptr->cbuf.off, RSTRING_PTR(c), char, len); } else { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); io_ungetbyte(c, fptr); } return Qnil; @@ -4132,6 +4195,14 @@ rb_io_binmode(VALUE io) fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; +#ifdef O_BINARY + if (!fptr->readconv && fptr->rbuf.ptr != NULL) { + io_seek(fptr, -(fptr->rbuf.len), SEEK_CUR); + fptr->rbuf.len = 0; + fptr->rbuf.off = 0; + } + setmode(fptr->fd, O_BINARY); +#endif return io; } @@ -4151,6 +4222,14 @@ rb_io_ascii8bit_binmode(VALUE io) } fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; +#ifdef O_BINARY + if (fptr->rbuf.ptr != NULL) { + io_seek(fptr, -(fptr->rbuf.len), SEEK_CUR); + fptr->rbuf.len = 0; + fptr->rbuf.off = 0; + } + setmode(fptr->fd, O_BINARY); +#endif fptr->encs.enc = rb_ascii8bit_encoding(); fptr->encs.enc2 = NULL; @@ -4670,6 +4749,7 @@ 
rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, ecflags = (fmode & FMODE_READABLE) ? MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else { @@ -4705,13 +4785,14 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, ecflags = (fmode & FMODE_READABLE) ? MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; - ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) { if (has_enc) { rb_raise(rb_eArgError, "encoding specified twice"); } } + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); + ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); } validate_enc_binmode(&fmode, ecflags, enc, enc2); @@ -4756,9 +4837,6 @@ rb_sysopen(VALUE fname, int oflags, mode_t perm) int fd; struct sysopen_struct data; -#ifdef O_BINARY - oflags |= O_BINARY; -#endif data.fname = rb_str_encode_ospath(fname); data.oflags = oflags; data.perm = perm; @@ -5395,7 +5473,12 @@ pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode, fptr->encs = *convconfig; } else if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { - fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + fptr->encs.ecflags |= ECONV_CRLF_NEWLINE_DECORATOR; + setmode(fd, O_TEXT); +#else + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; +#endif } fptr->pid = pid; @@ -8179,22 +8262,26 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt) } else enc = rb_to_encoding(v2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { if (NIL_P(v1)) { /* Set to default encodings */ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else 
{ tmp = rb_check_string_type(v1); if (!NIL_P(tmp) && rb_enc_asciicompat(rb_enc_get(tmp))) { parse_mode_enc(RSTRING_PTR(tmp), &enc, &enc2, NULL); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } } @@ -8313,13 +8400,17 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass) extract_binmode(opt, &fmode); #if DEFAULT_TEXTMODE - if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr->mode &= ~FMODE_TEXTMODE; + setmode(fptr->fd, O_BINARY); + } #endif fptr->mode |= fmode; #if DEFAULT_TEXTMODE - if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr2->mode &= ~FMODE_TEXTMODE; + setmode(fptr2->fd, O_BINARY); + } #endif fptr2->mode |= fmode; diff --git a/win32/win32.c b/win32/win32.c index 5851377..329d6f1 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -2261,28 +2261,19 @@ init_stdhandle(void) int keep = 0; #define open_null(fd) \ (((nullfd < 0) ? \ - (nullfd = open("NUL", O_RDWR|O_BINARY)) : 0), \ + (nullfd = open("NUL", O_RDWR)) : 0), \ ((nullfd == (fd)) ? 
(keep = 1) : dup2(nullfd, fd)), \ (fd)) if (fileno(stdin) < 0) { stdin->_file = open_null(0); } - else { - setmode(fileno(stdin), O_BINARY); - } if (fileno(stdout) < 0) { stdout->_file = open_null(1); } - else { - setmode(fileno(stdout), O_BINARY); - } if (fileno(stderr) < 0) { stderr->_file = open_null(2); } - else { - setmode(fileno(stderr), O_BINARY); - } if (nullfd >= 0 && !keep) close(nullfd); setvbuf(stderr, NULL, _IONBF, 0); } @@ -5385,7 +5376,7 @@ rb_w32_pipe(int fds[2]) MTHREAD_ONLY(EnterCriticalSection(&(_pioinfo(fdRead)->lock))); _set_osfhnd(fdRead, (intptr_t)hRead); - _set_osflags(fdRead, FOPEN | FPIPE | FNOINHERIT); + _set_osflags(fdRead, FOPEN | FPIPE | FNOINHERIT | FTEXT); MTHREAD_ONLY(LeaveCriticalSection(&(_pioinfo(fdRead)->lock))); } while (0)); if (ret) @@ -5617,7 +5608,7 @@ rb_w32_write(int fd, const void *buf, size_t size) return -1; } - if (_osfile(fd) & FTEXT) { + if ((_osfile(fd) & FTEXT) && !(_osfile(fd) & FPIPE)) { return _write(fd, buf, size); }