diff --git a/ChangeLog b/ChangeLog index fb378b4..de2d94b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -191,6 +191,24 @@ Mon Jan 2 18:54:52 2012 NAKAMURA Usaku * win32/configure.bat: disable delayed expansion of enironment variable. [Bug #5517] [ruby-core:40531] +Mon Dec 26 22:01:19 2011 Hiroshi Shirosaki + + * io.c (rb_sys_fail_path): move the definition. + Move above for using it in set_binary_mode_with_seek_cur(). + + * io.c (set_binary_mode_with_seek_cur): fix improper seek cursor. + Seeking file cursor with setting binary mode has possibility to + cause infinite loop. Fixed the bug and refined error handling. + Introduced at r34043. + + And cleanups as below. + Remove unnecessary parentheses of `fptr`. + Use return value of setmode(). + + * test/ruby/test_io_m17n.rb + (TestIO_M17N#test_seek_with_setting_binmode): add a test for above. + [ruby-core:41671] [Bug #5714] + Wed Dec 28 11:22:45 2011 Nobuyoshi Nakada * lib/fileutils.rb (FileUtils::Entry_#entries): use utility method @@ -207,6 +225,103 @@ Wed Dec 14 15:28:31 2011 Nobuyoshi Nakada * transcode.c (str_encode): about the extension of :fallback option since 1.9.3. +Wed Dec 14 21:58:42 2011 NAKAMURA Usaku + + * test/ruby/test_io_m17n.rb + (TestIO_M17N#test_{read_with_binmode_and_get[cs]}): only for Windows. + +Wed Dec 14 19:22:33 2011 NAKAMURA Usaku + + * win32/win32.c, include/ruby/win32.h (rb_w32_fd_is_text): new function. + + * win32/win32.c (init_stdhandle): set default mode of stdin as binmode. + + * io.c (set_binary_mode_with_seek_cur): new function to replace + SET_BINARY_MODE_WITH_SEEK_CUR macro. now returns previous mode of the + fd and takes care of LF in rbuf. + + * io.c (do_writeconv): set text mode when needed. + + * io.c (io_read): need to change the mode of the IO to binmode + temporarily when the length is given for IO#read, because IO#read with length + must behave so.
+ + * test/ruby/test_io_m17n.rb (TestIO_M17N#test_{read_with_length, + read_with_length_binmode,get[cs]_and_read_with_binmode, + read_with_binmode_and_get[cs],read_write_with_binmode}): tests for + above changes. + + all patches are written by Hiroshi Shirosaki. [ruby-core:41496] + [Feature #5714] + +Thu Dec 8 13:26:24 2011 NAKAMURA Usaku + + * test/rexml/test_order.rb (OrderTester#test_more_ordering): use + GZip::GzReader.open instead of GZip::GzReader.new with File.new. + fixed a test error on Windows introduced at r33946. + +Mon Dec 5 10:18:45 2011 NAKAMURA Usaku + + * ext/zlib/zlib.c (rb_gzreader_initialize): revert a part of r33937. + 1st, to change the mode of an IO is very sensitive problem, so + the maintainer of this library should judge it. + 2nd, usually Zlib::GzReader.new is not called directly. #initialize + is called via .open, and in the method the I/O is opened in binary + mode, so there is no problem without changing the mode in #initialize. + +Sun Dec 4 10:15:00 2011 Luis Lavena + + * ext/zlib/zlib.c (rb_gzreader_initialize): use binary mode by default + under Windows. Patch by Hiroshi Shirosaki. [ruby-core:40706] + [Feature #5562] + + * include/ruby/encoding.h (void rb_econv_binmode): define NEWLINE + decorator. + + * io.c (rb_cloexec_fcntl_dupfd): Introduce NEED_READCONV and + NEED_WRITECONV to replace universal newline decorator by CRLF only + when required to improve file reading and writing under Windows. + Patch by Hiroshi Shirosaki. [ruby-core:40706] [Feature #5562] + * io.c (do_writeconv): adjust binary mode if required. + * io.c (read_all, appendline, swallow, rb_io_getline_1): ditto. + * io.c (io_getc, rb_io_each_codepoint, rb_io_ungetc): ditto. + * io.c (rb_io_binmode, rb_io_ascii8bit_binmode): ditto. + * io.c (rb_io_extract_modeenc, rb_sysopen): ditto. + * io.c (pipe_open, prep_stdio, io_encoding_set): ditto. + * io.c (rb_io_s_pipe, copy_stream_body): ditto. 
+ + * test/ruby/test_io_m17n.rb (EOT): add test for pipe and stdin in + binary mode. + + * win32/win32.c (init_stdhandle): remove O_BINARY from stdhandle + initialization. + * win32/win32.c (rb_w32_write): use FTEXT mode accordingly. + +Wed Nov 16 11:34:20 2011 NAKAMURA Usaku + + * io.c (argf_next_argv): wrong timing of setting ecflags. + fixed the failure of TestArgf#test_textmode introduced at r33662. + +Tue Nov 8 02:36:45 2011 NAKAMURA Usaku + + * include/ruby/encoding.h (ECONV_NEWLINE_DECORATOR_READ_MASK, + ECONV_NEWLINE_DECORATOR_WRITE_MASK): new macro. + + * io.c (rb_io_extract_modeenc, pipe_open, prep_stdio, argf_next_argv): + set TEXTMODE_NEWLINE_DECORATOR_ON_WRITE for textmode on creating IO + if the flag is available. + + * io.c (make_writeconv): drop decorators for reading. + + * io.c (make_readconv): drop decorators for writing. + + * io.c (do_writeconv): existing writeconv is not the condition to raise + ArgumentError. should check textmode or not. + + * test/ruby/test_io_m17n.rb + (TestIO_M17N#test_{cr,lf,crlf}_decorator_on_stdout): test above + changes. + Tue Oct 4 06:43:47 2011 Aaron Patterson * ext/psych/lib/psych.rb: update psych version. @@ -498,6 +613,10 @@ Wed Sep 7 23:42:45 2011 Nobuyoshi Nakada * io.c (argf_next_argv): open in default text mode. [ruby-core:39234] [Bug #5268] +Tue Sep 6 12:07:10 2011 Nobuyoshi Nakada + + * transcode.c: enabled econv newline option. 
+ Mon Sep 5 15:06:55 2011 NARUSE, Yui * test/rubygems/test_gem_security.rb diff --git a/include/ruby/encoding.h b/include/ruby/encoding.h index 123f76a..058462f 100644 --- a/include/ruby/encoding.h +++ b/include/ruby/encoding.h @@ -305,6 +305,8 @@ void rb_econv_binmode(rb_econv_t *ec); #define ECONV_DECORATOR_MASK 0x0000ff00 #define ECONV_NEWLINE_DECORATOR_MASK 0x00003f00 +#define ECONV_NEWLINE_DECORATOR_READ_MASK 0x00000f00 +#define ECONV_NEWLINE_DECORATOR_WRITE_MASK 0x00003000 #define ECONV_UNIVERSAL_NEWLINE_DECORATOR 0x00000100 #define ECONV_CRLF_NEWLINE_DECORATOR 0x00001000 @@ -316,7 +318,7 @@ void rb_econv_binmode(rb_econv_t *ec); #define ECONV_XML_ATTR_QUOTE_DECORATOR 0x00100000 #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) -#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_UNIVERSAL_NEWLINE_DECORATOR +#define ECONV_DEFAULT_NEWLINE_DECORATOR ECONV_CRLF_NEWLINE_DECORATOR #else #define ECONV_DEFAULT_NEWLINE_DECORATOR 0 #endif diff --git a/include/ruby/win32.h b/include/ruby/win32.h index 4d8a667..7d3c1d0 100644 --- a/include/ruby/win32.h +++ b/include/ruby/win32.h @@ -303,6 +303,7 @@ extern int rb_w32_stati64(const char *, struct stati64 *); extern int rb_w32_ustati64(const char *, struct stati64 *); extern int rb_w32_access(const char *, int); extern int rb_w32_uaccess(const char *, int); +extern char rb_w32_fd_is_text(int); #ifdef __BORLANDC__ extern int rb_w32_fstati64(int, struct stati64 *); diff --git a/io.c b/io.c index 69c3c9e..c2ae238 100644 --- a/io.c +++ b/io.c @@ -219,25 +219,124 @@ rb_update_max_fd(int fd) # endif #endif +#define rb_sys_fail_path(path) rb_sys_fail(NIL_P(path) ? 
0 : RSTRING_PTR(path)) + +static int io_fflush(rb_io_t *); + #define NEED_NEWLINE_DECORATOR_ON_READ(fptr) ((fptr)->mode & FMODE_TEXTMODE) #define NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) ((fptr)->mode & FMODE_TEXTMODE) #if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) /* Windows */ # define DEFAULT_TEXTMODE FMODE_TEXTMODE # define TEXTMODE_NEWLINE_DECORATOR_ON_WRITE ECONV_CRLF_NEWLINE_DECORATOR +/* + * CRLF newline is set as default newline decorator. + * If only CRLF newline conversion is needed, we use binary IO process + * with OS's text mode for IO performance improvement. + * If encoding conversion is needed or a user sets text mode, we use encoding + * conversion IO process and universal newline decorator by default. + */ +#define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || (fptr)->encs.ecflags & ~ECONV_CRLF_NEWLINE_DECORATOR) +#define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || ((fptr)->encs.ecflags & ((ECONV_DECORATOR_MASK & ~ECONV_CRLF_NEWLINE_DECORATOR)|ECONV_STATEFUL_DECORATOR_MASK))) +#define SET_BINARY_MODE(fptr) setmode((fptr)->fd, O_BINARY) + +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) do {\ + if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) {\ + if (((fptr)->mode & FMODE_READABLE) &&\ + !((fptr)->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) {\ + setmode((fptr)->fd, O_BINARY);\ + }\ + else {\ + setmode((fptr)->fd, O_TEXT);\ + }\ + }\ +} while(0) + +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) do {\ + if ((enc2) && ((ecflags) & ECONV_DEFAULT_NEWLINE_DECORATOR)) {\ + (ecflags) |= ECONV_UNIVERSAL_NEWLINE_DECORATOR;\ + }\ +} while(0) +/* + * We use io_seek to back cursor position when changing mode from text to binary, + * but stdin and pipe cannot seek back. Stdin and pipe read should use encoding + * conversion for working properly with mode change. + */ +/* + * Return previous translation mode. 
+ */ +inline static int set_binary_mode_with_seek_cur(rb_io_t *fptr) { /* puts fptr->fd into O_BINARY; when unread data is buffered it first tries to move the fd back to the start of that data and then empties rbuf */ + off_t r, pos; + ssize_t read_size; + long i; + long newlines = 0; + long extra_max; + char *p; + + if (!rb_w32_fd_is_text(fptr->fd)) return O_BINARY; /* fd is not flagged as text: report O_BINARY and change nothing */ + + if (fptr->rbuf.len == 0 || fptr->mode & FMODE_DUPLEX) { /* nothing buffered, or stream flagged FMODE_DUPLEX: switch mode without seeking */ + return setmode(fptr->fd, O_BINARY); + } + + if (io_fflush(fptr) < 0) { + rb_sys_fail(0); + } + errno = 0; /* cleared so that a negative pos counts as failure only when lseek set errno */ + pos = lseek(fptr->fd, 0, SEEK_CUR); + if (pos < 0 && errno) { + if (errno == ESPIPE) /* unseekable fd: flag it so the FMODE_DUPLEX test above short-circuits next time */ + fptr->mode |= FMODE_DUPLEX; + return setmode(fptr->fd, O_BINARY); + } + /* add extra offset for removed '\r' in rbuf */ + extra_max = pos - fptr->rbuf.len; /* upper bound for the LF count taken below */ + p = fptr->rbuf.ptr + fptr->rbuf.off; + for (i = 0; i < fptr->rbuf.len; i++) { /* count LF bytes in the unread part of rbuf, stopping once the count equals extra_max */ + if (*p == '\n') newlines++; + if (extra_max == newlines) break; + p++; + } + while (newlines >= 0) { /* try seek targets from the largest assumed extra-byte count down to zero */ + r = lseek(fptr->fd, pos - fptr->rbuf.len - newlines, SEEK_SET); + if (newlines == 0) break; /* no extra bytes assumed: keep this seek position */ + if (r < 0) { /* seek target rejected: retry with one extra byte less */ + newlines--; + continue; + } + read_size = _read(fptr->fd, fptr->rbuf.ptr, fptr->rbuf.len + newlines); /* NOTE(review): reads into rbuf.ptr from its start, up to rbuf.len plus newlines bytes; confirm the buffer capacity allows this */ + if (read_size < 0) { + rb_sys_fail_path(fptr->pathv); + } + if (read_size == fptr->rbuf.len) { /* byte count matches the buffered length: go back to r and stop */ + lseek(fptr->fd, r, SEEK_SET); + break; + } + else { + newlines--; + } + } + fptr->rbuf.off = 0; /* buffered bytes are dropped here; presumably they are read again from the repositioned fd */ + fptr->rbuf.len = 0; + return setmode(fptr->fd, O_BINARY); +} +#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr) set_binary_mode_with_seek_cur(fptr) + #else /* Unix */ # define DEFAULT_TEXTMODE 0 -#endif #define NEED_READCONV(fptr) ((fptr)->encs.enc2 != NULL || NEED_NEWLINE_DECORATOR_ON_READ(fptr)) #define NEED_WRITECONV(fptr) (((fptr)->encs.enc != NULL && (fptr)->encs.enc != rb_ascii8bit_encoding()) || NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) || ((fptr)->encs.ecflags & (ECONV_DECORATOR_MASK|ECONV_STATEFUL_DECORATOR_MASK))) +#define SET_BINARY_MODE(fptr) (void)(fptr) +#define NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr) (void)(fptr) +#define SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags) ((void)(enc2), (void)(ecflags)) +#define SET_BINARY_MODE_WITH_SEEK_CUR(fptr)
(void)(fptr) +#endif #if !defined HAVE_SHUTDOWN && !defined shutdown #define shutdown(a,b) 0 #endif -#define rb_sys_fail_path(path) rb_sys_fail(NIL_P(path) ? 0 : RSTRING_PTR(path)) - #if defined(_WIN32) #define is_socket(fd, path) rb_w32_is_socket(fd) #elif !defined(S_ISSOCK) @@ -285,7 +384,6 @@ rb_io_check_closed(rb_io_t *fptr) } } -static int io_fflush(rb_io_t *); VALUE rb_io_get_io(VALUE io) @@ -750,16 +848,8 @@ make_writeconv(rb_io_t *fptr) fptr->writeconv_initialized = 1; - ecflags = fptr->encs.ecflags; + ecflags = fptr->encs.ecflags & ~ECONV_NEWLINE_DECORATOR_READ_MASK; ecopts = fptr->encs.ecopts; -#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE - if (NEED_NEWLINE_DECORATOR_ON_WRITE(fptr) && - (!(ecflags & ECONV_NEWLINE_DECORATOR_MASK) || - (ecflags & ECONV_UNIVERSAL_NEWLINE_DECORATOR))) { - ecflags &= ~ECONV_UNIVERSAL_NEWLINE_DECORATOR; - ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; - } -#endif if (!fptr->encs.enc || (fptr->encs.enc == rb_ascii8bit_encoding() && !fptr->encs.enc2)) { /* no encoding conversion */ @@ -897,21 +987,26 @@ io_binwrite(VALUE str, const char *ptr, long len, rb_io_t *fptr, int nosync) return len; } +# define MODE_BTMODE(a,b,c) ((fmode & FMODE_BINMODE) ? (b) : \ + (fmode & FMODE_TEXTMODE) ? 
(c) : (a)) static VALUE do_writeconv(VALUE str, rb_io_t *fptr) { if (NEED_WRITECONV(fptr)) { VALUE common_encoding = Qnil; + SET_BINARY_MODE(fptr); make_writeconv(fptr); if (fptr->writeconv) { +#define fmode (fptr->mode) if (!NIL_P(fptr->writeconv_asciicompat)) common_encoding = fptr->writeconv_asciicompat; - else if (!rb_enc_asciicompat(rb_enc_get(str))) { + else if (MODE_BTMODE(DEFAULT_TEXTMODE,0,1) && !rb_enc_asciicompat(rb_enc_get(str))) { rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s", rb_enc_name(rb_enc_get(str))); } +#undef fmode } else { if (fptr->encs.enc2) @@ -929,6 +1024,23 @@ do_writeconv(VALUE str, rb_io_t *fptr) str = rb_econv_str_convert(fptr->writeconv, str, ECONV_PARTIAL_INPUT); } } +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) +#define fmode (fptr->mode) + else if (MODE_BTMODE(DEFAULT_TEXTMODE,0,1)) { + if ((fptr->mode & FMODE_READABLE) && + !(fptr->encs.ecflags & ECONV_NEWLINE_DECORATOR_MASK)) { + setmode(fptr->fd, O_BINARY); + } + else { + setmode(fptr->fd, O_TEXT); + } + if (!rb_enc_asciicompat(rb_enc_get(str))) { + rb_raise(rb_eArgError, "ASCII incompatible string written for text mode IO without encoding conversion: %s", + rb_enc_name(rb_enc_get(str))); + } + } +#undef fmode +#endif return str; } @@ -1653,7 +1765,7 @@ make_readconv(rb_io_t *fptr, int size) int ecflags; VALUE ecopts; const char *sname, *dname; - ecflags = fptr->encs.ecflags; + ecflags = fptr->encs.ecflags & ~ECONV_NEWLINE_DECORATOR_WRITE_MASK; ecopts = fptr->encs.ecopts; if (fptr->encs.enc2) { sname = rb_enc_name(fptr->encs.enc2); @@ -1822,6 +1934,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str) int cr; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); io_setstrbuf(&str,0); make_readconv(fptr, 0); while (1) { @@ -1843,6 +1956,7 @@ read_all(rb_io_t *fptr, long siz, VALUE str) } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); bytes = 0; pos = 0; @@ -2218,6 +2332,9 @@ io_read(int argc, VALUE *argv, 
VALUE io) rb_io_t *fptr; long n, len; VALUE length, str; +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + int previous_mode; +#endif rb_scan_args(argc, argv, "02", &length, &str); @@ -2238,7 +2355,15 @@ io_read(int argc, VALUE *argv, VALUE io) if (len == 0) return str; READ_CHECK(fptr); +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + previous_mode = set_binary_mode_with_seek_cur(fptr); +#endif n = io_fread(str, 0, fptr); +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + if (previous_mode == O_TEXT) { + setmode(fptr->fd, O_TEXT); + } +#endif if (n == 0) { if (fptr->fd < 0) return Qnil; rb_str_resize(str, 0); @@ -2265,6 +2390,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) long limit = *lp; if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { const char *p, *e; @@ -2307,6 +2433,7 @@ appendline(rb_io_t *fptr, int delim, VALUE *strp, long *lp) return EOF; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { long pending = READ_DATA_PENDING_COUNT(fptr); if (pending > 0) { @@ -2345,6 +2472,7 @@ swallow(rb_io_t *fptr, int term) if (NEED_READCONV(fptr)) { rb_encoding *enc = io_read_encoding(fptr); int needconv = rb_enc_mbminlen(enc) != 1; + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); do { size_t cnt; @@ -2368,6 +2496,7 @@ swallow(rb_io_t *fptr, int term) return FALSE; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); do { size_t cnt; while ((cnt = READ_DATA_PENDING_COUNT(fptr)) > 0) { @@ -2504,6 +2633,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) } else if (rs == rb_default_rs && limit < 0 && !NEED_READCONV(fptr) && rb_enc_asciicompat(enc = io_read_encoding(fptr))) { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); return rb_io_getline_fast(fptr, enc, io); } else { @@ -2513,6 +2643,7 @@ rb_io_getline_1(VALUE rs, long limit, VALUE io) int rspara = 0; int extra_limit = 16; + SET_BINARY_MODE(fptr); enc = io_read_encoding(fptr); if (!NIL_P(rs)) { @@ -2862,6 +2993,7 @@ io_getc(rb_io_t 
*fptr, rb_encoding *enc) VALUE str = Qnil; rb_encoding *read_enc = io_read_encoding(fptr); + SET_BINARY_MODE(fptr); make_readconv(fptr, 0); while (1) { @@ -2906,6 +3038,7 @@ io_getc(rb_io_t *fptr, rb_encoding *enc) return str; } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); if (io_fillbuf(fptr) < 0) { return Qnil; } @@ -3020,6 +3153,7 @@ rb_io_each_codepoint(VALUE io) READ_CHECK(fptr); if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); for (;;) { make_readconv(fptr, 0); for (;;) { @@ -3060,6 +3194,7 @@ rb_io_each_codepoint(VALUE io) rb_yield(UINT2NUM(c)); } } + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); enc = io_input_encoding(fptr); for (;;) { if (io_fillbuf(fptr) < 0) { @@ -3263,6 +3398,7 @@ rb_io_ungetc(VALUE io, VALUE c) SafeStringValue(c); } if (NEED_READCONV(fptr)) { + SET_BINARY_MODE(fptr); len = RSTRING_LEN(c); #if SIZEOF_LONG > SIZEOF_INT if (len > INT_MAX) @@ -3282,6 +3418,7 @@ rb_io_ungetc(VALUE io, VALUE c) MEMMOVE(fptr->cbuf.ptr+fptr->cbuf.off, RSTRING_PTR(c), char, len); } else { + NEED_NEWLINE_DECORATOR_ON_READ_CHECK(fptr); io_ungetbyte(c, fptr); } return Qnil; @@ -3990,6 +4127,14 @@ rb_io_binmode(VALUE io) fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; fptr->writeconv_pre_ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; +#ifdef O_BINARY + if (!fptr->readconv) { + SET_BINARY_MODE_WITH_SEEK_CUR(fptr); + } + else { + setmode(fptr->fd, O_BINARY); + } +#endif return io; } @@ -4009,6 +4154,7 @@ rb_io_ascii8bit_binmode(VALUE io) } fptr->mode |= FMODE_BINMODE; fptr->mode &= ~FMODE_TEXTMODE; + SET_BINARY_MODE_WITH_SEEK_CUR(fptr); fptr->encs.enc = rb_ascii8bit_encoding(); fptr->encs.enc2 = NULL; @@ -4062,8 +4208,6 @@ rb_io_binmode_p(VALUE io) static const char* rb_io_fmode_modestr(int fmode) { -# define MODE_BTMODE(a,b,c) ((fmode & FMODE_BINMODE) ? (b) : \ - (fmode & FMODE_TEXTMODE) ? 
(c) : (a)) if (fmode & FMODE_APPEND) { if ((fmode & FMODE_READWRITE) == FMODE_READWRITE) { return MODE_BTMODE("a+", "ab+", "at+"); @@ -4528,6 +4672,12 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, ecflags = (fmode & FMODE_READABLE) ? MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else { @@ -4563,13 +4713,19 @@ rb_io_extract_modeenc(VALUE *vmode_p, VALUE *vperm_p, VALUE opthash, ecflags = (fmode & FMODE_READABLE) ? MODE_BTMODE(ECONV_DEFAULT_NEWLINE_DECORATOR, 0, ECONV_UNIVERSAL_NEWLINE_DECORATOR) : 0; - ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + ecflags |= (fmode & FMODE_WRITABLE) ? + MODE_BTMODE(TEXTMODE_NEWLINE_DECORATOR_ON_WRITE, + 0, TEXTMODE_NEWLINE_DECORATOR_ON_WRITE) : 0; +#endif if (rb_io_extract_encoding_option(opthash, &enc, &enc2, &fmode)) { if (has_enc) { rb_raise(rb_eArgError, "encoding specified twice"); } } + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); + ecflags = rb_econv_prepare_options(opthash, &ecopts, ecflags); } validate_enc_binmode(&fmode, ecflags, enc, enc2); @@ -4614,9 +4770,6 @@ rb_sysopen(VALUE fname, int oflags, mode_t perm) int fd; struct sysopen_struct data; -#ifdef O_BINARY - oflags |= O_BINARY; -#endif data.fname = rb_str_encode_ospath(fname); data.oflags = oflags; data.perm = perm; @@ -5251,9 +5404,21 @@ pipe_open(struct rb_exec_arg *eargp, VALUE prog, const char *modestr, int fmode, fptr->mode = fmode | FMODE_SYNC|FMODE_DUPLEX; if (convconfig) { fptr->encs = *convconfig; +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) { + fptr->encs.ecflags |= 
ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } +#endif } - else if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { - fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + else { + if (NEED_NEWLINE_DECORATOR_ON_READ(fptr)) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + if (NEED_NEWLINE_DECORATOR_ON_WRITE(fptr)) { + fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; + } +#endif } fptr->pid = pid; @@ -6421,6 +6586,12 @@ prep_stdio(FILE *f, int fmode, VALUE klass, const char *path) GetOpenFile(io, fptr); fptr->encs.ecflags |= ECONV_DEFAULT_NEWLINE_DECORATOR; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; + if (fmode & FMODE_READABLE) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } +#endif fptr->stdio_file = f; return io; @@ -6958,7 +7129,9 @@ argf_next_argv(VALUE argf) if (stdout_binmode) rb_io_binmode(rb_stdout); } fmode = FMODE_READABLE; - if (!ARGF.binmode) fmode |= DEFAULT_TEXTMODE; + if (!ARGF.binmode) { + fmode |= DEFAULT_TEXTMODE; + } ARGF.current_file = prep_io(fr, fmode, rb_cFile, fn); if (!NIL_P(write_io)) { rb_io_set_write_io(ARGF.current_file, write_io); @@ -6974,6 +7147,9 @@ argf_next_argv(VALUE argf) fptr->encs.ecflags &= ~ECONV_NEWLINE_DECORATOR_MASK; if (!ARGF.binmode) { fptr->encs.ecflags |= ECONV_DEFAULT_NEWLINE_DECORATOR; +#ifdef TEXTMODE_NEWLINE_DECORATOR_ON_WRITE + fptr->encs.ecflags |= TEXTMODE_NEWLINE_DECORATOR_ON_WRITE; +#endif } } ARGF.next_p = 0; @@ -8031,22 +8207,26 @@ io_encoding_set(rb_io_t *fptr, VALUE v1, VALUE v2, VALUE opt) } else enc = rb_to_encoding(v2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { if (NIL_P(v1)) { /* Set to default encodings */ rb_io_ext_int_to_encs(NULL, NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } else { tmp = rb_check_string_type(v1); if (!NIL_P(tmp) 
&& rb_enc_asciicompat(rb_enc_get(tmp))) { parse_mode_enc(RSTRING_PTR(tmp), &enc, &enc2, NULL); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecflags = rb_econv_prepare_options(opt, &ecopts, ecflags); } else { rb_io_ext_int_to_encs(rb_to_encoding(v1), NULL, &enc, &enc2); + SET_UNIVERSAL_NEWLINE_DECORATOR_IF_ENC2(enc2, ecflags); ecopts = Qnil; } } @@ -8165,13 +8345,22 @@ rb_io_s_pipe(int argc, VALUE *argv, VALUE klass) extract_binmode(opt, &fmode); #if DEFAULT_TEXTMODE - if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr->mode &= ~FMODE_TEXTMODE; + setmode(fptr->fd, O_BINARY); + } +#if defined(RUBY_TEST_CRLF_ENVIRONMENT) || defined(_WIN32) + if (fptr->encs.ecflags & ECONV_DEFAULT_NEWLINE_DECORATOR) { + fptr->encs.ecflags |= ECONV_UNIVERSAL_NEWLINE_DECORATOR; + } +#endif #endif fptr->mode |= fmode; #if DEFAULT_TEXTMODE - if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) + if ((fptr2->mode & FMODE_TEXTMODE) && (fmode & FMODE_BINMODE)) { fptr2->mode &= ~FMODE_TEXTMODE; + setmode(fptr2->fd, O_BINARY); + } #endif fptr2->mode |= fmode; @@ -9095,6 +9284,13 @@ copy_stream_body(VALUE arg) } stp->dst_fd = dst_fd; +#ifdef O_BINARY + if (src_fptr) + SET_BINARY_MODE_WITH_SEEK_CUR(src_fptr); + if (dst_fptr) + setmode(dst_fd, O_BINARY); +#endif + if (stp->src_offset == (off_t)-1 && src_fptr && src_fptr->rbuf.len) { size_t len = src_fptr->rbuf.len; VALUE str; diff --git a/test/rexml/test_order.rb b/test/rexml/test_order.rb index 2b66d3a..a87b1c2 100644 --- a/test/rexml/test_order.rb +++ b/test/rexml/test_order.rb @@ -43,7 +43,7 @@ END end # Provided by Tom Talbott def test_more_ordering - doc = REXML::Document.new(Zlib::GzipReader.new(File.new(fixture_path('LostineRiver.kml.gz')), encoding: 'utf-8')) + doc = REXML::Document.new(Zlib::GzipReader.open(fixture_path('LostineRiver.kml.gz'), encoding: 'utf-8')) actual = [ "Head south from Phinney Ave N", "Turn left at N 36th St", 
diff --git a/test/ruby/test_econv.rb b/test/ruby/test_econv.rb index 765616d..080d027 100644 --- a/test/ruby/test_econv.rb +++ b/test/ruby/test_econv.rb @@ -902,4 +902,10 @@ class TestEncodingConverter < Test::Unit::TestCase "".encode("euc-jp", :undef => :replace, :replace => broken) } end + + def test_newline_option + ec1 = Encoding::Converter.new("", "", universal_newline: true) + ec2 = Encoding::Converter.new("", "", newline: :universal) + assert_equal(ec1, ec2) + end end diff --git a/test/ruby/test_io_m17n.rb b/test/ruby/test_io_m17n.rb index 091b04a..b7460bf 100644 --- a/test/ruby/test_io_m17n.rb +++ b/test/ruby/test_io_m17n.rb @@ -2069,5 +2069,260 @@ EOT } assert(c.ascii_only?, "should be ascii_only #{bug4557}") end -end + def test_default_mode_on_dosish + with_tmpdir { + open("a", "w") {|f| f.write "\n"} + assert_equal("\r\n", IO.binread("a")) + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_default_mode_on_unix + with_tmpdir { + open("a", "w") {|f| f.write "\n"} + assert_equal("\n", IO.binread("a")) + } + end unless /mswin|mingw/ =~ RUBY_PLATFORM + + def test_text_mode + with_tmpdir { + open("a", "wb") {|f| f.write "\r\n"} + assert_equal("\n", open("a", "rt"){|f| f.read}) + } + end + + def test_binary_mode + with_tmpdir { + open("a", "wb") {|f| f.write "\r\n"} + assert_equal("\r\n", open("a", "rb"){|f| f.read}) + } + end + + def test_default_stdout_stderr_mode + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w, err: out_w) + in_r.close + out_w.close + in_w.write <<-EOS + STDOUT.puts "abc" + STDOUT.flush + STDERR.puts "def" + STDERR.flush + EOS + in_w.close + Process.wait pid + assert_equal "abc\r\ndef\r\n", out_r.binmode.read + out_r.close + end + end + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_cr_decorator_on_stdout + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) + in_r.close + out_w.close + 
in_w.write <<-EOS + STDOUT.set_encoding('locale', nil, newline: :cr) + STDOUT.puts "abc" + STDOUT.flush + EOS + in_w.close + Process.wait pid + assert_equal "abc\r", out_r.binmode.read + out_r.close + end + end + end + + def test_lf_decorator_on_stdout + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) + in_r.close + out_w.close + in_w.write <<-EOS + STDOUT.set_encoding('locale', nil, newline: :lf) + STDOUT.puts "abc" + STDOUT.flush + EOS + in_w.close + Process.wait pid + assert_equal "abc\n", out_r.binmode.read + out_r.close + end + end + end + + def test_crlf_decorator_on_stdout + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, in: in_r, out: out_w) + in_r.close + out_w.close + in_w.write <<-EOS + STDOUT.set_encoding('locale', nil, newline: :crlf) + STDOUT.puts "abc" + STDOUT.flush + EOS + in_w.close + Process.wait pid + assert_equal "abc\r\n", out_r.binmode.read + out_r.close + end + end + end + + def test_binmode_with_pipe + with_pipe do |r, w| + src = "a\r\nb\r\nc\r\n" + w.binmode.write src + w.close + + assert_equal("a", r.getc) + assert_equal("\n", r.getc) + r.binmode + assert_equal("b", r.getc) + assert_equal("\r", r.getc) + assert_equal("\n", r.getc) + assert_equal("c", r.getc) + assert_equal("\r", r.getc) + assert_equal("\n", r.getc) + assert_equal(nil, r.getc) + r.close + end + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_stdin_binmode + with_pipe do |in_r, in_w| + with_pipe do |out_r, out_w| + pid = Process.spawn({}, EnvUtil.rubybin, '-e', <<-'End', in: in_r, out: out_w) + STDOUT.binmode + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDIN.binmode + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + STDOUT.write STDIN.getc + End + in_r.close + out_w.close + src = "a\r\nb\r\nc\r\n" + in_w.binmode.write src + 
in_w.close + Process.wait pid + assert_equal "a\nb\r\nc\r\n", out_r.binmode.read + out_r.close + end + end + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_read_with_length + with_tmpdir { + str = "a\nb" + generate_file("tmp", str) + open("tmp", "r") do |f| + assert_equal(str, f.read(3)) + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_read_with_length_binmode + with_tmpdir { + str = "a\r\nb\r\nc\r\n\r\n" + generate_file("tmp", str) + open("tmp", "r") do |f| + # read with length should be binary mode + assert_equal("a\r\n", f.read(3)) # binary + assert_equal("b\nc\n\n", f.read) # text + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_gets_and_read_with_binmode + with_tmpdir { + str = "a\r\nb\r\nc\r\n\n\r\n" + generate_file("tmp", str) + open("tmp", "r") do |f| + assert_equal("a\n", f.gets) # text + assert_equal("b\r\n", f.read(3)) # binary + assert_equal("c\r\n", f.read(3)) # binary + assert_equal("\n\n", f.read) # text + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_getc_and_read_with_binmode + with_tmpdir { + str = "a\r\nb\r\nc\n\n\r\n\r\n" + generate_file("tmp", str) + open("tmp", "r") do |f| + assert_equal("a", f.getc) # text + assert_equal("\n", f.getc) # text + assert_equal("b\r\n", f.read(3)) # binary + assert_equal("c\n\n\n\n", f.read) # text + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_read_with_binmode_and_gets + with_tmpdir { + str = "a\r\nb\r\nc\r\n\r\n" + open("tmp", "wb") { |f| f.write str } + open("tmp", "r") do |f| + assert_equal("a", f.getc) # text + assert_equal("\n", f.getc) # text + assert_equal("b\r\n", f.read(3)) # binary + assert_equal("c\n", f.gets) # text + assert_equal("\n", f.gets) # text + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_read_with_binmode_and_getc + with_tmpdir { + str = "a\r\nb\r\nc\r\n\r\n" + open("tmp", "wb") { |f| f.write str } + open("tmp", "r") do |f| + assert_equal("a", f.getc) # text + assert_equal("\n", f.getc) # text + 
assert_equal("b\r\n", f.read(3)) # binary + assert_equal("c", f.getc) # text + assert_equal("\n", f.getc) # text + assert_equal("\n", f.getc) # text + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_read_write_with_binmode + with_tmpdir { + str = "a\r\n" + generate_file("tmp", str) + open("tmp", "r+") do |f| + assert_equal("a\r\n", f.read(3)) # binary + f.write("b\n\n"); # text + f.rewind + assert_equal("a\nb\n\n", f.read) # text + f.rewind + assert_equal("a\r\nb\r\n\r\n", f.binmode.read) # binary + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM + + def test_seek_with_setting_binmode + with_tmpdir { + str = "a\r\nb\r\nc\r\n\r\n\n\n\n\n\n\n\n" + generate_file("tmp", str) + open("tmp", "r") do |f| + assert_equal("a\n", f.gets) # text + assert_equal("b\r\n", f.read(3)) # binary + end + } + end if /mswin|mingw/ =~ RUBY_PLATFORM +end diff --git a/transcode.c b/transcode.c index 4d9462f..f87fd56 100644 --- a/transcode.c +++ b/transcode.c @@ -15,6 +15,8 @@ #include "transcode_data.h" #include +#define ENABLE_ECONV_NEWLINE_OPTION 1 + /* VALUE rb_cEncoding = rb_define_class("Encoding", rb_cObject); */ VALUE rb_eUndefinedConversionError; VALUE rb_eInvalidByteSequenceError; @@ -1888,6 +1890,7 @@ rb_econv_add_converter(rb_econv_t *ec, const char *sname, const char *dname, int return -1; tr = load_transcoder_entry(entry); + if (!tr) return -1; return rb_econv_add_transcoder_at(ec, tr, n); } @@ -3111,11 +3114,15 @@ search_convpath_i(const char *sname, const char *dname, int depth, void *arg) * # [#, #]] * * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", universal_newline: true) + * or + * p Encoding::Converter.search_convpath("ISO-8859-1", "EUC-JP", newline: :universal) * #=> [[#, #], * # [#, #], * # "universal_newline"] * * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", universal_newline: true) + * or + * p Encoding::Converter.search_convpath("ISO-8859-1", "UTF-32BE", newline: :universal) * #=> [[#, #], * # "universal_newline", * # 
[#, #]] @@ -3260,6 +3267,9 @@ rb_econv_init_by_convpath(VALUE self, VALUE convpath, * :undef => nil # raise error on undefined conversion (default) * :undef => :replace # replace undefined conversion * :replace => string # replacement string ("?" or "\uFFFD" if not specified) + * :newline => :universal # decorator for converting CRLF and CR to LF + * :newline => :crlf # decorator for converting LF to CRLF + * :newline => :cr # decorator for converting LF to CR * :universal_newline => true # decorator for converting CRLF and CR to LF * :crlf_newline => true # decorator for converting LF to CRLF * :cr_newline => true # decorator for converting LF to CR @@ -3508,6 +3518,45 @@ econv_convpath(VALUE self) return result; } +/* + * call-seq: + * ec == other -> true or false + */ +static VALUE +econv_equal(VALUE self, VALUE other) +{ + rb_econv_t *ec1 = check_econv(self); + rb_econv_t *ec2; + int i; + + if (!rb_typeddata_is_kind_of(other, &econv_data_type)) { /* NOTE(review): a non-converter operand yields Qnil although the call-seq documents true or false; confirm intent */ + return Qnil; + } + ec2 = DATA_PTR(other); /* raw data pointer, not run through check_econv; a NULL pointer compares as not equal */ + if (!ec2) return Qfalse; + if (ec1->source_encoding_name != ec2->source_encoding_name && /* names match when the pointers are identical or the strings compare equal */ + strcmp(ec1->source_encoding_name, ec2->source_encoding_name)) + return Qfalse; + if (ec1->destination_encoding_name != ec2->destination_encoding_name && + strcmp(ec1->destination_encoding_name, ec2->destination_encoding_name)) + return Qfalse; + if (ec1->flags != ec2->flags) return Qfalse; + if (ec1->replacement_enc != ec2->replacement_enc && /* NOTE(review): strcmp runs whenever the pointers differ; confirm neither side can be NULL here */ + strcmp(ec1->replacement_enc, ec2->replacement_enc)) + return Qfalse; + if (ec1->replacement_len != ec2->replacement_len) return Qfalse; + if (ec1->replacement_str != ec2->replacement_str && /* lengths were found equal just above, so either length bounds the memcmp */ + memcmp(ec1->replacement_str, ec2->replacement_str, ec2->replacement_len)) + return Qfalse; + + if (ec1->num_trans != ec2->num_trans) return Qfalse; + for (i = 0; i < ec1->num_trans; i++) { /* every step of the chain must point at the same transcoder object */ + if (ec1->elems[i].tc->transcoder != ec2->elems[i].tc->transcoder) + return Qfalse; + } + return Qtrue; +} + static VALUE econv_result_to_symbol(rb_econv_result_t res)
{ @@ -4387,6 +4436,7 @@ Init_transcode(void) rb_define_method(rb_cEncodingConverter, "last_error", econv_last_error, 0); rb_define_method(rb_cEncodingConverter, "replacement", econv_get_replacement, 0); rb_define_method(rb_cEncodingConverter, "replacement=", econv_set_replacement, 1); + rb_define_method(rb_cEncodingConverter, "==", econv_equal, 1); rb_define_const(rb_cEncodingConverter, "INVALID_MASK", INT2FIX(ECONV_INVALID_MASK)); rb_define_const(rb_cEncodingConverter, "INVALID_REPLACE", INT2FIX(ECONV_INVALID_REPLACE)); diff --git a/win32/win32.c b/win32/win32.c index 759762b..f2bf3c2 100644 --- a/win32/win32.c +++ b/win32/win32.c @@ -2128,7 +2128,7 @@ init_stdhandle(void) int keep = 0; #define open_null(fd) \ (((nullfd < 0) ? \ - (nullfd = open("NUL", O_RDWR|O_BINARY)) : 0), \ + (nullfd = open("NUL", O_RDWR)) : 0), \ ((nullfd == (fd)) ? (keep = 1) : dup2(nullfd, fd)), \ (fd)) @@ -2141,15 +2141,9 @@ init_stdhandle(void) if (fileno(stdout) < 0) { stdout->_file = open_null(1); } - else { - setmode(fileno(stdout), O_BINARY); - } if (fileno(stderr) < 0) { stderr->_file = open_null(2); } - else { - setmode(fileno(stderr), O_BINARY); - } if (nullfd >= 0 && !keep) close(nullfd); setvbuf(stderr, NULL, _IONBF, 0); } @@ -5321,7 +5315,8 @@ rb_w32_write(int fd, const void *buf, size_t size) return -1; } - if (_osfile(fd) & FTEXT) { + if ((_osfile(fd) & FTEXT) && + (!(_osfile(fd) & FPIPE) || fd == fileno(stdout) || fd == fileno(stderr))) { return _write(fd, buf, size); } @@ -5822,3 +5817,8 @@ rb_w32_inet_ntop(int af, void *addr, char *numaddr, size_t numaddr_len) } return numaddr; } + +char +rb_w32_fd_is_text(int fd) { /* result is nonzero when the FTEXT bit is set in _osfile(fd), zero otherwise */ + return _osfile(fd) & FTEXT; /* NOTE(review): the masked value is narrowed to char; confirm the FTEXT bit survives that conversion */ +}