Index: ext/zlib/zlib.c =================================================================== --- ext/zlib/zlib.c (revision 36146) +++ ext/zlib/zlib.c (working copy) @@ -543,12 +543,14 @@ struct zstream { #define ZSTREAM_FLAG_IN_STREAM 0x2 #define ZSTREAM_FLAG_FINISHED 0x4 #define ZSTREAM_FLAG_CLOSING 0x8 -#define ZSTREAM_FLAG_UNUSED 0x10 +#define ZSTREAM_FLAG_GZFILE 0x10 +#define ZSTREAM_FLAG_UNUSED 0x20 #define ZSTREAM_READY(z) ((z)->flags |= ZSTREAM_FLAG_READY) #define ZSTREAM_IS_READY(z) ((z)->flags & ZSTREAM_FLAG_READY) #define ZSTREAM_IS_FINISHED(z) ((z)->flags & ZSTREAM_FLAG_FINISHED) #define ZSTREAM_IS_CLOSING(z) ((z)->flags & ZSTREAM_FLAG_CLOSING) +#define ZSTREAM_IS_GZFILE(z) ((z)->flags & ZSTREAM_FLAG_GZFILE) /* I think that more better value should be found, but I gave up finding it. B) */ @@ -607,33 +609,51 @@ zstream_init(struct zstream *z, const st static void zstream_expand_buffer(struct zstream *z) { - long inc; - if (NIL_P(z->buf)) { - /* I uses rb_str_new here not rb_str_buf_new because - rb_str_buf_new makes a zero-length string. */ - z->buf = rb_str_new(0, ZSTREAM_INITIAL_BUFSIZE); - z->buf_filled = 0; - z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf); - z->stream.avail_out = ZSTREAM_INITIAL_BUFSIZE; - RBASIC(z->buf)->klass = 0; + zstream_expand_buffer_into(z, ZSTREAM_INITIAL_BUFSIZE); return; } - if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { - /* to keep other threads from freezing */ - z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; + if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { + if (z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + int state = 0; + VALUE self = (VALUE)z->stream.opaque; + + rb_str_resize(z->buf, z->buf_filled); + RBASIC(z->buf)->klass = rb_cString; + OBJ_INFECT(z->buf, self); + + rb_protect(rb_yield, z->buf, &state); + + z->buf = Qnil; + zstream_expand_buffer_into(z, ZSTREAM_AVAIL_OUT_STEP_MAX); + + if (state) + rb_jump_tag(state); + + return; + } + else { + zstream_expand_buffer_into(z, + ZSTREAM_AVAIL_OUT_STEP_MAX - z->buf_filled); + } } else { - inc = z->buf_filled / 2; - if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { - inc = ZSTREAM_AVAIL_OUT_STEP_MIN; - } - rb_str_resize(z->buf, z->buf_filled + inc); - z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? - (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; + if (RSTRING_LEN(z->buf) - z->buf_filled >= ZSTREAM_AVAIL_OUT_STEP_MAX) { + /* to keep other threads from freezing */ + z->stream.avail_out = ZSTREAM_AVAIL_OUT_STEP_MAX; + } + else { + long inc = z->buf_filled / 2; + if (inc < ZSTREAM_AVAIL_OUT_STEP_MIN) { + inc = ZSTREAM_AVAIL_OUT_STEP_MIN; + } + rb_str_resize(z->buf, z->buf_filled + inc); + z->stream.avail_out = (inc < ZSTREAM_AVAIL_OUT_STEP_MAX) ? + (int)inc : ZSTREAM_AVAIL_OUT_STEP_MAX; + } + z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; } - z->stream.next_out = (Bytef*)RSTRING_PTR(z->buf) + z->buf_filled; } static void @@ -691,7 +711,7 @@ zstream_append_buffer(struct zstream *z, static VALUE zstream_detach_buffer(struct zstream *z) { - VALUE dst; + VALUE dst, self = (VALUE)z->stream.opaque; if (NIL_P(z->buf)) { dst = rb_str_new(0, 0); @@ -702,10 +722,18 @@ zstream_detach_buffer(struct zstream *z) RBASIC(dst)->klass = rb_cString; } + OBJ_INFECT(dst, self); + z->buf = Qnil; z->buf_filled = 0; z->stream.next_out = 0; z->stream.avail_out = 0; + + if (!ZSTREAM_IS_GZFILE(z) && rb_block_given_p()) { + rb_yield(dst); + dst = Qnil; + } + return dst; } @@ -871,35 +899,16 @@ zstream_end(struct zstream *z) return Qnil; } -static void -zstream_run(struct zstream *z, Bytef *src, long len, int flush) -{ +static VALUE +zstream_run_loop(VALUE loop_args) { + struct zstream *z; uInt n; - int err; - volatile VALUE guard = Qnil; - - if (NIL_P(z->input) && len == 0) { - z->stream.next_in = (Bytef*)""; - z->stream.avail_in = 0; - } - else { - zstream_append_input(z, src, len); - z->stream.next_in = (Bytef*)RSTRING_PTR(z->input); - z->stream.avail_in = MAX_UINT(RSTRING_LEN(z->input)); - /* keep reference to `z->input' so as not to be garbage collected - after zstream_reset_input() and prevent `z->stream.next_in' - from dangling. */ - guard = z->input; - } + int err, flush; - if (z->stream.avail_out == 0) { - zstream_expand_buffer(z); - } + z = (struct zstream *)((VALUE *)loop_args)[0]; + flush = (int)((VALUE*)loop_args)[1]; for (;;) { - /* VC allocates err and guard to same address. accessing err and guard - in same scope prevents it. */ - RB_GC_GUARD(guard); n = z->stream.avail_out; err = z->func->run(&z->stream, flush); z->buf_filled += n - z->stream.avail_out; @@ -929,7 +938,11 @@ zstream_run(struct zstream *z, Bytef *sr continue; } } - raise_zlib_error(err, z->stream.msg); + + /* Z_BUF_ERROR with Z_FINISH is normal, continue expanding output + * buffer */ + if (flush != Z_FINISH && err != Z_BUF_ERROR) + raise_zlib_error(err, z->stream.msg); } if (z->stream.avail_out > 0) { z->flags |= ZSTREAM_FLAG_IN_STREAM; @@ -938,11 +951,47 @@ zstream_run(struct zstream *z, Bytef *sr zstream_expand_buffer(z); } + return Qnil; +} + +static void +zstream_run(struct zstream *z, Bytef *src, long len, int flush) +{ + int state; + VALUE loop_args[2]; + volatile VALUE guard = Qnil; + + if (NIL_P(z->input) && len == 0) { + z->stream.next_in = (Bytef*)""; + z->stream.avail_in = 0; + } + else { + zstream_append_input(z, src, len); + z->stream.next_in = (Bytef*)RSTRING_PTR(z->input); + z->stream.avail_in = MAX_UINT(RSTRING_LEN(z->input)); + /* keep reference to `z->input' so as not to be garbage collected + after zstream_reset_input() and prevent `z->stream.next_in' + from dangling. */ + guard = z->input; + } + + if (z->stream.avail_out == 0) { + zstream_expand_buffer(z); + } + + loop_args[0] = (VALUE)z; + loop_args[1] = (VALUE)flush; + + rb_protect(zstream_run_loop, (VALUE)loop_args, &state); + zstream_reset_input(z); if (z->stream.avail_in > 0) { zstream_append_input(z, z->stream.next_in, z->stream.avail_in); - guard = Qnil; /* prevent tail call to make guard effective */ + guard = Qnil; /* prevent tail call to make guard effective */ } + + if (state) + rb_jump_tag(state); } static VALUE @@ -1125,20 +1174,22 @@ rb_zstream_reset(VALUE obj) } /* - * Finishes the stream and flushes output buffer. See Zlib::Deflate#finish and - * Zlib::Inflate#finish for details of this behavior. + * call-seq: + * finish -> String + * finish { |chunk| ... } -> nil + * + * Finishes the stream and flushes output buffer. If a block is given each + * chunk is yielded to the block until the input buffer has been flushed to + * the output buffer. */ static VALUE rb_zstream_finish(VALUE obj) { struct zstream *z = get_zstream(obj); - VALUE dst; zstream_run(z, (Bytef*)"", 0, Z_FINISH); - dst = zstream_detach_buffer(z); - OBJ_INFECT(dst, obj); - return dst; + return zstream_detach_buffer(z); } /* @@ -1157,18 +1208,22 @@ rb_zstream_flush_next_in(VALUE obj) } /* - * Flushes output buffer and returns all data in that buffer. + * call-seq: + * flush_next_out -> String + * flush_next_out { |chunk| ... } -> nil + * + * Flushes output buffer and returns all data in that buffer. If a block is + * given each chunk is yielded to the block until the current output buffer + * has been flushed. */ static VALUE rb_zstream_flush_next_out(VALUE obj) { struct zstream *z; - VALUE dst; Data_Get_Struct(obj, struct zstream, z); - dst = zstream_detach_buffer(z); - OBJ_INFECT(dst, obj); - return dst; + + return zstream_detach_buffer(z); } /* @@ -1426,13 +1481,13 @@ deflate_run(VALUE args) /* * Document-method: Zlib::Deflate.deflate * - * call-seq: Zlib.deflate(string[, level]) - * Zlib::Deflate.deflate(string[, level]) + * call-seq: + * Zlib.deflate(string[, level]) + * Zlib::Deflate.deflate(string[, level]) * * Compresses the given +string+. Valid values of level are - * NO_COMPRESSION, BEST_SPEED, - * BEST_COMPRESSION, DEFAULT_COMPRESSION, and an - * integer from 0 to 9 (the default is 6). + * Zlib::NO_COMPRESSION, Zlib::BEST_SPEED, * Zlib::BEST_COMPRESSION, + * Zlib::DEFAULT_COMPRESSION, or an integer from 0 to 9 (the default is 6). * * This method is almost equivalent to the following code: * @@ -1486,17 +1541,19 @@ do_deflate(struct zstream *z, VALUE src, } /* - * Document-method: Zlib#deflate + * Document-method: Zlib::Deflate#deflate * * call-seq: - * deflate(string, flush = Zlib::NO_FLUSH) + * z.deflate(string, flush = Zlib::NO_FLUSH) -> String + * z.deflate(string, flush = Zlib::NO_FLUSH) { |chunk| ... } -> nil * * Inputs +string+ into the deflate stream and returns the output from the * stream. On calling this method, both the input and the output buffers of - * the stream are flushed. + * the stream are flushed. If +string+ is nil, this method finishes the + * stream, just like Zlib::ZStream#finish. * - * If +string+ is nil, this method finishes the stream, just like - * Zlib::ZStream#finish. + * If a block is given consecutive deflated chunks from the +string+ are + * yielded to the block and +nil+ is returned. * * The +flush+ parameter specifies the flush mode. The following constants * may be used: @@ -1513,15 +1570,13 @@ static VALUE rb_deflate_deflate(int argc, VALUE *argv, VALUE obj) { struct zstream *z = get_zstream(obj); - VALUE src, flush, dst; + VALUE src, flush; rb_scan_args(argc, argv, "11", &src, &flush); OBJ_INFECT(obj, src); do_deflate(z, src, ARG_FLUSH(flush)); - dst = zstream_detach_buffer(z); - OBJ_INFECT(dst, obj); - return dst; + return zstream_detach_buffer(z); } /* @@ -1545,10 +1600,13 @@ rb_deflate_addstr(VALUE obj, VALUE src) * Document-method: Zlib::Deflate#flush * * call-seq: - * flush(flush = Zlib::SYNC_FLUSH) + * flush(flush = Zlib::SYNC_FLUSH) -> String + * flush(flush = Zlib::SYNC_FLUSH) { |chunk| ... } -> nil * * This method is equivalent to deflate('', flush). This method is - * just provided to improve the readability of your Ruby program. + * just provided to improve the readability of your Ruby program. If a block + * is given chunks of deflate output are yielded to the block until the buffer + * is flushed. * * See Zlib::Deflate#deflate for detail on the +flush+ constants NO_FLUSH, * SYNC_FLUSH, FULL_FLUSH and FINISH. @@ -1557,7 +1615,7 @@ static VALUE rb_deflate_flush(int argc, VALUE *argv, VALUE obj) { struct zstream *z = get_zstream(obj); - VALUE v_flush, dst; + VALUE v_flush; int flush; rb_scan_args(argc, argv, "01", &v_flush); @@ -1565,10 +1623,7 @@ rb_deflate_flush(int argc, VALUE *argv, if (flush != Z_NO_FLUSH) { /* prevent Z_BUF_ERROR */ zstream_run(z, (Bytef*)"", 0, flush); } - dst = zstream_detach_buffer(z); - - OBJ_INFECT(dst, obj); - return dst; + return zstream_detach_buffer(z); } /* @@ -1738,9 +1793,11 @@ inflate_run(VALUE args) } /* - * Document-method: Zlib::Inflate.inflate + * Document-method: Zlib::inflate * - * call-seq: Zlib::Inflate.inflate(string) + * call-seq: + * Zlib.inflate(string) + * Zlib::Inflate.inflate(string) * * Decompresses +string+. Raises a Zlib::NeedDict exception if a preset * dictionary is needed for decompression. @@ -1816,12 +1873,17 @@ rb_inflate_add_dictionary(VALUE obj, VAL /* * Document-method: Zlib::Inflate#inflate * - * call-seq: inflate(string) + * call-seq: + * inflate(deflate_string) -> String + * inflate(deflate_string) { |chunk| ... } -> nil + * + * Inputs +deflate_string+ into the inflate stream and returns the output from + * the stream. Calling this method, both the input and the output buffer of + * the stream are flushed. If string is +nil+, this method finishes the + * stream, just like Zlib::ZStream#finish. * - * Inputs +string+ into the inflate stream and returns the output from the - * stream. Calling this method, both the input and the output buffer of the - * stream are flushed. If string is +nil+, this method finishes the stream, - * just like Zlib::ZStream#finish. + * If a block is given consecutive inflated chunks from the +deflate_string+ + * are yielded to the block and +nil+ is returned. * * Raises a Zlib::NeedDict exception if a preset dictionary is needed to * decompress. Set the dictionary by Zlib::Inflate#set_dictionary and then @@ -1858,9 +1920,11 @@ rb_inflate_inflate(VALUE obj, VALUE src) dst = zstream_detach_buffer(z); } else { + VALUE self = (VALUE)z->stream.opaque; StringValue(src); zstream_append_buffer2(z, src); dst = rb_str_new(0, 0); + OBJ_INFECT(dst, self); } } else { @@ -1871,7 +1935,6 @@ rb_inflate_inflate(VALUE obj, VALUE src) } } - OBJ_INFECT(dst, obj); return dst; } @@ -2095,6 +2158,7 @@ gzfile_new(klass, funcs, endfunc) obj = Data_Make_Struct(klass, struct gzfile, gzfile_mark, gzfile_free, gz); zstream_init(&gz->z, funcs); + gz->z.flags |= ZSTREAM_FLAG_GZFILE; gz->io = Qnil; gz->level = 0; gz->mtime = 0; Index: test/zlib/test_zlib.rb =================================================================== --- test/zlib/test_zlib.rb (revision 36146) +++ test/zlib/test_zlib.rb (working copy) @@ -39,6 +39,62 @@ if defined? Zlib assert_raise(Zlib::StreamError) { Zlib::Deflate.deflate("foo", 10000) } end + def test_deflate_chunked + original = '' + chunks = [] + r = Random.new 0 + + z = Zlib::Deflate.new + + 2.times do + input = r.bytes(16384) + original << input + z.deflate(input) do |chunk| + chunks << chunk + end + end + + assert_equal [2, 16384, 10], + chunks.map { |chunk| chunk.length } + + final = z.finish + + assert_equal 16388, final.length + + all = chunks.join + all << final + + inflated = Zlib.inflate all + + assert_equal original, inflated + end + + def test_deflate_chunked_break + chunks = [] + r = Random.new 0 + + z = Zlib::Deflate.new + + input = r.bytes(16384) + z.deflate(input) do |chunk| + chunks << chunk + break + end + + assert_equal [2], chunks.map { |chunk| chunk.length } + + final = z.finish + + assert_equal 16393, final.length + + all = chunks.join + all << final + + original = Zlib.inflate all + + assert_equal input, original + end + def test_addstr z = Zlib::Deflate.new z << "foo" @@ -202,6 +258,38 @@ if defined? Zlib assert_equal "foofoofoo", out end + def test_finish_chunked + # zeros = Zlib::Deflate.deflate("0" * 100_000) + zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ + "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\257\006\351\247BH" + + chunks = [] + + z = Zlib::Inflate.new + + z.inflate(zeros) do |chunk| + chunks << chunk + break + end + + z.finish do |chunk| + chunks << chunk + end + + assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], + chunks.map { |chunk| chunk.size } + + assert chunks.all? { |chunk| + chunk =~ /\A0+\z/ + } + end + def test_inflate s = Zlib::Deflate.deflate("foo") z = Zlib::Inflate.new @@ -212,6 +300,58 @@ if defined? Zlib z << "foo" # ??? end + def test_inflate_chunked + # s = Zlib::Deflate.deflate("0" * 100_000) + zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ + "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\257\006\351\247BH" + + chunks = [] + + z = Zlib::Inflate.new + + z.inflate(zeros) do |chunk| + chunks << chunk + end + + assert_equal [16384, 16384, 16384, 16384, 16384, 16384, 1696], + chunks.map { |chunk| chunk.size } + + assert chunks.all? { |chunk| + chunk =~ /\A0+\z/ + } + end + + def test_inflate_chunked_break + # zeros = Zlib::Deflate.deflate("0" * 100_000) + zeros = "x\234\355\3011\001\000\000\000\302\240J\353\237\316\032\036@" \ + "\001\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\000\000\000\000\000\000\000\000" \ + "\000\000\000\000\000\000\000\257\006\351\247BH" + + chunks = [] + + z = Zlib::Inflate.new + + z.inflate(zeros) do |chunk| + chunks << chunk + break + end + + out = z.inflate nil + + assert_equal 100_000 - chunks.first.length, out.length + end + def test_inflate_dictionary dictionary = "foo"