From 32a69f379087a1588473dc05c7949c00a87ace9d Mon Sep 17 00:00:00 2001
From: Jeremy Evans
Date: Tue, 26 Jan 2016 10:32:59 -0800
Subject: [PATCH] Add String.buffer, for creating strings with large capacities

If you know you are going to need to create a large string, it's
better to create it with a large capacity.  Otherwise, ruby will
need to continuously resize the string as it grows.

For example, if you will be producing a string that is 100000 bytes,
String.buffer(100000) will avoid 10 separate resizes compared to
using String.new.  Performance-wise, String.new is 1.33x slower
than String.buffer(100000) if appending in 1000 byte chunks, and
1.64x slower than String.buffer(1000) if appending in 100 byte chunks.

To make sure this works correctly with string subclasses, a static
rb_str_buf_new_with_class function is added, which both String.buffer
and rb_str_buf_new now call.
---
 string.c                 | 29 ++++++++++++++++++++++++++---
 test/ruby/test_string.rb |  9 +++++++++
 2 files changed, 35 insertions(+), 3 deletions(-)

diff --git a/string.c b/string.c
index b499fa2..052ee40 100644
--- a/string.c
+++ b/string.c
@@ -1124,10 +1124,10 @@ str_new_empty(VALUE str)
 
 #define STR_BUF_MIN_SIZE 128
 
-VALUE
-rb_str_buf_new(long capa)
+static VALUE
+rb_str_buf_new_with_class(VALUE obj, long capa)
 {
-    VALUE str = str_alloc(rb_cString);
+    VALUE str = str_alloc(obj);
 
     if (capa < STR_BUF_MIN_SIZE) {
 	capa = STR_BUF_MIN_SIZE;
@@ -1141,6 +1141,12 @@ rb_str_buf_new(long capa)
 }
 
 VALUE
+rb_str_buf_new(long capa)
+{
+    return rb_str_buf_new_with_class(rb_cString, capa);
+}
+
+VALUE
 rb_str_buf_new_cstr(const char *ptr)
 {
     VALUE str;
@@ -1971,6 +1977,22 @@ rb_str_s_try_convert(VALUE dummy, VALUE str)
     return rb_check_string_type(str);
 }
 
+/*
+ *  call-seq:
+ *     String.buffer(capacity)   -> string
+ *
+ *  Creates an empty string with the given internal capacity.  This
+ *  can increase performance compared to using String.new, if you
+ *  will be concatenating a large amount of data to the string.
+ *
+ *     String.buffer(100000)   #=> ""
+ */
+static VALUE
+rb_str_s_buffer(VALUE obj, VALUE capacity)
+{
+    return rb_str_buf_new_with_class(obj, NUM2LONG(capacity));
+}
+
 static char*
 str_nth_len(const char *p, const char *e, long *nthp, rb_encoding *enc)
 {
@@ -9523,6 +9545,7 @@ Init_String(void)
     rb_include_module(rb_cString, rb_mComparable);
     rb_define_alloc_func(rb_cString, empty_str_alloc);
     rb_define_singleton_method(rb_cString, "try_convert", rb_str_s_try_convert, 1);
+    rb_define_singleton_method(rb_cString, "buffer", rb_str_s_buffer, 1);
     rb_define_method(rb_cString, "initialize", rb_str_init, -1);
     rb_define_method(rb_cString, "initialize_copy", rb_str_replace, 1);
     rb_define_method(rb_cString, "<=>", rb_str_cmp_m, 1);
diff --git a/test/ruby/test_string.rb b/test/ruby/test_string.rb
index f1633e3..aa52670 100644
--- a/test/ruby/test_string.rb
+++ b/test/ruby/test_string.rb
@@ -47,6 +47,15 @@ def test_s_new
     assert_equal(Encoding::UTF_8, S(src, encoding: "utf-8").encoding)
   end
 
+  def test_s_buffer
+    assert_equal("", @cls.buffer(1000))
+    assert_equal(@cls, @cls.buffer(1000).class)
+    assert_operator(ObjectSpace.memsize_of(@cls.buffer(10000)), :>, ObjectSpace.memsize_of(@cls.buffer(1000)))
+
+    assert_equal("", @cls.buffer(-1000))
+    assert_equal(ObjectSpace.memsize_of(@cls.buffer(-10000)), ObjectSpace.memsize_of(@cls.buffer(-1000)))
+  end
+
   def test_AREF # '[]'
     assert_equal("A", S("AooBar")[0])
     assert_equal("B", S("FooBaB")[-1])
-- 
2.6.4