Project

General

Profile

Actions

Bug #8148

closed

[patch] reduce allocations due to __FILE__ and {class,module}_eval

Added by tmm1 (Aman Karmani) over 11 years ago. Updated over 11 years ago.

Status:
Rejected
Target version:
-
ruby -v:
ruby 2.1.0dev (2013-03-22 trunk 39875) [x86_64-darwin12.2.1]
Backport:
[ruby-core:53635]

Description

In my application, I noticed many long lived "(eval)" strings on the heap.

I also noticed many copies of filename strings when using module_eval(.., FILE, ..). Attached patch adds a new NODE_FILE to re-use the original path string.

./ruby -Ilib -rpp -rforwardable -e '
module Test
extend Forwardable
def_delegators :@test, *("a".."z")
end

GC.start
strings = ObjectSpace.each_object(String).select{ |s| s.frozen? && s =~ /forwardable/ }
p strings.size
pp strings.inject(Hash.new 0){ |h,s| h[s]+=1; h }
'

before:

33
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>31,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

after:

4
{"/Users/test/code/ruby-trunk/lib/forwardable.rb"=>2,
 "forwardable"=>1,
 "/Users/test/code/ruby\\-trunk/lib/forwardable\\.rb"=>1}

diff --git a/compile.c b/compile.c
index 9360f5b..54733d9 100644
--- a/compile.c
+++ b/compile.c
@@ -5149,6 +5149,12 @@ iseq_compile_each(rb_iseq_t *iseq, LINK_ANCHOR *ret, NODE * node, int poped)
}
break;
}

  •  case NODE_FILE:{
    
  • if (!poped) {
  •   ADD_INSN1(ret, line, putobject, iseq->location.path);
    
  • }
  • break;
  •  }
     case NODE_ERRINFO:{
    
    if (!poped) {
    if (iseq->type == ISEQ_TYPE_RESCUE) {
    diff --git a/ext/objspace/objspace.c b/ext/objspace/objspace.c
    index 720c918..2f32208 100644
    --- a/ext/objspace/objspace.c
    +++ b/ext/objspace/objspace.c
    @@ -530,6 +530,7 @@ count_nodes(int argc, VALUE *argv, VALUE os)
    COUNT_NODE(NODE_NIL);
    COUNT_NODE(NODE_TRUE);
    COUNT_NODE(NODE_FALSE);
  •   COUNT_NODE(NODE_FILE);
      COUNT_NODE(NODE_ERRINFO);
      COUNT_NODE(NODE_DEFINED);
      COUNT_NODE(NODE_POSTEXE);
    

diff --git a/node.h b/node.h
index 0fa7254..3f4345e 100644
--- a/node.h
+++ b/node.h
@@ -232,6 +232,8 @@ enum node_type {
#define NODE_PRELUDE NODE_PRELUDE
NODE_LAMBDA,
#define NODE_LAMBDA NODE_LAMBDA

  • NODE_FILE,
    +#define NODE_FILE NODE_FILE
    NODE_LAST
    #define NODE_LAST NODE_LAST
    };
    @@ -450,6 +452,7 @@ typedef struct RNode {
    #define NEW_NIL() NEW_NODE(NODE_NIL,0,0,0)
    #define NEW_TRUE() NEW_NODE(NODE_TRUE,0,0,0)
    #define NEW_FALSE() NEW_NODE(NODE_FALSE,0,0,0)
    +#define NEW_FILE() NEW_NODE(NODE_FILE,0,0,0)
    #define NEW_ERRINFO() NEW_NODE(NODE_ERRINFO,0,0,0)
    #define NEW_DEFINED(e) NEW_NODE(NODE_DEFINED,e,0,0)
    #define NEW_PREEXE(b) NEW_SCOPE(b)
    diff --git a/parse.y b/parse.y
    index 0f0fbe8..a177ca0 100644
    --- a/parse.y
    +++ b/parse.y
    @@ -8429,8 +8429,7 @@ gettable_gen(struct parser_params *parser, ID id)
    case keyword_false:
    return NEW_FALSE();
    case keyword__FILE__:
  • return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile),
  •   				    rb_filesystem_encoding()));
    
  • return NEW_FILE();
    case keyword__LINE__:
    return NEW_LIT(INT2FIX(tokline));
    case keyword__ENCODING__:
    diff --git a/test/ruby/test_literal.rb b/test/ruby/test_literal.rb
    index e4c35e0..1dbcf8e 100644
    --- a/test/ruby/test_literal.rb
    +++ b/test/ruby/test_literal.rb
    @@ -362,6 +362,7 @@ class TestRubyLiteral < Test::Unit::TestCase
    def test__FILE__
    assert_instance_of String, FILE
    assert_equal FILE, FILE
  • assert_equal FILE.object_id, FILE.object_id
    assert_equal 'test_literal.rb', File.basename(FILE)
    end

diff --git a/test/rubygems/test_gem.rb b/test/rubygems/test_gem.rb
index b6c7465..1b2ee37 100644
--- a/test/rubygems/test_gem.rb
+++ b/test/rubygems/test_gem.rb
@@ -1489,7 +1489,7 @@ class TestGem < Gem::TestCase
assert_equal [a,b,c], Gem.detect_gemdeps
end

  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.untaint
  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.dup.untaint

    def test_looks_for_gemdeps_files_automatically_on_start
    util_clear_gems
    diff --git a/vm_eval.c b/vm_eval.c
    index 7c18f70..bc54646 100644
    --- a/vm_eval.c
    +++ b/vm_eval.c
    @@ -19,6 +19,8 @@ static VALUE vm_exec(rb_thread_t *th);
    static void vm_set_eval_stack(rb_thread_t * th, VALUE iseqval, const NODE *cref, rb_block_t *base_block);
    static int vm_collect_local_variables_in_heap(rb_thread_t *th, VALUE *dfp, VALUE ary);

+static VALUE eval_file;
+
/* vm_backtrace.c */
VALUE rb_vm_backtrace_str_ary(rb_thread_t *th, int lev, int n);

@@ -1156,7 +1158,7 @@ rb_each(VALUE obj)
}

static VALUE
-eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)
{
int state;
VALUE result = Qundef;
@@ -1168,8 +1170,8 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
volatile int parse_in_eval;
volatile int mild_compile_error;

  • if (file == 0) {
  • file = rb_sourcefile();
  • if (!RTEST(file)) {
  • file = rb_sourcefilename();
    line = rb_sourceline();
    }

@@ -1184,8 +1186,8 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
if (rb_obj_is_kind_of(scope, rb_cBinding)) {
GetBindingPtr(scope, bind);
envval = bind->env;

  •   if (strcmp(file, "(eval)") == 0 && bind->path != Qnil) {
    
  •       file = RSTRING_PTR(bind->path);
    
  •   if (rb_str_cmp(file, eval_file) == 0 && bind->path != Qnil) {
    
  •       file = bind->path;
          line = bind->first_lineno;
      }
      }
    

@@ -1214,7 +1216,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE cref, const char
/
make eval iseq */
th->parse_in_eval++;
th->mild_compile_error++;

  • iseqval = rb_iseq_compile_on_base(src, rb_str_new2(file), INT2FIX(line), base_block);
  • iseqval = rb_iseq_compile_on_base(src, file, INT2FIX(line), base_block);
    th->mild_compile_error--;
    th->parse_in_eval--;

@@ -1242,7 +1244,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
if (state) {
if (state == TAG_RAISE) {
VALUE errinfo = th->errinfo;

  •   if (strcmp(file, "(eval)") == 0) {
    
  •   if (rb_str_cmp(file, eval_file) == 0) {
      VALUE mesg, errat, bt2;
      ID id_mesg;
    

@@ -1272,7 +1274,7 @@ eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char
}

static VALUE
-eval_string(VALUE self, VALUE src, VALUE scope, const char *file, int line)
+eval_string(VALUE self, VALUE src, VALUE scope, VALUE file, int line)
{
return eval_string_with_cref(self, src, scope, 0, file, line);
}
@@ -1299,7 +1301,7 @@ VALUE
rb_f_eval(int argc, VALUE *argv, VALUE self)
{
VALUE src, scope, vfile, vline;

  • const char *file = "(eval)";
  • VALUE file = eval_file;
    int line = 1;

    rb_scan_args(argc, argv, "13", &src, &scope, &vfile, &vline);
    @@ -1321,7 +1323,7 @@ rb_f_eval(int argc, VALUE *argv, VALUE self)
    }

    if (!NIL_P(vfile))

  • file = RSTRING_PTR(vfile);
  • file = vfile;
    return eval_string(self, src, scope, file, line);
    }

@@ -1329,7 +1331,7 @@ rb_f_eval(int argc, VALUE *argv, VALUE self)
VALUE
ruby_eval_string_from_file(const char *str, const char *filename)
{

  • return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, filename, 1);
  • return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, rb_str_new2(filename), 1);
    }

struct eval_string_from_file_arg {
@@ -1341,7 +1343,7 @@ static VALUE
eval_string_from_file_helper(void *data)
{
const struct eval_string_from_file_arg const arg = (struct eval_string_from_file_arg)data;

  • return eval_string(rb_vm_top_self(), rb_str_new2(arg->str), Qnil, arg->filename, 1);
  • return eval_string(rb_vm_top_self(), rb_str_new2(arg->str), Qnil, rb_str_new2(arg->filename), 1);
    }

VALUE
@@ -1368,7 +1370,7 @@ ruby_eval_string_from_file_protect(const char *str, const char *filename, int *s
VALUE
rb_eval_string(const char *str)
{

  • return ruby_eval_string_from_file(str, "eval");
  • return eval_string(rb_vm_top_self(), rb_str_new2(str), Qnil, eval_file, 1);
    }

/**
@@ -1509,7 +1511,7 @@ rb_yield_refine_block(VALUE refinement, VALUE refinements)

/* string eval under the class/module context */
static VALUE
-eval_under(VALUE under, VALUE self, VALUE src, const char *file, int line)
+eval_under(VALUE under, VALUE self, VALUE src, VALUE file, int line)
{
NODE *cref = vm_cref_push(GET_THREAD(), under, NOEX_PUBLIC, NULL);

@@ -1534,7 +1536,7 @@ specific_eval(int argc, VALUE *argv, VALUE klass, VALUE self)
return yield_under(klass, self, Qundef);
}
else {

  • const char *file = "(eval)";
  • VALUE file = eval_file;
    int line = 1;

    rb_check_arity(argc, 1, 3);
    @@ -1547,7 +1549,7 @@ specific_eval(int argc, VALUE *argv, VALUE klass, VALUE self)
    if (argc > 2)
    line = NUM2INT(argv[2]);
    if (argc > 1) {

  •   file = StringValuePtr(argv[1]);
    
  •   file = StringValue(argv[1]);
    
    }
    return eval_under(klass, self, argv[0], file, line);
    }
    @@ -1928,6 +1930,9 @@ rb_current_realfilepath(void)
    void
    Init_vm_eval(void)
    {
  • eval_file = rb_str_new2("(eval)");
  • rb_gc_register_address(&eval_file);
  • rb_define_global_function("eval", rb_f_eval, -1);
    rb_define_global_function("local_variables", rb_f_local_variables, 0);
    rb_define_global_function("iterator?", rb_f_block_given_p, 0);

Related issues 1 (0 open1 closed)

Related to Ruby master - Bug #9159: [patch] use rb_fstring for internal stringsClosed11/26/2013Actions

Updated by tmm1 (Aman Karmani) over 11 years ago

  • Assignee set to tmm1 (Aman Karmani)

ko1-san, do you have any opinion on this patch? Is there a simpler solution instead of adding NODE_FILE?

I converted all eval functions inside the VM to use VALUE filename instead of char *filename, so it is easier to re-use existing RString.

  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.untaint
  • LIB_PATH = File.expand_path "../../../lib".untaint, FILE.dup.untaint

I had to add dup because FILE is frozen now.

-eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, const char *volatile file, volatile int line)
+eval_string_with_cref(VALUE self, VALUE src, VALUE scope, NODE *cref, volatile VALUE file, volatile int line)

I'm not sure why it was using const char* volatile before. Is volatile VALUE necessary here?

   case keyword__FILE__:
  • return NEW_STR(rb_external_str_new_with_enc(ruby_sourcefile, strlen(ruby_sourcefile),
  • 				    rb_filesystem_encoding()));
    
  • return NEW_FILE();
  •  case NODE_FILE:{
    
  • if (!poped) {
  • ADD_INSN1(ret, line, putobject, iseq->location.path);
    

Will iseq->location.path during evaluation always be equal to ruby_sourcefile from parse phase?

Updated by headius (Charles Nutter) over 11 years ago

I'd like to work with you to find more places we could be sharing frozen strings. A change over the summer made defined? results always be shared frozen strings, and apparently that was a source of lots and lots of extra string creation in Rails (according to wycats). There's many other such opportunities.

If course if FILE were always returning shared, frozen Strings, it would be far cheaper; one String ever for a given filename.

Updated by normalperson (Eric Wong) over 11 years ago

"headius (Charles Nutter)" wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rb_str_dup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

Updated by tmm1 (Aman Karmani) over 11 years ago

On Wed, Apr 17, 2013 at 11:49 AM, Eric Wong wrote:

"headius (Charles Nutter)" wrote:

I'd like to work with you to find more places we could be sharing
frozen strings. A change over the summer made defined? results always
be shared frozen strings, and apparently that was a source of lots and
lots of extra string creation in Rails (according to wycats). There's
many other such opportunities.

I've considered (but never got around to implementing/testing) caching
for rb_str_dup_frozen. This might help with hashes which use common
strings as keys (e.g. CGI params and HTTP headers).

Perhaps one of you can experiment with this before I can get around
to it..

I actually started on this a few weeks ago. I added a frozen string
cache and was able to remove a large number of duplicated strings from
the Ruby heap.

I'll clean up the patch and open a redmine issue for review.

Aman

Updated by tmm1 (Aman Karmani) over 11 years ago

  • Status changed from Open to Rejected

Is there a simpler solution instead of adding NODE_FILE?

To fix this simple case, NODE_FILE is unnecessary:

def test__FILE__shared
assert_equal FILE.object_id, FILE.object_id
end

Here we can just ensure parse.y generates a NODE_LIT re-using an existing string (instead of a NODE_STR with a new string every time). All instances of FILE encountered in one parse invocation would re-use same string.

But things get tricky when you want to share path name strings across parse invocations, i.e. for dynamically generated code. You need to emit a NODE_FILE to make this work:

def test__FILE__shared
file = "filename"
obj = Object.new
class << obj
class_eval <<-RUBY, file, 0
def filename
FILE
end
RUBY
end
assert_equal file.object_id, obj.filename.object_id
end

Since all the parse.y APIs use char* we have to use RSTRING_PTR, and then end up creating a new RString inside parse.y. The NODE_FILE avoids this by acting as a temporary placeholder that can be swapped out for the original RString in compile.c.

Actions

Also available in: Atom PDF

Like0
Like0Like0Like0Like0Like0