Project

General

Profile

Actions

Backport #8048

closed

require() features_index bloats size of ruby heap

Added by tmm1 (Aman Karmani) almost 12 years ago. Updated over 11 years ago.


Description

The new features_index for require() in 2.0.0 creates a lot of extra ARRAY and STRING objects. In a big rails application, there are ~70k extra strings and ~70k extra arrays on the heap after boot.

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require "./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, :T_ARRAY, :T_HASH].include? k } '
1.9.3
[:NUM_LOADED_FEATURES, 2933]
{:T_STRING=>291147, :T_ARRAY=>97402, :T_HASH=>9439}

$ RAILS_ENV=production ruby -e' puts RUBY_VERSION; require "./config/environment"; p [:NUM_LOADED_FEATURES, $LOADED_FEATURES.size]; GC.start; p ObjectSpace.count_objects.select{ |k,v| [:T_STRING, :T_ARRAY, :T_HASH].include? k } '
2.0.0
[:NUM_LOADED_FEATURES, 2945]
{:T_STRING=>363183, :T_ARRAY=>167871, :T_HASH=>9640}

The following patch reduces the number of arrays used by the features_index:

diff --git a/load.c b/load.c
index ea22d5b..1868518 100644
--- a/load.c
+++ b/load.c
@@ -186,11 +186,16 @@ features_index_add_single(VALUE short_feature, VALUE offset)
{
VALUE features_index, this_feature_index;
features_index = get_loaded_features_index_raw();

  • if ((this_feature_index = rb_hash_lookup(features_index, short_feature)) == Qnil) {
  • this_feature_index = rb_ary_new();
  • rb_hash_aset(features_index, short_feature, this_feature_index);
  • this_feature_index = rb_hash_lookup(features_index, short_feature);
  • if (this_feature_index == Qnil) {
  •    rb_hash_aset(features_index, short_feature, offset);
    
  • } else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
  •    this_feature_index = rb_ary_new3(2, this_feature_index, offset);
    
  •    rb_hash_aset(features_index, short_feature, this_feature_index);
    
  • } else if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
  •    rb_ary_push(this_feature_index, offset);
    
    }
  • rb_ary_push(this_feature_index, offset);
    }

/* Add to the loaded-features index all the required entries for
@@ -389,8 +394,16 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
or ends in '/'. This includes both match forms above, as well
as any distractors, so we may ignore all other entries in features.
*/

  • for (i = 0; this_feature_index != Qnil && i < RARRAY_LEN(this_feature_index); i++) {
  • long index = FIX2LONG(rb_ary_entry(this_feature_index, i));
  • for (i = 0; this_feature_index != Qnil; i++) {
  • long index;
  • if (RB_TYPE_P(this_feature_index, T_ARRAY)) {
  • if (i >= RARRAY_LEN(this_feature_index)) break;
    
  • index = FIX2LONG(rb_ary_entry(this_feature_index, i));
    
  • } else {
  • if (i > 0) break;
    
  • index = FIX2LONG(this_feature_index);
    
  • }
  • v = RARRAY_PTR(features)[index];
    f = StringValuePtr(v);
    if ((n = RSTRING_LEN(v)) < len) continue;

Related issues 1 (0 open1 closed)

Related to Ruby master - Bug #9201: [patch] remove GC overhead for loaded_features_indexClosednobu (Nobuyoshi Nakada)12/03/2013Actions
Actions #1

Updated by nobu (Nobuyoshi Nakada) almost 12 years ago

  • Status changed from Open to Closed
  • % Done changed from 0 to 100

This issue was solved with changeset r39644.
Aman, thank you for reporting this issue.
Your contribution to Ruby is greatly appreciated.
May Ruby be with you.


load.c: reduce indexes arrays

  • load.c (features_index_add_single, rb_feature_p): store single index
    as Fixnum to reduce the number of arrays for the indexes. based on
    the patch by tmm1 (Aman Gupta) in [ruby-core:53216] [Bug #8048].

Updated by tmm1 (Aman Karmani) almost 12 years ago

Thank you.

Is there some reason loaded_features_index cannot use st_init_strtable()? Will you accept an additional patch to this effect?

Updated by tmm1 (Aman Karmani) almost 12 years ago

diff --git a/load.c b/load.c
index 6330e50..3010b18 100644
--- a/load.c
+++ b/load.c
@@ -171,7 +171,7 @@ reset_loaded_features_snapshot(void)
rb_ary_replace(vm->loaded_features_snapshot, vm->loaded_features);
}

-static VALUE
+static struct st_table *
get_loaded_features_index_raw(void)
{
return GET_VM()->loaded_features_index;
@@ -186,12 +186,19 @@ get_loading_table(void)
static void
features_index_add_single(VALUE short_feature, VALUE offset)
{

  • VALUE features_index, this_feature_index;
  • struct st_table *features_index;
  • VALUE this_feature_index = Qnil;
  • char *short_feature_cstr;
  • Check_Type(offset, T_FIXNUM);
  • Check_Type(short_feature, T_STRING);
  • short_feature_cstr = RSTRING_PTR(short_feature);
  • features_index = get_loaded_features_index_raw();
  • this_feature_index = rb_hash_lookup(features_index, short_feature);
  • st_lookup(features_index, (st_data_t)short_feature_cstr, (st_data_t *)&this_feature_index);

    if (NIL_P(this_feature_index)) {

  • rb_hash_aset(features_index, short_feature, offset);
  • st_insert(features_index, (st_data_t)ruby_strdup(short_feature_cstr), (st_data_t)offset);
    }
    else if (RB_TYPE_P(this_feature_index, T_FIXNUM)) {
    VALUE feature_indexes[2];
    @@ -199,7 +206,7 @@ features_index_add_single(VALUE short_feature, VALUE offset)
    feature_indexes[1] = offset;
    this_feature_index = rb_ary_tmp_new(numberof(feature_indexes));
    rb_ary_cat(this_feature_index, feature_indexes, numberof(feature_indexes));
  • rb_hash_aset(features_index, short_feature, this_feature_index);
  • st_insert(features_index, (st_data_t)short_feature_cstr, (st_data_t)this_feature_index);
    }
    else {
    Check_Type(this_feature_index, T_ARRAY);
    @@ -254,7 +261,14 @@ features_index_add(VALUE feature, VALUE offset)
    }
    }

-static VALUE
+static int
+loaded_features_index_clear_i(st_data_t key, st_data_t val, st_data_t arg)
+{

  • xfree((char*) key);
  • return ST_DELETE;
    +}

+static st_table *
get_loaded_features_index(void)
{
VALUE features;
@@ -264,7 +278,7 @@ get_loaded_features_index(void)
if (!rb_ary_shared_with_p(vm->loaded_features_snapshot, vm->loaded_features)) {
/* The sharing was broken; something (other than us in rb_provide_feature())
modified loaded_features. Rebuild the index. */

  • rb_hash_clear(vm->loaded_features_index);
  • st_foreach(vm->loaded_features_index, loaded_features_index_clear_i, 0);
    features = vm->loaded_features;
    for (i = 0; i < RARRAY_LEN(features); i++) {
    VALUE entry, as_str;
    @@ -357,10 +371,10 @@ loaded_feature_path_i(st_data_t v, st_data_t b, st_data_t f)
    static int
    rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const char **fn)
    {
  • VALUE features, features_index, feature_val, this_feature_index, v, p, load_path = 0;
  • VALUE features, feature_val, this_feature_index = Qnil, v, p, load_path = 0;
    const char *f, *e;
    long i, len, elen, n;
  • st_table *loading_tbl;
  • st_table *loading_tbl, *features_index;
    st_data_t data;
    int type;

@@ -379,7 +393,7 @@ rb_feature_p(const char *feature, const char *ext, int rb, int expanded, const c
features_index = get_loaded_features_index();

 feature_val = rb_str_new(feature, len);
  • this_feature_index = rb_hash_lookup(features_index, feature_val);
  • st_lookup(features_index, (st_data_t)feature, (st_data_t )&this_feature_index);
    /
    We search features for an entry such that either
    "#{features[i]}" == "#{load_path[j]}/#{feature}#{e}"
    for some j, or
    @@ -1120,8 +1134,7 @@ Init_load()
    rb_define_virtual_variable("$LOADED_FEATURES", get_loaded_features, 0);
    vm->loaded_features = rb_ary_new();
    vm->loaded_features_snapshot = rb_ary_tmp_new(0);
  • vm->loaded_features_index = rb_hash_new();
  • RBASIC(vm->loaded_features_index)->klass = 0;
  • vm->loaded_features_index = st_init_strtable();

    rb_define_global_function("load", rb_f_load, -1);
    rb_define_global_function("require", rb_f_require, 1);
    diff --git a/vm.c b/vm.c
    index 9126502..b4132cd 100644
    --- a/vm.c
    +++ b/vm.c
    @@ -1565,7 +1565,6 @@ rb_vm_mark(void *ptr)
    RUBY_MARK_UNLESS_NULL(vm->expanded_load_path);
    RUBY_MARK_UNLESS_NULL(vm->loaded_features);
    RUBY_MARK_UNLESS_NULL(vm->loaded_features_snapshot);

  • RUBY_MARK_UNLESS_NULL(vm->loaded_features_index);
    RUBY_MARK_UNLESS_NULL(vm->top_self);
    RUBY_MARK_UNLESS_NULL(vm->coverages);
    rb_gc_mark_locations(vm->special_exceptions, vm->special_exceptions + ruby_special_error_count);
    @@ -1573,6 +1572,9 @@ rb_vm_mark(void *ptr)
    if (vm->loading_table) {
    rb_mark_tbl(vm->loading_table);
    }
  • if (vm->loaded_features_index) {

  •   rb_mark_tbl(vm->loaded_features_index);
    
  • }

    rb_vm_trace_mark_event_hooks(&vm->event_hooks);

diff --git a/vm_core.h b/vm_core.h
index 1838f2a..2ac57fc 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -364,7 +364,7 @@ typedef struct rb_vm_struct {
VALUE expanded_load_path;
VALUE loaded_features;
VALUE loaded_features_snapshot;

  • VALUE loaded_features_index;
  • struct st_table *loaded_features_index;
    struct st_table *loading_table;

    /* signal */

Updated by tmm1 (Aman Karmani) almost 12 years ago

  • Check_Type(short_feature, T_STRING);
  • short_feature_cstr = RSTRING_PTR(short_feature);

This should be StringValueCStr(), because short_feature is created from rb_str_substr().

Is it possible for features to contain a NUL byte? Maybe it is preferable to add st_init_bintable() which can accept a string length.

Updated by nobu (Nobuyoshi Nakada) almost 12 years ago

tmm1 (Aman Gupta) wrote:

Is it possible for features to contain a NUL byte?

No, feature names are checked with FilePathValue().

Maybe it is preferable to add st_init_bintable() which can accept a string length.

Since st interfaces take only one parameter for the key, it will need a struct, but we already have such struct, RString.
Perhaps we could make a custom st_table which uses the functions in string.c.

Updated by tmm1 (Aman Karmani) almost 12 years ago

nobu-san,

What do you think of applying StringValueCStr() version of this patch? Since feature names cannot contain NULL bytes, it should be OK I think.

Actions #7

Updated by nagachika (Tomoyuki Chikanaga) over 11 years ago

  • Tracker changed from Bug to Backport
  • Project changed from Ruby master to Backport200
  • Status changed from Closed to Assigned
  • Assignee set to nagachika (Tomoyuki Chikanaga)

I will backport this because #8165 depend on this feature.

Updated by nagachika (Tomoyuki Chikanaga) over 11 years ago

... and add r39646, r39647 to reduce conflict.

Actions #9

Updated by nagachika (Tomoyuki Chikanaga) over 11 years ago

  • Status changed from Assigned to Closed

This issue was solved with changeset r40397.
Aman, thank you for reporting this issue.
Your contribution to Ruby is greatly appreciated.
May Ruby be with you.


merge revision(s) 39644,39646,39647,39874,39898: [Backport #8048]

* load.c (features_index_add_single, rb_feature_p): store single index
  as Fixnum to reduce the number of arrays for the indexes.  based on
  the patch by tmm1 (Aman Gupta) in [ruby-core:53216] [Bug #8048].

* load.c (rb_feature_p), vm_core.h (rb_vm_struct): turn
  loaded_features_index into st_table.  patches by tmm1 (Aman Gupta)
  in [ruby-core:53251] and [ruby-core:53274] [Bug #8048]
Actions

Also available in: Atom PDF

Like0
Like0Like0Like0Like0Like0Like0Like0Like0Like0