From ecd8b412a3f2d606d31486e6819a6623ba0bb8bd Mon Sep 17 00:00:00 2001 From: Aaron Patterson Date: Wed, 27 Feb 2019 15:37:20 -0800 Subject: [PATCH] GC Compaction for MRI --- class.c | 2 + constant.h | 4 +- ext/-test-/memory_location/extconf.rb | 7 + ext/-test-/memory_location/memory_location.c | 25 + gc.c | 1125 +++++++++++++++++- gc.h | 4 + hash.c | 2 + id_table.c | 21 + id_table.h | 3 + include/ruby/intern.h | 5 + include/ruby/ruby.h | 15 +- include/ruby/st.h | 3 +- internal.h | 36 +- iseq.c | 102 +- iseq.h | 2 +- method.h | 26 +- st.c | 23 +- symbol.c | 7 +- symbol.h | 7 + test/-ext-/gc_compact/test_gc_compact.rb | 87 ++ variable.c | 32 +- vm.c | 31 +- vm_core.h | 6 +- vm_eval.c | 1 + vm_method.c | 2 +- 25 files changed, 1461 insertions(+), 117 deletions(-) create mode 100644 ext/-test-/memory_location/extconf.rb create mode 100644 ext/-test-/memory_location/memory_location.c create mode 100644 test/-ext-/gc_compact/test_gc_compact.rb diff --git a/class.c b/class.c index 051632e08c..81ecf07232 100644 --- a/class.c +++ b/class.c @@ -539,6 +539,7 @@ boot_defclass(const char *name, VALUE super) rb_name_class(obj, id); rb_const_set((rb_cObject ? rb_cObject : obj), id, obj); + rb_vm_add_root_module(id, obj); return obj; } @@ -781,6 +782,7 @@ rb_define_module(const char *name) } module = rb_define_module_id(id); rb_vm_add_root_module(id, module); + rb_gc_register_mark_object(module); rb_const_set(rb_cObject, id, module); return module; diff --git a/constant.h b/constant.h index fcccf07384..6c8cda08db 100644 --- a/constant.h +++ b/constant.h @@ -31,8 +31,8 @@ typedef enum { typedef struct rb_const_entry_struct { rb_const_flag_t flag; int line; - const VALUE value; /* should be mark */ - const VALUE file; /* should be mark */ + VALUE value; /* should be mark */ + VALUE file; /* should be mark */ } rb_const_entry_t; VALUE rb_mod_private_constant(int argc, const VALUE *argv, VALUE obj); diff --git a/ext/-test-/memory_location/extconf.rb b/ext/-test-/memory_location/extconf.rb new file mode 100644 index 0000000000..38892a3958 --- /dev/null +++ b/ext/-test-/memory_location/extconf.rb @@ -0,0 +1,7 @@ +# frozen_string_literal: false +$srcs = Dir[File.join($srcdir, "*.{#{SRC_EXT.join(%q{,})}}")] +inits = $srcs.map {|s| File.basename(s, ".*")} +inits.delete("init") +inits.map! {|s|"X(#{s})"} +$defs << "-DTEST_INIT_FUNCS(X)=\"#{inits.join(' ')}\"" +create_makefile("-test-/memory_location") diff --git a/ext/-test-/memory_location/memory_location.c b/ext/-test-/memory_location/memory_location.c new file mode 100644 index 0000000000..6f93d0f03e --- /dev/null +++ b/ext/-test-/memory_location/memory_location.c @@ -0,0 +1,25 @@ +#include "ruby.h" + +#if SIZEOF_LONG == SIZEOF_VOIDP +# define nonspecial_obj_id(obj) (VALUE)((SIGNED_VALUE)(obj)|FIXNUM_FLAG) +# define obj_id_to_ref(objid) ((objid) ^ FIXNUM_FLAG) /* unset FIXNUM_FLAG */ +#elif SIZEOF_LONG_LONG == SIZEOF_VOIDP +# define nonspecial_obj_id(obj) LL2NUM((SIGNED_VALUE)(obj) / 2) +# define obj_id_to_ref(objid) (FIXNUM_P(objid) ? 
\ + ((objid) ^ FIXNUM_FLAG) : (NUM2PTR(objid) << 1)) +#else +# error not supported +#endif + +static VALUE +rb_memory_location(VALUE self) +{ + return nonspecial_obj_id(self); +} + +void +Init_memory_location(void) +{ + rb_define_method(rb_mKernel, "memory_location", rb_memory_location, 0); +} + diff --git a/gc.c b/gc.c index 9f6dd75400..3281455f0f 100644 --- a/gc.c +++ b/gc.c @@ -29,6 +29,7 @@ #include "ruby_atomic.h" #include "probes.h" #include "id_table.h" +#include "symbol.h" #include #include #include @@ -194,6 +195,9 @@ static ruby_gc_params_t gc_params = { FALSE, }; +static st_table *id_to_obj_tbl; +static st_table *obj_to_id_tbl; + /* GC_DEBUG: * enable to embed GC debugging information. */ @@ -404,6 +408,7 @@ typedef struct RVALUE { VALUE flags; /* always 0 for freed obj */ struct RVALUE *next; } free; + struct RMoved moved; struct RBasic basic; struct RObject object; struct RClass klass; @@ -581,6 +586,7 @@ typedef struct rb_objspace { #if USE_RGENGC size_t minor_gc_count; size_t major_gc_count; + size_t object_id_collisions; #if RGENGC_PROFILE > 0 size_t total_generated_normal_object_count; size_t total_generated_shady_object_count; @@ -635,6 +641,12 @@ typedef struct rb_objspace { size_t error_count; #endif } rgengc; + + struct { + size_t considered_count_table[T_MASK]; + size_t moved_count_table[T_MASK]; + } rcompactor; + #if GC_ENABLE_INCREMENTAL_MARK struct { size_t pooled_slots; @@ -682,6 +694,8 @@ struct heap_page { #if USE_RGENGC bits_t wb_unprotected_bits[HEAP_PAGE_BITMAP_LIMIT]; #endif + /* If set, the object is not movable */ + bits_t pinned_bits[HEAP_PAGE_BITMAP_LIMIT]; /* the following three bitmaps are cleared at the beginning of full GC */ bits_t mark_bits[HEAP_PAGE_BITMAP_LIMIT]; #if USE_RGENGC @@ -706,6 +720,7 @@ struct heap_page { /* getting bitmap */ #define GET_HEAP_MARK_BITS(x) (&GET_HEAP_PAGE(x)->mark_bits[0]) +#define GET_HEAP_PINNED_BITS(x) (&GET_HEAP_PAGE(x)->pinned_bits[0]) #if USE_RGENGC #define GET_HEAP_UNCOLLECTIBLE_BITS(x) (&GET_HEAP_PAGE(x)->uncollectible_bits[0]) #define GET_HEAP_WB_UNPROTECTED_BITS(x) (&GET_HEAP_PAGE(x)->wb_unprotected_bits[0]) @@ -826,7 +841,9 @@ VALUE rb_mGC; int ruby_disable_gc = 0; void rb_iseq_mark(const rb_iseq_t *iseq); +void rb_iseq_update_references(rb_iseq_t *iseq); void rb_iseq_free(const rb_iseq_t *iseq); +void rb_vm_update_references(void *ptr); void rb_gcdebug_print_obj_condition(VALUE obj); @@ -861,8 +878,11 @@ static void gc_sweep_rest(rb_objspace_t *objspace); static void gc_sweep_continue(rb_objspace_t *objspace, rb_heap_t *heap); static inline void gc_mark(rb_objspace_t *objspace, VALUE ptr); +static inline void gc_pin(rb_objspace_t *objspace, VALUE ptr); +static inline void gc_mark_and_pin(rb_objspace_t *objspace, VALUE ptr); static void gc_mark_ptr(rb_objspace_t *objspace, VALUE ptr); NO_SANITIZE("memory", static void gc_mark_maybe(rb_objspace_t *objspace, VALUE ptr)); +static void gc_mark_and_pin_maybe(rb_objspace_t *objspace, VALUE ptr); static void gc_mark_children(rb_objspace_t *objspace, VALUE ptr); static int gc_mark_stacked_objects_incremental(rb_objspace_t *, size_t count); @@ -895,6 +915,14 @@ static inline void gc_prof_sweep_timer_stop(rb_objspace_t *); static inline void gc_prof_set_malloc_info(rb_objspace_t *); static inline void gc_prof_set_heap_info(rb_objspace_t *); +#define TYPED_UPDATE_IF_MOVED(_objspace, _type, _thing) do { \ + if (gc_object_moved_p(_objspace, (VALUE)_thing)) { \ + (_thing) = (_type)RMOVED((_thing))->destination; \ + } \ +} while (0) + +#define UPDATE_IF_MOVED(_objspace, 
_thing) TYPED_UPDATE_IF_MOVED(_objspace, VALUE, _thing) + #define gc_prof_record(objspace) (objspace)->profile.current_record #define gc_prof_enabled(objspace) ((objspace)->profile.run && (objspace)->profile.current_record) @@ -1020,6 +1048,7 @@ tick(void) #define FL_UNSET2(x,f) FL_CHECK2("FL_UNSET2", x, RBASIC(x)->flags &= ~(f)) #define RVALUE_MARK_BITMAP(obj) MARKED_IN_BITMAP(GET_HEAP_MARK_BITS(obj), (obj)) +#define RVALUE_PIN_BITMAP(obj) MARKED_IN_BITMAP(GET_HEAP_PINNED_BITS(obj), (obj)) #define RVALUE_PAGE_MARKED(page, obj) MARKED_IN_BITMAP((page)->mark_bits, (obj)) #if USE_RGENGC @@ -1113,6 +1142,16 @@ check_rvalue_consistency(const VALUE obj) } #endif +static inline int +gc_object_moved_p(rb_objspace_t * objspace, VALUE obj) +{ + if (RB_SPECIAL_CONST_P(obj)) { + return FALSE; + } else { + return BUILTIN_TYPE(obj) == T_MOVED; + } +} + static inline int RVALUE_MARKED(VALUE obj) { @@ -1120,6 +1159,13 @@ RVALUE_MARKED(VALUE obj) return RVALUE_MARK_BITMAP(obj) != 0; } +static inline int +RVALUE_PINNED(VALUE obj) +{ + check_rvalue_consistency(obj); + return RVALUE_PIN_BITMAP(obj) != 0; +} + #if USE_RGENGC static inline int RVALUE_WB_UNPROTECTED(VALUE obj) @@ -2191,6 +2237,14 @@ obj_free(rb_objspace_t *objspace, VALUE obj) rb_free_generic_ivar((VALUE)obj); FL_UNSET(obj, FL_EXIVAR); } + VALUE id; + if (st_lookup(obj_to_id_tbl, (st_data_t)obj, &id)) { +#ifdef GC_COMPACT_DEBUG + fprintf(stderr, "Collecting %p -> %p\n", obj, obj_id_to_ref(id)); +#endif + st_delete(obj_to_id_tbl, (st_data_t *)&obj, 0); + st_delete(id_to_obj_tbl, (st_data_t *)&id, 0); + } #if USE_RGENGC if (RVALUE_WB_UNPROTECTED(obj)) CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS(obj), obj); @@ -2359,6 +2413,7 @@ obj_free(rb_objspace_t *objspace, VALUE obj) break; case T_RATIONAL: case T_COMPLEX: + case T_MOVED: break; case T_ICLASS: /* Basically , T_ICLASS shares table with the module */ @@ -2624,6 +2679,7 @@ internal_object_p(VALUE obj) UNEXPECTED_NODE(internal_object_p); break; case T_NONE: + case T_MOVED: case T_IMEMO: case T_ICLASS: case T_ZOMBIE: @@ -3203,10 +3259,15 @@ id2ref(VALUE obj, VALUE objid) if (ptr == Qtrue) return Qtrue; if (ptr == Qfalse) return Qfalse; if (ptr == Qnil) return Qnil; + if (FIXNUM_P(ptr)) return (VALUE)ptr; if (FLONUM_P(ptr)) return (VALUE)ptr; ptr = obj_id_to_ref(objid); + if (st_lookup(id_to_obj_tbl, objid, &ptr)) { + return ptr; + } + if ((ptr % sizeof(RVALUE)) == (4 << 2)) { ID symid = ptr / sizeof(RVALUE); if (rb_id2str(symid) == 0) @@ -3295,6 +3356,36 @@ rb_obj_id(VALUE obj) else if (SPECIAL_CONST_P(obj)) { return LONG2NUM((SIGNED_VALUE)obj); } + VALUE id; + + if (st_lookup(obj_to_id_tbl, (st_data_t)obj, &id)) { +#ifdef GC_COMPACT_DEBUG + fprintf(stderr, "Second time object_id was called on this object: %p\n", obj); +#endif + return id; + } else { + int tries; + id = nonspecial_obj_id(obj); + + while(1) { + /* id is the object id */ + if (st_lookup(id_to_obj_tbl, (st_data_t)id, 0)) { +#ifdef GC_COMPACT_DEBUG + fprintf(stderr, "object_id called on %p, but there was a collision at %d\n", obj, NUM2INT(id)); +#endif + rb_objspace_t *objspace = &rb_objspace; + objspace->profile.object_id_collisions++; + id += 40; + } else { +#ifdef GC_COMPACT_DEBUG + fprintf(stderr, "Initial insert: %p id: %d\n", obj, NUM2INT(id)); +#endif + st_insert(obj_to_id_tbl, (st_data_t)obj, id); + st_insert(id_to_obj_tbl, (st_data_t)id, obj); + return id; + } + } + } return nonspecial_obj_id(obj); } @@ -3417,6 +3508,7 @@ obj_memsize_of(VALUE obj, int use_all_types) break; case T_ZOMBIE: + case T_MOVED: break; default: @@ 
-3442,6 +3534,43 @@ set_zero(st_data_t key, st_data_t val, st_data_t arg) return ST_CONTINUE; } +static VALUE +type_sym(int type) +{ + switch (type) { +#define COUNT_TYPE(t) case (t): return ID2SYM(rb_intern(#t)); break; + COUNT_TYPE(T_NONE); + COUNT_TYPE(T_OBJECT); + COUNT_TYPE(T_CLASS); + COUNT_TYPE(T_MODULE); + COUNT_TYPE(T_FLOAT); + COUNT_TYPE(T_STRING); + COUNT_TYPE(T_REGEXP); + COUNT_TYPE(T_ARRAY); + COUNT_TYPE(T_HASH); + COUNT_TYPE(T_STRUCT); + COUNT_TYPE(T_BIGNUM); + COUNT_TYPE(T_FILE); + COUNT_TYPE(T_DATA); + COUNT_TYPE(T_MATCH); + COUNT_TYPE(T_COMPLEX); + COUNT_TYPE(T_RATIONAL); + COUNT_TYPE(T_NIL); + COUNT_TYPE(T_TRUE); + COUNT_TYPE(T_FALSE); + COUNT_TYPE(T_SYMBOL); + COUNT_TYPE(T_FIXNUM); + COUNT_TYPE(T_IMEMO); + COUNT_TYPE(T_UNDEF); + COUNT_TYPE(T_NODE); + COUNT_TYPE(T_ICLASS); + COUNT_TYPE(T_ZOMBIE); + COUNT_TYPE(T_MOVED); +#undef COUNT_TYPE + default: return INT2NUM(type); break; + } +} + /* * call-seq: * ObjectSpace.count_objects([result_hash]) -> hash @@ -3523,37 +3652,7 @@ count_objects(int argc, VALUE *argv, VALUE os) rb_hash_aset(hash, ID2SYM(rb_intern("FREE")), SIZET2NUM(freed)); for (i = 0; i <= T_MASK; i++) { - VALUE type; - switch (i) { -#define COUNT_TYPE(t) case (t): type = ID2SYM(rb_intern(#t)); break; - COUNT_TYPE(T_NONE); - COUNT_TYPE(T_OBJECT); - COUNT_TYPE(T_CLASS); - COUNT_TYPE(T_MODULE); - COUNT_TYPE(T_FLOAT); - COUNT_TYPE(T_STRING); - COUNT_TYPE(T_REGEXP); - COUNT_TYPE(T_ARRAY); - COUNT_TYPE(T_HASH); - COUNT_TYPE(T_STRUCT); - COUNT_TYPE(T_BIGNUM); - COUNT_TYPE(T_FILE); - COUNT_TYPE(T_DATA); - COUNT_TYPE(T_MATCH); - COUNT_TYPE(T_COMPLEX); - COUNT_TYPE(T_RATIONAL); - COUNT_TYPE(T_NIL); - COUNT_TYPE(T_TRUE); - COUNT_TYPE(T_FALSE); - COUNT_TYPE(T_SYMBOL); - COUNT_TYPE(T_FIXNUM); - COUNT_TYPE(T_IMEMO); - COUNT_TYPE(T_UNDEF); - COUNT_TYPE(T_ICLASS); - COUNT_TYPE(T_ZOMBIE); -#undef COUNT_TYPE - default: type = INT2NUM(i); break; - } + VALUE type = type_sym(i); if (counts[i]) rb_hash_aset(hash, type, SIZET2NUM(counts[i])); } @@ -4007,6 +4106,11 @@ free_stack_chunks(mark_stack_t *stack) static void push_mark_stack(mark_stack_t *stack, VALUE data) { + if (BUILTIN_TYPE(data) == T_MOVED) { + VALUE dest = (VALUE)RMOVED(data)->destination; + fprintf(stderr, "<%s>", obj_info(dest)); + rb_bug("moved item (%p -> %p (type: %d) should not be marked", (RVALUE *)data, (RVALUE *)dest, BUILTIN_TYPE(dest)); + } if (stack->index == stack->limit) { push_mark_stack_chunk(stack); } @@ -4168,7 +4272,7 @@ mark_locations_array(rb_objspace_t *objspace, register const VALUE *x, register VALUE v; while (n--) { v = *x; - gc_mark_maybe(objspace, v); + gc_mark_and_pin_maybe(objspace, v); x++; } } @@ -4189,6 +4293,16 @@ rb_gc_mark_locations(const VALUE *start, const VALUE *end) gc_mark_locations(&rb_objspace, start, end); } +static void +gc_mark_and_pin_values(rb_objspace_t *objspace, long n, const VALUE *values) +{ + long i; + + for (i=0; inum_entries == 0) return; + st_foreach(tbl, mark_entry_no_pin, (st_data_t)objspace); +} + static void mark_tbl(rb_objspace_t *objspace, st_table *tbl) { @@ -4247,7 +4396,11 @@ mark_keyvalue(st_data_t key, st_data_t value, st_data_t data) { rb_objspace_t *objspace = (rb_objspace_t *)data; - gc_mark(objspace, (VALUE)key); + if (SPECIAL_CONST_P((VALUE)key) || BUILTIN_TYPE((VALUE)key) == T_STRING) { + gc_mark(objspace, (VALUE)key); + } else { + gc_mark_and_pin(objspace, (VALUE)key); + } gc_mark(objspace, (VALUE)value); return ST_CONTINUE; } @@ -4427,6 +4580,25 @@ rb_mark_tbl(st_table *tbl) mark_tbl(&rb_objspace, tbl); } +void +rb_mark_tbl_no_pin(st_table *tbl) 
+{ + mark_tbl_no_pin(&rb_objspace, tbl); +} + +static void +gc_mark_and_pin_maybe(rb_objspace_t *objspace, VALUE obj) +{ + (void)VALGRIND_MAKE_MEM_DEFINED(&obj, sizeof(obj)); + if (is_pointer_to_heap(objspace, (void *)obj)) { + int type = BUILTIN_TYPE(obj); + if (type != T_MOVED && type != T_ZOMBIE && type != T_NONE) { + gc_pin(objspace, obj); + gc_mark_ptr(objspace, obj); + } + } +} + static void gc_mark_maybe(rb_objspace_t *objspace, VALUE obj) { @@ -4449,7 +4621,7 @@ gc_mark_maybe(rb_objspace_t *objspace, VALUE obj) void rb_gc_mark_maybe(VALUE obj) { - gc_mark_maybe(&rb_objspace, obj); + gc_mark_and_pin_maybe(&rb_objspace, obj); } static inline int @@ -4584,6 +4756,21 @@ gc_mark_ptr(rb_objspace_t *objspace, VALUE obj) } } +static inline void +gc_mark_and_pin(rb_objspace_t *objspace, VALUE obj) +{ + if (!is_markable_object(objspace, obj)) return; + MARK_IN_BITMAP(GET_HEAP_PINNED_BITS(obj), obj); + gc_mark_ptr(objspace, obj); +} + +static inline void +gc_pin(rb_objspace_t *objspace, VALUE obj) +{ + if (!is_markable_object(objspace, obj)) return; + MARK_IN_BITMAP(GET_HEAP_PINNED_BITS(obj), obj); +} + static inline void gc_mark(rb_objspace_t *objspace, VALUE obj) { @@ -4592,11 +4779,17 @@ gc_mark(rb_objspace_t *objspace, VALUE obj) } void -rb_gc_mark(VALUE ptr) +rb_gc_mark_no_pin(VALUE ptr) { gc_mark(&rb_objspace, ptr); } +void +rb_gc_mark(VALUE ptr) +{ + gc_mark_and_pin(&rb_objspace, ptr); +} + /* CAUTION: THIS FUNCTION ENABLE *ONLY BEFORE* SWEEPING. * This function is only for GC_END_MARK timing. */ @@ -4607,6 +4800,12 @@ rb_objspace_marked_object_p(VALUE obj) return RVALUE_MARKED(obj) ? TRUE : FALSE; } +int +rb_objspace_pinned_object_p(VALUE obj) +{ + return RVALUE_PINNED(obj) ? TRUE : FALSE; +} + static inline void gc_mark_set_parent(rb_objspace_t *objspace, VALUE obj) { @@ -4628,9 +4827,9 @@ gc_mark_imemo(rb_objspace_t *objspace, VALUE obj) { const rb_env_t *env = (const rb_env_t *)obj; GC_ASSERT(VM_ENV_ESCAPED_P(env->ep)); - gc_mark_values(objspace, (long)env->env_size, env->env); + gc_mark_and_pin_values(objspace, (long)env->env_size, env->env); VM_ENV_FLAGS_SET(env->ep, VM_ENV_FLAG_WB_REQUIRED); - gc_mark(objspace, (VALUE)rb_vm_env_prev_env(env)); + gc_mark_and_pin(objspace, (VALUE)rb_vm_env_prev_env(env)); gc_mark(objspace, (VALUE)env->iseq); } return; @@ -4715,7 +4914,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) case T_MODULE: mark_m_tbl(objspace, RCLASS_M_TBL(obj)); if (!RCLASS_EXT(obj)) break; - mark_tbl(objspace, RCLASS_IV_TBL(obj)); + mark_tbl_no_pin(objspace, RCLASS_IV_TBL(obj)); mark_const_tbl(objspace, RCLASS_CONST_TBL(obj)); gc_mark(objspace, RCLASS_SUPER((VALUE)obj)); break; @@ -4848,6 +5047,7 @@ gc_mark_children(rb_objspace_t *objspace, VALUE obj) #if GC_DEBUG rb_gcdebug_print_obj_condition((VALUE)obj); #endif + if (BUILTIN_TYPE(obj) == T_MOVED) rb_bug("rb_gc_mark(): %p is T_MOVED", (void *)obj); if (BUILTIN_TYPE(obj) == T_NONE) rb_bug("rb_gc_mark(): %p is T_NONE", (void *)obj); if (BUILTIN_TYPE(obj) == T_ZOMBIE) rb_bug("rb_gc_mark(): %p is T_ZOMBIE", (void *)obj); rb_bug("rb_gc_mark(): unknown data type 0x%x(%p) %s", @@ -5324,7 +5524,10 @@ verify_internal_consistency_i(void *page_start, void *page_end, size_t stride, v /* count objects */ data->live_object_count++; - rb_objspace_reachable_objects_from(obj, check_children_i, (void *)data); + if (!gc_object_moved_p(objspace, obj)) { + /* moved slots don't have children */ + rb_objspace_reachable_objects_from(obj, check_children_i, (void *)data); + } #if USE_RGENGC /* check health of children */ @@ -6049,6 
+6252,7 @@ rgengc_mark_and_rememberset_clear(rb_objspace_t *objspace, rb_heap_t *heap) list_for_each(&heap->pages, page, page_node) { memset(&page->mark_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); + memset(&page->pinned_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); memset(&page->marking_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); memset(&page->uncollectible_bits[0], 0, HEAP_PAGE_BITMAP_SIZE); page->flags.has_uncollectible_shady_objects = FALSE; @@ -6303,7 +6507,7 @@ rb_obj_gc_flags(VALUE obj, ID* flags, size_t max) size_t n = 0; static ID ID_marked; #if USE_RGENGC - static ID ID_wb_protected, ID_old, ID_marking, ID_uncollectible; + static ID ID_wb_protected, ID_old, ID_marking, ID_uncollectible, ID_pinned; #endif if (!ID_marked) { @@ -6314,6 +6518,7 @@ rb_obj_gc_flags(VALUE obj, ID* flags, size_t max) I(old); I(marking); I(uncollectible); + I(pinned); #endif #undef I } @@ -6325,6 +6530,7 @@ rb_obj_gc_flags(VALUE obj, ID* flags, size_t max) if (MARKED_IN_BITMAP(GET_HEAP_MARKING_BITS(obj), obj) && n= 5 + fprintf(stderr, "moving: %s -> ", obj_info(src)); +#endif + + /* Save off bits for current object. */ + marked = rb_objspace_marked_object_p((VALUE)src); + wb_unprotected = RVALUE_WB_UNPROTECTED((VALUE)src); + uncollectible = RVALUE_UNCOLLECTIBLE((VALUE)src); + marking = RVALUE_MARKING((VALUE)src); + + objspace->total_allocated_objects++; + + /* Clear bits for eventual T_MOVED */ + CLEAR_IN_BITMAP(GET_HEAP_MARK_BITS((VALUE)src), (VALUE)src); + CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS((VALUE)src), (VALUE)src); + CLEAR_IN_BITMAP(GET_HEAP_UNCOLLECTIBLE_BITS((VALUE)src), (VALUE)src); + CLEAR_IN_BITMAP(GET_HEAP_MARKING_BITS((VALUE)src), (VALUE)src); + + if (FL_TEST(src, FL_EXIVAR)) { + rb_mv_generic_ivar((VALUE)src, (VALUE)dest); + } + + VALUE id; + + /* If the source object's object_id has been seen, we need to update + * the object to object id mapping. 
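+ *
+ * rb_obj_id mints ids from object addresses, so an id that has already
+ * been handed out must keep resolving after its object moves. The
+ * lookup side, id2ref() above, consults the table before decoding the
+ * id as an address:
+ *
+ *     if (st_lookup(id_to_obj_tbl, objid, &ptr)) {
+ *         return ptr;
+ *     }
+ *
+ * which is why both directions of the mapping are rekeyed to the
+ * destination slot below.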
*/ + if(st_lookup(obj_to_id_tbl, (VALUE)src, &id)) { +#ifdef GC_COMPACT_DEBUG + fprintf(stderr, "Moving insert: %p -> %p\n", src, dest); +#endif + st_delete(obj_to_id_tbl, (st_data_t *)&src, 0); + st_insert(obj_to_id_tbl, (VALUE)dest, id); + st_update(id_to_obj_tbl, (st_data_t)id, update_id_to_obj, (st_data_t)dest); + } + + /* Move the object */ + memcpy(dest, src, sizeof(RVALUE)); + memset(src, 0, sizeof(RVALUE)); + + /* Set bits for object in new location */ + if (marking) { + MARK_IN_BITMAP(GET_HEAP_MARKING_BITS((VALUE)dest), (VALUE)dest); + } else { + CLEAR_IN_BITMAP(GET_HEAP_MARKING_BITS((VALUE)dest), (VALUE)dest); + } + + if (marked) { + MARK_IN_BITMAP(GET_HEAP_MARK_BITS((VALUE)dest), (VALUE)dest); + } else { + CLEAR_IN_BITMAP(GET_HEAP_MARK_BITS((VALUE)dest), (VALUE)dest); + } + + if (wb_unprotected) { + MARK_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS((VALUE)dest), (VALUE)dest); + } else { + CLEAR_IN_BITMAP(GET_HEAP_WB_UNPROTECTED_BITS((VALUE)dest), (VALUE)dest); + } + + if (uncollectible) { + MARK_IN_BITMAP(GET_HEAP_UNCOLLECTIBLE_BITS((VALUE)dest), (VALUE)dest); + } else { + CLEAR_IN_BITMAP(GET_HEAP_UNCOLLECTIBLE_BITS((VALUE)dest), (VALUE)dest); + } + + /* Assign forwarding address */ + src->as.moved.flags = T_MOVED; + src->as.moved.destination = (VALUE)dest; + +#if RGENGC_CHECK_MODE >= 5 + fprintf(stderr, "%s\n", obj_info(dest)); +#endif +} + +struct heap_cursor { + RVALUE *slot; + size_t index; + struct heap_page *page; + rb_objspace_t * objspace; +}; + +static void +advance_cursor(struct heap_cursor *free, struct heap_page **page_list) +{ + rb_objspace_t *objspace = free->objspace; + + if (free->slot == free->page->start + free->page->total_slots - 1) { + free->index++; + free->page = page_list[free->index]; + free->slot = free->page->start; + } else { + free->slot++; + } +} + +static void +retreat_cursor(struct heap_cursor *scan, struct heap_page **page_list) +{ + rb_objspace_t *objspace = scan->objspace; + + if (scan->slot == scan->page->start) { + scan->index--; + scan->page = page_list[scan->index]; + scan->slot = scan->page->start + scan->page->total_slots - 1; + } else { + scan->slot--; + } +} + +static int +not_met(struct heap_cursor *free, struct heap_cursor *scan) +{ + if (free->index < scan->index) + return 1; + + if (free->index > scan->index) + return 0; + + return free->slot < scan->slot; +} + +static void +init_cursors(rb_objspace_t *objspace, struct heap_cursor *free, struct heap_cursor *scan, struct heap_page **page_list) +{ + struct heap_page *page; + page = page_list[0]; + + free->index = 0; + free->page = page; + free->slot = page->start; + free->objspace = objspace; + + page = page_list[heap_allocated_pages - 1]; + scan->index = heap_allocated_pages - 1; + scan->page = page; + scan->slot = page->start + page->total_slots - 1; + scan->objspace = objspace; +} + +int count_pinned(struct heap_page *page) +{ + RVALUE *pstart = page->start; + RVALUE *pend = pstart + page->total_slots; + int pinned = 0; + + VALUE v = (VALUE)pstart; + for(; v != (VALUE)pend; v += sizeof(RVALUE)) { + if (RBASIC(v)->flags && RVALUE_PINNED(v)) { + pinned++; + } + } + + return pinned; +} + +int compare_pinned(const void *left, const void *right) +{ + int left_count = count_pinned(*(struct heap_page * const *)left); + int right_count = count_pinned(*(struct heap_page * const *)right); + return right_count - left_count; +} + +static void +gc_compact_heap(rb_objspace_t *objspace) +{ + struct heap_cursor free_cursor; + struct heap_cursor scan_cursor; + int number_considered; + struct heap_page 
**page_list; + + memset(objspace->rcompactor.considered_count_table, 0, T_MASK * sizeof(size_t)); + memset(objspace->rcompactor.moved_count_table, 0, T_MASK * sizeof(size_t)); + + page_list = calloc(heap_allocated_pages, sizeof(struct heap_page *)); + memcpy(page_list, heap_pages_sorted, heap_allocated_pages * sizeof(struct heap_page *)); + qsort(page_list, heap_allocated_pages, sizeof(struct heap_page *), compare_pinned); + + init_cursors(objspace, &free_cursor, &scan_cursor, page_list); + + /* Two finger algorithm */ + while (not_met(&free_cursor, &scan_cursor)) { + while(BUILTIN_TYPE(free_cursor.slot) != T_NONE && not_met(&free_cursor, &scan_cursor)) { + advance_cursor(&free_cursor, page_list); + } + + objspace->rcompactor.considered_count_table[BUILTIN_TYPE((VALUE)scan_cursor.slot)]++; + + while(!gc_is_moveable_obj(objspace, (VALUE)scan_cursor.slot) && not_met(&free_cursor, &scan_cursor)) { + retreat_cursor(&scan_cursor, page_list); + objspace->rcompactor.considered_count_table[BUILTIN_TYPE((VALUE)scan_cursor.slot)]++; + } + + if (not_met(&free_cursor, &scan_cursor)) { + objspace->rcompactor.moved_count_table[BUILTIN_TYPE((VALUE)scan_cursor.slot)]++; + gc_move(objspace, (VALUE)scan_cursor.slot, (VALUE)free_cursor.slot); + advance_cursor(&free_cursor, page_list); + retreat_cursor(&scan_cursor, page_list); + } + } + free(page_list); +} + +static void +gc_ref_update_array(rb_objspace_t * objspace, VALUE v) +{ + long i, len; + + if (FL_TEST(v, ELTS_SHARED)) + return; + + len = RARRAY_LEN(v); + if (len > 0) { + VALUE *ptr = (VALUE *)RARRAY_CONST_PTR_TRANSIENT(v); + for(i = 0; i < len; i++) { + UPDATE_IF_MOVED(objspace, ptr[i]); + } + } +} + +static void +gc_ref_update_object(rb_objspace_t * objspace, VALUE v) +{ + uint32_t i, len = ROBJECT_NUMIV(v); + VALUE *ptr = ROBJECT_IVPTR(v); + for (i = 0; i < len; i++) { + UPDATE_IF_MOVED(objspace, ptr[i]); + } +} + +static int +hash_replace_ref(st_data_t *key, st_data_t *value, st_data_t argp, int existing) +{ + rb_objspace_t *objspace; + + if(!SPECIAL_CONST_P((void *)*key) && BUILTIN_TYPE(*key) == T_MOVED) { + *key = (VALUE)RMOVED(*key)->destination; + } + + if(!SPECIAL_CONST_P((void *)*value) && BUILTIN_TYPE(*value) == T_MOVED) { + *value = (VALUE)RMOVED(*value)->destination; + } + + return ST_CONTINUE; +} + +static int +hash_foreach_replace(st_data_t key, st_data_t value, st_data_t argp, int error) +{ + rb_objspace_t *objspace; + + objspace = (rb_objspace_t *)argp; + + if(!SPECIAL_CONST_P((void *)key) && BUILTIN_TYPE(key) == T_MOVED) { + return ST_REPLACE; + } + + if(!SPECIAL_CONST_P((void *)value) && BUILTIN_TYPE(value) == T_MOVED) { + return ST_REPLACE; + } + return ST_CHECK; +} + +static void +gc_update_table_refs(rb_objspace_t * objspace, st_table *ht) +{ + if (st_foreach_with_replace(ht, hash_foreach_replace, hash_replace_ref, (st_data_t)objspace)) { + rb_raise(rb_eRuntimeError, "hash modified during iteration"); + } +} + +void +rb_gc_update_tbl_refs(st_table *ptr) +{ + rb_objspace_t *objspace = &rb_objspace; + gc_update_table_refs(objspace, ptr); +} + +static void +gc_ref_update_hash(rb_objspace_t * objspace, VALUE v) +{ + gc_update_table_refs(objspace, rb_hash_tbl_raw(v)); +} + +void rb_update_st_references(struct st_table *ht) +{ + rb_objspace_t *objspace = &rb_objspace; + gc_update_table_refs(objspace, ht); +} + +static void +gc_ref_update_method_entry(rb_objspace_t *objspace, rb_method_entry_t *me) +{ + rb_method_definition_t *def = me->def; + + UPDATE_IF_MOVED(objspace, me->owner); + UPDATE_IF_MOVED(objspace, me->defined_class); + + if 
(def) { + switch (def->type) { + case VM_METHOD_TYPE_ISEQ: + if (def->body.iseq.iseqptr) { + TYPED_UPDATE_IF_MOVED(objspace, rb_iseq_t *, def->body.iseq.iseqptr); + } + TYPED_UPDATE_IF_MOVED(objspace, rb_cref_t *, def->body.iseq.cref); + break; + case VM_METHOD_TYPE_ATTRSET: + case VM_METHOD_TYPE_IVAR: + UPDATE_IF_MOVED(objspace, def->body.attr.location); + break; + case VM_METHOD_TYPE_BMETHOD: + UPDATE_IF_MOVED(objspace, def->body.bmethod.proc); + break; + case VM_METHOD_TYPE_ALIAS: + TYPED_UPDATE_IF_MOVED(objspace, struct rb_method_entry_struct *, def->body.alias.original_me); + return; + case VM_METHOD_TYPE_REFINED: + TYPED_UPDATE_IF_MOVED(objspace, struct rb_method_entry_struct *, def->body.refined.orig_me); + UPDATE_IF_MOVED(objspace, def->body.refined.owner); + break; + case VM_METHOD_TYPE_CFUNC: + case VM_METHOD_TYPE_ZSUPER: + case VM_METHOD_TYPE_MISSING: + case VM_METHOD_TYPE_OPTIMIZED: + case VM_METHOD_TYPE_UNDEF: + case VM_METHOD_TYPE_NOTIMPLEMENTED: + break; + } + } +} + +static void +gc_ref_update_imemo(rb_objspace_t *objspace, VALUE obj) +{ + switch(imemo_type(obj)) { + case imemo_env: + { + rb_env_t *env = (rb_env_t *)obj; + TYPED_UPDATE_IF_MOVED(objspace, rb_iseq_t *, env->iseq); + } + break; + break; + case imemo_cref: + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.cref.klass); + TYPED_UPDATE_IF_MOVED(objspace, struct rb_cref_struct *, RANY(obj)->as.imemo.cref.next); + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.cref.refinements); + break; + case imemo_svar: + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.svar.cref_or_me); + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.svar.lastline); + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.svar.backref); + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.svar.others); + break; + case imemo_throw_data: + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.throw_data.throw_obj); + break; + case imemo_ifunc: + if (is_pointer_to_heap(objspace, RANY(obj)->as.imemo.ifunc.data)) { + TYPED_UPDATE_IF_MOVED(objspace, void *, RANY(obj)->as.imemo.ifunc.data); + } + break; + case imemo_memo: + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.memo.v1); + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.memo.v2); + if (is_pointer_to_heap(objspace, (void *)RANY(obj)->as.imemo.memo.u3.value)) { + UPDATE_IF_MOVED(objspace, RANY(obj)->as.imemo.memo.u3.value); + } + break; + case imemo_ment: + gc_ref_update_method_entry(objspace, &RANY(obj)->as.imemo.ment); + break; + case imemo_iseq: + rb_iseq_update_references((rb_iseq_t *)obj); + break; + case imemo_ast: + case imemo_parser_strterm: + case imemo_tmpbuf: + break; + default: + rb_bug("not reachable %d", imemo_type(obj)); + break; + } +} + +static enum rb_id_table_iterator_result +check_id_table_move(ID id, VALUE value, void *data) +{ + if(!SPECIAL_CONST_P((void *)value) && BUILTIN_TYPE(value) == T_MOVED) { + return ID_TABLE_REPLACE; + } + + return ID_TABLE_CONTINUE; +} + +/* Returns the new location of an object, if it moved. Otherwise returns + * the existing location. 
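+ *
+ * The call is a no-op for objects that did not move, so callers can
+ * heal a reference unconditionally; e.g. autoload_i_compact() in
+ * variable.c (later in this patch) does:
+ *
+ *     p->feature = rb_gc_new_location(p->feature);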
*/ +VALUE +rb_gc_new_location(VALUE value) +{ + if(!SPECIAL_CONST_P((void *)value) && BUILTIN_TYPE(value) == T_MOVED) { + return (VALUE)RMOVED(value)->destination; + } else { + return value; + } +} + +static enum rb_id_table_iterator_result +update_id_table(ID *key, VALUE * value, void *data, int existing) +{ + if(!SPECIAL_CONST_P((void *)*value) && BUILTIN_TYPE(*value) == T_MOVED) { + *value = (VALUE)RMOVED(*value)->destination; + } + + return ID_TABLE_CONTINUE; +} + +static void +update_m_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl) +{ + if (tbl) { + rb_id_table_foreach_with_replace(tbl, check_id_table_move, update_id_table, objspace); + } +} + +static enum rb_id_table_iterator_result +update_const_table(VALUE value, void *data) +{ + rb_const_entry_t *ce = (rb_const_entry_t *)value; + + if(!SPECIAL_CONST_P((void *)ce->value) && BUILTIN_TYPE(ce->value) == T_MOVED) { + ce->value = (VALUE)RMOVED(ce->value)->destination; + } + + if(!SPECIAL_CONST_P((void *)ce->file) && BUILTIN_TYPE(ce->file) == T_MOVED) { + ce->file = (VALUE)RMOVED(ce->file)->destination; + } + + return ID_TABLE_CONTINUE; +} + +static void +update_const_tbl(rb_objspace_t *objspace, struct rb_id_table *tbl) +{ + if (!tbl) return; + rb_id_table_foreach_values(tbl, update_const_table, objspace); +} + +static void +update_subclass_entries(rb_objspace_t *objspace, rb_subclass_entry_t *entry) +{ + while (entry) { + UPDATE_IF_MOVED(objspace, entry->klass); + entry = entry->next; + } +} + +static void +update_class_ext(rb_objspace_t *objspace, rb_classext_t *ext) +{ + UPDATE_IF_MOVED(objspace, ext->origin_); + UPDATE_IF_MOVED(objspace, ext->refined_class); + update_subclass_entries(objspace, ext->subclasses); +} + +static void +gc_update_object_references(rb_objspace_t *objspace, VALUE obj) +{ + RVALUE *any = RANY(obj); + +#if RGENGC_CHECK_MODE >= 5 + fprintf(stderr, "update-refs: %s -> ", obj_info(obj)); +#endif + + switch(BUILTIN_TYPE(obj)) { + case T_CLASS: + case T_MODULE: + update_m_tbl(objspace, RCLASS_M_TBL(obj)); + if (!RCLASS_EXT(obj)) break; + if (RCLASS_IV_TBL(obj)) { + gc_update_table_refs(objspace, RCLASS_IV_TBL(obj)); + } + update_class_ext(objspace, RCLASS_EXT(obj)); + update_const_tbl(objspace, RCLASS_CONST_TBL(obj)); + UPDATE_IF_MOVED(objspace, RCLASS(obj)->super); + break; + + case T_ICLASS: + if (FL_TEST(obj, RICLASS_IS_ORIGIN)) { + update_m_tbl(objspace, RCLASS_M_TBL(obj)); + } + if (!RCLASS_EXT(obj)) break; + if (RCLASS_IV_TBL(obj)) { + gc_update_table_refs(objspace, RCLASS_IV_TBL(obj)); + } + update_class_ext(objspace, RCLASS_EXT(obj)); + update_m_tbl(objspace, RCLASS_CALLABLE_M_TBL(obj)); + UPDATE_IF_MOVED(objspace, RCLASS(obj)->super); + break; + + case T_IMEMO: + gc_ref_update_imemo(objspace, obj); + break; + + case T_NIL: + case T_FIXNUM: + case T_NODE: + case T_MOVED: + case T_NONE: + /* These can't move */ + return; + + case T_ARRAY: + if (FL_TEST(obj, ELTS_SHARED)) { + UPDATE_IF_MOVED(objspace, any->as.array.as.heap.aux.shared); + } else { + gc_ref_update_array(objspace, obj); + } + break; + + case T_HASH: + gc_ref_update_hash(objspace, obj); + UPDATE_IF_MOVED(objspace, any->as.hash.ifnone); + break; + + case T_STRING: + if (STR_SHARED_P(obj)) { + UPDATE_IF_MOVED(objspace, any->as.string.as.heap.aux.shared); + } + case T_DATA: + /* Call the compaction callback, if it exists */ + { + void *const ptr = DATA_PTR(obj); + if (ptr) { + if (RTYPEDDATA_P(obj)) { + RUBY_DATA_FUNC compact_func = any->as.typeddata.type->function.dcompact; + if (compact_func) (*compact_func)(ptr); + } + } + } + break; + + 
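+    /* A T_DATA type opts in through the dcompact slot this patch adds
+     * to rb_data_type_struct. A hypothetical wrapper type (the names
+     * below are illustrative, not from this patch; wrapper_free and
+     * wrapper_memsize are elided) would mark without pinning and heal
+     * its reference in dcompact:
+     *
+     *     struct wrapper { VALUE obj; };
+     *
+     *     static void
+     *     wrapper_mark(void *ptr)
+     *     {
+     *         rb_gc_mark_no_pin(((struct wrapper *)ptr)->obj);
+     *     }
+     *
+     *     static void
+     *     wrapper_compact(void *ptr)
+     *     {
+     *         struct wrapper *w = ptr;
+     *         w->obj = rb_gc_new_location(w->obj);
+     *     }
+     *
+     *     static const rb_data_type_t wrapper_type = {
+     *         "wrapper",
+     *         {wrapper_mark, wrapper_free, wrapper_memsize, wrapper_compact,},
+     *         0, 0, RUBY_TYPED_FREE_IMMEDIATELY
+     *     };
+     *
+     * Types that mark with plain rb_gc_mark() keep their referents
+     * pinned and need no dcompact hook. */
+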
case T_OBJECT: + gc_ref_update_object(objspace, obj); + break; + + case T_FILE: + if (any->as.file.fptr) { + UPDATE_IF_MOVED(objspace, any->as.file.fptr->pathv); + UPDATE_IF_MOVED(objspace, any->as.file.fptr->tied_io_for_writing); + UPDATE_IF_MOVED(objspace, any->as.file.fptr->writeconv_asciicompat); + UPDATE_IF_MOVED(objspace, any->as.file.fptr->writeconv_pre_ecopts); + UPDATE_IF_MOVED(objspace, any->as.file.fptr->encs.ecopts); + UPDATE_IF_MOVED(objspace, any->as.file.fptr->write_lock); + } + break; + case T_REGEXP: + UPDATE_IF_MOVED(objspace, any->as.regexp.src); + break; + + case T_SYMBOL: + if (DYNAMIC_SYM_P((VALUE)any)) { + UPDATE_IF_MOVED(objspace, RSYMBOL(any)->fstr); + } + break; + + case T_FLOAT: + case T_BIGNUM: + break; + + case T_MATCH: + UPDATE_IF_MOVED(objspace, any->as.match.regexp); + + if (any->as.match.str) { + UPDATE_IF_MOVED(objspace, any->as.match.str); + } + break; + + case T_RATIONAL: + UPDATE_IF_MOVED(objspace, any->as.rational.num); + UPDATE_IF_MOVED(objspace, any->as.rational.den); + break; + + case T_COMPLEX: + UPDATE_IF_MOVED(objspace, any->as.complex.real); + UPDATE_IF_MOVED(objspace, any->as.complex.imag); + + break; + + case T_STRUCT: + { + long i, len = RSTRUCT_LEN(obj); + VALUE *ptr = (VALUE *)RSTRUCT_CONST_PTR(obj); + + for(i = 0; i < len; i++) { + UPDATE_IF_MOVED(objspace, ptr[i]); + } + } + break; + default: +#if GC_DEBUG + rb_gcdebug_print_obj_condition((VALUE)obj); + rb_obj_info_dump(obj); + rb_bug("unreachable"); +#endif + break; + + } + + UPDATE_IF_MOVED(objspace, RBASIC(obj)->klass); + +#if RGENGC_CHECK_MODE >= 5 + fprintf(stderr, "%s\n", obj_info(obj)); +#endif +} +static int +gc_ref_update(void *vstart, void *vend, size_t stride, void * data) +{ + rb_objspace_t * objspace; + struct heap_page *page; + short free_slots = 0; + + VALUE v = (VALUE)vstart; + objspace = (rb_objspace_t *)data; + page = GET_HEAP_PAGE(v); + page->freelist = NULL; + page->flags.has_uncollectible_shady_objects = FALSE; + + /* For each object on the page */ + for(; v != (VALUE)vend; v += stride) { + if (SPECIAL_CONST_P(v)) { + } else if (BUILTIN_TYPE(v) == T_NONE) { + heap_page_add_freeobj(objspace, page, v); + free_slots++; + } else { + if (RVALUE_WB_UNPROTECTED(v)) { + page->flags.has_uncollectible_shady_objects = TRUE; + } + gc_update_object_references(objspace, v); + } + } + + page->free_slots = free_slots; + return 0; +} + +extern rb_symbols_t global_symbols; + +static void +gc_update_references(rb_objspace_t * objspace) +{ + rb_execution_context_t *ec = GET_EC(); + rb_vm_t *vm = rb_ec_vm_ptr(ec); + + rb_objspace_each_objects_without_setup(gc_ref_update, objspace); + rb_vm_update_references(vm); + gc_update_table_refs(objspace, global_symbols.str_sym); +} + +static VALUE type_sym(int type); + +static VALUE +rb_gc_compact_stats(VALUE mod) +{ + int i; + + rb_objspace_t *objspace = &rb_objspace; + VALUE h = rb_hash_new(); + VALUE considered = rb_hash_new(); + VALUE moved = rb_hash_new(); + + for (i=0; ircompactor.considered_count_table[i])); + } + + for (i=0; ircompactor.moved_count_table[i])); + } + + rb_hash_aset(h, ID2SYM(rb_intern("considered")), considered); + rb_hash_aset(h, ID2SYM(rb_intern("moved")), moved); + + return h; +} + +static VALUE +rb_gc_compact(VALUE mod) +{ + rb_objspace_t *objspace = &rb_objspace; + + /* Ensure objects are pinned */ + rb_gc(); + + /* Drain interrupts so that THEAP has a chance to evacuate before + * any possible compaction. 
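+ *
+ * The sequence is then: move objects (gc_compact_heap), heal every
+ * reference (gc_update_references), drop the method and constant
+ * caches, and run one more GC so the forwarding records that gc_move()
+ * left behind, i.e.
+ *
+ *     src->as.moved.flags = T_MOVED;
+ *     src->as.moved.destination = (VALUE)dest;
+ *
+ * become unreferenced and are swept. The Ruby-level GC.compact then
+ * reports the per-type considered/moved counts via
+ * rb_gc_compact_stats().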
*/ + rb_thread_execute_interrupts(rb_thread_current()); + + gc_compact_heap(objspace); + + gc_update_references(objspace); + + rb_clear_method_cache_by_class(rb_cObject); + rb_clear_constant_cache(); + + /* GC after compaction to eliminate T_MOVED */ + rb_gc(); + + return rb_gc_compact_stats(mod); +} + +/* + * call-seq: + * GC.verify_compaction_references -> nil + * + * Verify compaction reference consistency. + * + * This method is implementation specific. During compaction, objects that + * were moved are replaced with T_MOVED objects. No object should have a + * reference to a T_MOVED object after compaction. + * + * This function doubles the heap to ensure room to move all objects, + * compacts the heap to make sure everything moves, updates all references, + * then performs a full GC. If any object contains a reference to a T_MOVED + * object, that object should be pushed on the mark stack, and will + * make a SEGV. + */ +static VALUE +gc_verify_compaction_references(VALUE dummy) +{ + rb_objspace_t *objspace = &rb_objspace; + + /* Double heap size */ + heap_add_pages(objspace, heap_eden, heap_allocated_pages); + + /* Ensure objects are pinned */ + rb_gc(); + + /* Compact heap */ + gc_compact_heap(objspace); + + gc_update_references(objspace); + + rb_clear_method_cache_by_class(rb_cObject); + rb_clear_constant_cache(); + + /* GC after compaction to eliminate T_MOVED */ + rb_gc(); + gc_verify_internal_consistency(Qnil); + + return rb_gc_compact_stats(dummy); +} + VALUE rb_gc_start(void) { @@ -7110,6 +8130,7 @@ enum gc_stat_sym { #if USE_RGENGC gc_stat_sym_minor_gc_count, gc_stat_sym_major_gc_count, + gc_stat_sym_object_id_collisions, gc_stat_sym_remembered_wb_unprotected_objects, gc_stat_sym_remembered_wb_unprotected_objects_limit, gc_stat_sym_old_objects, @@ -7185,6 +8206,7 @@ setup_gc_stat_symbols(void) S(malloc_increase_bytes_limit); #if USE_RGENGC S(minor_gc_count); + S(object_id_collisions); S(major_gc_count); S(remembered_wb_unprotected_objects); S(remembered_wb_unprotected_objects_limit); @@ -7357,6 +8379,7 @@ gc_stat_internal(VALUE hash_or_sym) SET(malloc_increase_bytes_limit, malloc_limit); #if USE_RGENGC SET(minor_gc_count, objspace->profile.minor_gc_count); + SET(object_id_collisions, objspace->profile.object_id_collisions); SET(major_gc_count, objspace->profile.major_gc_count); SET(remembered_wb_unprotected_objects, objspace->rgengc.uncollectible_wb_unprotected_objects); SET(remembered_wb_unprotected_objects_limit, objspace->rgengc.uncollectible_wb_unprotected_objects_limit); @@ -9613,7 +10636,7 @@ method_type_name(rb_method_type_t type) static void rb_raw_iseq_info(char *buff, const int buff_size, const rb_iseq_t *iseq) { - if (iseq->body && iseq->body->location.label) { + if (iseq->body && iseq->body->location.label && !RB_TYPE_P(iseq->body->location.pathobj, T_MOVED)) { VALUE path = rb_iseq_path(iseq); VALUE n = iseq->body->location.first_lineno; snprintf(buff, buff_size, "%s %s@%s:%d", buff, @@ -9644,10 +10667,11 @@ rb_raw_obj_info(char *buff, const int buff_size, VALUE obj) const int age = RVALUE_FLAGS_AGE(RBASIC(obj)->flags); if (is_pointer_to_heap(&rb_objspace, (void *)obj)) { - snprintf(buff, buff_size, "%p [%d%s%s%s%s] %s", + snprintf(buff, buff_size, "%p [%d%s%s%s%s%s] %s", (void *)obj, age, C(RVALUE_UNCOLLECTIBLE_BITMAP(obj), "L"), C(RVALUE_MARK_BITMAP(obj), "M"), + C(RVALUE_PIN_BITMAP(obj), "P"), C(RVALUE_MARKING_BITMAP(obj), "R"), C(RVALUE_WB_UNPROTECTED_BITMAP(obj), "U"), obj_type_name(obj)); @@ -9672,10 +10696,12 @@ rb_raw_obj_info(char *buff, const int 
buff_size, VALUE obj) snprintf(buff, buff_size, "%s (temporary internal)", buff); } else { + if (RTEST(RBASIC(obj)->klass)) { VALUE class_path = rb_class_path_cached(RBASIC(obj)->klass); if (!NIL_P(class_path)) { snprintf(buff, buff_size, "%s (%s)", buff, RSTRING_PTR(class_path)); } + } } #if GC_DEBUG @@ -9872,6 +10898,11 @@ rb_gcdebug_print_obj_condition(VALUE obj) fprintf(stderr, "created at: %s:%d\n", RANY(obj)->file, RANY(obj)->line); + if (BUILTIN_TYPE(obj) == T_MOVED) { + fprintf(stderr, "moved?: true\n"); + } else { + fprintf(stderr, "moved?: false\n"); + } if (is_pointer_to_heap(objspace, (void *)obj)) { fprintf(stderr, "pointer to heap?: true\n"); } @@ -9881,6 +10912,7 @@ rb_gcdebug_print_obj_condition(VALUE obj) } fprintf(stderr, "marked? : %s\n", MARKED_IN_BITMAP(GET_HEAP_MARK_BITS(obj), obj) ? "true" : "false"); + fprintf(stderr, "pinned? : %s\n", MARKED_IN_BITMAP(GET_HEAP_PINNED_BITS(obj), obj) ? "true" : "false"); #if USE_RGENGC fprintf(stderr, "age? : %d\n", RVALUE_AGE(obj)); fprintf(stderr, "old? : %s\n", RVALUE_OLD_P(obj) ? "true" : "false"); @@ -10030,6 +11062,9 @@ Init_GC(void) VALUE rb_mProfiler; VALUE gc_constants; + id_to_obj_tbl = st_init_numtable(); + obj_to_id_tbl = rb_init_identtable(); + rb_mGC = rb_define_module("GC"); rb_define_singleton_method(rb_mGC, "start", gc_start_internal, -1); rb_define_singleton_method(rb_mGC, "enable", rb_gc_enable, 0); @@ -10039,6 +11074,7 @@ Init_GC(void) rb_define_singleton_method(rb_mGC, "count", gc_count, 0); rb_define_singleton_method(rb_mGC, "stat", gc_stat, -1); rb_define_singleton_method(rb_mGC, "latest_gc_info", gc_latest_gc_info, -1); + rb_define_singleton_method(rb_mGC, "compact", rb_gc_compact, 0); rb_define_method(rb_mGC, "garbage_collect", gc_start_internal, -1); gc_constants = rb_hash_new(); @@ -10099,6 +11135,7 @@ Init_GC(void) /* internal methods */ rb_define_singleton_method(rb_mGC, "verify_internal_consistency", gc_verify_internal_consistency, 0); + rb_define_singleton_method(rb_mGC, "verify_compaction_references", gc_verify_compaction_references, 0); rb_define_singleton_method(rb_mGC, "verify_transient_heap_internal_consistency", gc_verify_transient_heap_internal_consistency, 0); #if MALLOC_ALLOCATED_SIZE rb_define_singleton_method(rb_mGC, "malloc_allocated_size", gc_malloc_allocated_size, 0); diff --git a/gc.h b/gc.h index 2c91e06620..727890181a 100644 --- a/gc.h +++ b/gc.h @@ -57,6 +57,10 @@ rb_gc_debug_body(const char *mode, const char *msg, int st, void *ptr) #define RUBY_GC_INFO if(0)printf #endif +#define RUBY_MARK_NO_PIN_UNLESS_NULL(ptr) do { \ + VALUE markobj = (ptr); \ + if (RTEST(markobj)) {rb_gc_mark_no_pin(markobj);} \ +} while (0) #define RUBY_MARK_UNLESS_NULL(ptr) do { \ VALUE markobj = (ptr); \ if (RTEST(markobj)) {rb_gc_mark(markobj);} \ diff --git a/hash.c b/hash.c index 54b935c031..adda42166c 100644 --- a/hash.c +++ b/hash.c @@ -797,6 +797,7 @@ ar_foreach(VALUE hash, int (*func)(ANYARGS), st_data_t arg) case ST_CONTINUE: break; case ST_CHECK: + case ST_REPLACE: case ST_STOP: return 0; case ST_DELETE: @@ -845,6 +846,7 @@ ar_foreach_check(VALUE hash, int (*func)(ANYARGS), st_data_t arg, case ST_CONTINUE: break; case ST_STOP: + case ST_REPLACE: return 0; case ST_DELETE: { if (!ar_empty_entry(cur_entry)) { diff --git a/id_table.c b/id_table.c index 74c9e756a0..d315664eac 100644 --- a/id_table.c +++ b/id_table.c @@ -266,6 +266,27 @@ rb_id_table_delete(struct rb_id_table *tbl, ID id) return hash_delete_index(tbl, index); } +void +rb_id_table_foreach_with_replace(struct rb_id_table *tbl, 
rb_id_table_foreach_func_t *func, rb_id_table_update_callback_func_t *replace, void *data) +{ + int i, capa = tbl->capa; + + for (i=0; iitems[i].val, data); + assert(key != 0); + + if (ret == ID_TABLE_REPLACE) { + VALUE val = tbl->items[i].val; + ret = (*replace)(Qundef, &val, data, TRUE); + tbl->items[i].val = val; + } else if (ret == ID_TABLE_STOP) + return; + } + } +} + void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, void *data) { diff --git a/id_table.h b/id_table.h index b10b4ac164..abd9eb5f38 100644 --- a/id_table.h +++ b/id_table.h @@ -9,6 +9,7 @@ enum rb_id_table_iterator_result { ID_TABLE_CONTINUE = ST_CONTINUE, ID_TABLE_STOP = ST_STOP, ID_TABLE_DELETE = ST_DELETE, + ID_TABLE_REPLACE = ST_REPLACE, ID_TABLE_ITERATOR_RESULT_END }; @@ -23,9 +24,11 @@ int rb_id_table_insert(struct rb_id_table *tbl, ID id, VALUE val); int rb_id_table_lookup(struct rb_id_table *tbl, ID id, VALUE *valp); int rb_id_table_delete(struct rb_id_table *tbl, ID id); +typedef enum rb_id_table_iterator_result rb_id_table_update_callback_func_t(ID *id, VALUE *val, void *data, int existing); typedef enum rb_id_table_iterator_result rb_id_table_foreach_func_t(ID id, VALUE val, void *data); typedef enum rb_id_table_iterator_result rb_id_table_foreach_values_func_t(VALUE val, void *data); void rb_id_table_foreach(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, void *data); +void rb_id_table_foreach_with_replace(struct rb_id_table *tbl, rb_id_table_foreach_func_t *func, rb_id_table_update_callback_func_t *replace, void *data); void rb_id_table_foreach_values(struct rb_id_table *tbl, rb_id_table_foreach_values_func_t *func, void *data); #endif /* RUBY_ID_TABLE_H */ diff --git a/include/ruby/intern.h b/include/ruby/intern.h index 17aafd7f8e..192347c8d5 100644 --- a/include/ruby/intern.h +++ b/include/ruby/intern.h @@ -507,10 +507,15 @@ COLDFUNC NORETURN(void rb_memerror(void)); PUREFUNC(int rb_during_gc(void)); void rb_gc_mark_locations(const VALUE*, const VALUE*); void rb_mark_tbl(struct st_table*); +void rb_mark_tbl_no_pin(struct st_table*); +void rb_gc_update_tbl_refs(st_table *ptr); void rb_mark_set(struct st_table*); void rb_mark_hash(struct st_table*); +void rb_update_st_references(struct st_table *ht); void rb_gc_mark_maybe(VALUE); void rb_gc_mark(VALUE); +void rb_gc_mark_no_pin(VALUE); +VALUE rb_gc_new_location(VALUE); void rb_gc_force_recycle(VALUE); void rb_gc(void); void rb_gc_copy_finalizer(VALUE,VALUE); diff --git a/include/ruby/ruby.h b/include/ruby/ruby.h index 11387b540a..1678d3ec3e 100644 --- a/include/ruby/ruby.h +++ b/include/ruby/ruby.h @@ -512,6 +512,7 @@ enum ruby_value_type { RUBY_T_NODE = 0x1b, RUBY_T_ICLASS = 0x1c, RUBY_T_ZOMBIE = 0x1d, + RUBY_T_MOVED = 0x1e, RUBY_T_MASK = 0x1f }; @@ -542,6 +543,7 @@ enum ruby_value_type { #define T_UNDEF RUBY_T_UNDEF #define T_NODE RUBY_T_NODE #define T_ZOMBIE RUBY_T_ZOMBIE +#define T_MOVED RUBY_T_MOVED #define T_MASK RUBY_T_MASK #define RB_BUILTIN_TYPE(x) (int)(((struct RBasic*)(x))->flags & RUBY_T_MASK) @@ -881,9 +883,14 @@ enum ruby_fl_type { RUBY_FL_SINGLETON = RUBY_FL_USER0 }; +struct RMoved { + VALUE flags; + VALUE destination; +}; + struct RUBY_ALIGNAS(SIZEOF_VALUE) RBasic { VALUE flags; - const VALUE klass; + VALUE klass; }; VALUE rb_obj_hide(VALUE obj); @@ -1105,7 +1112,7 @@ struct RArray { struct RRegexp { struct RBasic basic; struct re_pattern_buffer *ptr; - const VALUE src; + VALUE src; unsigned long usecnt; }; #define RREGEXP_PTR(r) (RREGEXP(r)->ptr) @@ -1144,7 +1151,8 @@ struct rb_data_type_struct { 
void (*dmark)(void*); void (*dfree)(void*); size_t (*dsize)(const void *); - void *reserved[2]; /* For future extension. + void (*dcompact)(void*); + void *reserved[1]; /* For future extension. This array *must* be filled with ZERO. */ } function; const rb_data_type_t *parent; @@ -1255,6 +1263,7 @@ int rb_big_sign(VALUE); #define RBIGNUM_NEGATIVE_P(b) (RBIGNUM_SIGN(b)==0) #define R_CAST(st) (struct st*) +#define RMOVED(obj) (R_CAST(RMoved)(obj)) #define RBASIC(obj) (R_CAST(RBasic)(obj)) #define ROBJECT(obj) (R_CAST(RObject)(obj)) #define RCLASS(obj) (R_CAST(RClass)(obj)) diff --git a/include/ruby/st.h b/include/ruby/st.h index 149e0ebaef..a7eb0c6d7c 100644 --- a/include/ruby/st.h +++ b/include/ruby/st.h @@ -96,7 +96,7 @@ struct st_table { #define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0) -enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK}; +enum st_retval {ST_CONTINUE, ST_STOP, ST_DELETE, ST_CHECK, ST_REPLACE}; st_table *st_init_table(const struct st_hash_type *); st_table *st_init_table_with_size(const struct st_hash_type *, st_index_t); @@ -118,6 +118,7 @@ typedef int st_update_callback_func(st_data_t *key, st_data_t *value, st_data_t * results of hash() are same and compare() returns 0, otherwise the * behavior is undefined */ int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg); +int st_foreach_with_replace(st_table *tab, int (*func)(ANYARGS), st_update_callback_func *replace, st_data_t arg); int st_foreach(st_table *, int (*)(ANYARGS), st_data_t); int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t); st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size); diff --git a/internal.h b/internal.h index 50a20a2bf6..7a0408e1e6 100644 --- a/internal.h +++ b/internal.h @@ -721,8 +721,8 @@ struct RBignum { struct RRational { struct RBasic basic; - const VALUE num; - const VALUE den; + VALUE num; + VALUE den; }; #define RRATIONAL(obj) (R_CAST(RRational)(obj)) @@ -738,8 +738,8 @@ struct RFloat { struct RComplex { struct RBasic basic; - const VALUE real; - const VALUE imag; + VALUE real; + VALUE imag; }; #define RCOMPLEX(obj) (R_CAST(RComplex)(obj)) @@ -806,8 +806,8 @@ struct RHash { st_table *st; struct ar_table_struct *ar; /* possibly 0 */ } as; - const int iter_lev; - const VALUE ifnone; + int iter_lev; + VALUE ifnone; }; #ifdef RHASH_ITER_LEV @@ -937,7 +937,7 @@ struct rb_classext_struct { */ rb_subclass_entry_t **module_subclasses; rb_serial_t class_serial; - const VALUE origin_; + VALUE origin_; VALUE refined_class; rb_alloc_func_t allocator; }; @@ -1055,10 +1055,10 @@ imemo_type_p(VALUE imemo, enum imemo_type imemo_type) /*! 
SVAR (Special VARiable) */ struct vm_svar { VALUE flags; - const VALUE cref_or_me; /*!< class reference or rb_method_entry_t */ - const VALUE lastline; - const VALUE backref; - const VALUE others; + VALUE cref_or_me; /*!< class reference or rb_method_entry_t */ + VALUE lastline; + VALUE backref; + VALUE others; }; @@ -1068,7 +1068,7 @@ struct vm_svar { struct vm_throw_data { VALUE flags; VALUE reserved; - const VALUE throw_obj; + VALUE throw_obj; const struct rb_control_frame_struct *catch_frame; VALUE throw_state; }; @@ -1091,7 +1091,7 @@ struct vm_ifunc { VALUE flags; VALUE reserved; VALUE (*func)(ANYARGS); - const void *data; + void *data; struct vm_ifunc_argc argc; }; @@ -1142,12 +1142,12 @@ void rb_strterm_mark(VALUE obj); struct MEMO { VALUE flags; VALUE reserved; - const VALUE v1; - const VALUE v2; + VALUE v1; + VALUE v2; union { long cnt; long state; - const VALUE value; + VALUE value; VALUE (*func)(ANYARGS); } u3; }; @@ -2314,6 +2314,7 @@ extern unsigned long ruby_scan_digits(const char *str, ssize_t len, int base, si /* variable.c (export) */ void rb_mark_generic_ivar(VALUE); +void rb_mv_generic_ivar(VALUE src, VALUE dst); VALUE rb_const_missing(VALUE klass, VALUE name); int rb_class_ivar_set(VALUE klass, ID vid, VALUE value); st_table *rb_st_copy(VALUE obj, struct st_table *orig_tbl); @@ -2325,9 +2326,10 @@ VALUE rb_wb_unprotected_newobj_of(VALUE, VALUE); size_t rb_obj_memsize_of(VALUE); void rb_gc_verify_internal_consistency(void); -#define RB_OBJ_GC_FLAGS_MAX 5 +#define RB_OBJ_GC_FLAGS_MAX 6 size_t rb_obj_gc_flags(VALUE, ID[], size_t); void rb_gc_mark_values(long n, const VALUE *values); +void rb_gc_mark_stack_values(long n, const VALUE *values); #if IMEMO_DEBUG VALUE rb_imemo_new_debug(enum imemo_type type, VALUE v1, VALUE v2, VALUE v3, VALUE v0, const char *file, int line); diff --git a/iseq.c b/iseq.c index 864e92645c..46860a99e5 100644 --- a/iseq.c +++ b/iseq.c @@ -136,11 +136,11 @@ rb_vm_insn_null_translator(const void *addr) return (VALUE)addr; } -typedef void iseq_value_itr_t(void *ctx, VALUE obj); +typedef VALUE iseq_value_itr_t(void *ctx, VALUE obj); typedef VALUE rb_vm_insns_translator_t(const void *addr); static int -iseq_extract_values(const VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator) +iseq_extract_values(VALUE *code, size_t pos, iseq_value_itr_t * func, void *data, rb_vm_insns_translator_t * translator) { VALUE insn = translator((void *)code[pos]); int len = insn_len(insn); @@ -156,7 +156,10 @@ iseq_extract_values(const VALUE *code, size_t pos, iseq_value_itr_t * func, void { VALUE op = code[pos + op_no + 1]; if (!SPECIAL_CONST_P(op)) { - func(data, op); + VALUE newop = func(data, op); + if (newop != op) { + code[pos + op_no + 1] = newop; + } } break; } @@ -164,7 +167,10 @@ iseq_extract_values(const VALUE *code, size_t pos, iseq_value_itr_t * func, void { union iseq_inline_storage_entry *const is = (union iseq_inline_storage_entry *)code[pos + op_no + 1]; if (is->once.value) { - func(data, is->once.value); + VALUE nv = func(data, is->once.value); + if (is->once.value != nv) { + is->once.value = nv; + } } break; } @@ -180,7 +186,7 @@ static void rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data) { unsigned int size; - const VALUE *code; + VALUE *code; size_t n; rb_vm_insns_translator_t * translator; const struct rb_iseq_constant_body *const body = iseq->body; @@ -204,10 +210,65 @@ rb_iseq_each_value(const rb_iseq_t *iseq, iseq_value_itr_t * func, void *data) } } -static void 
+static VALUE +update_each_insn_value(void *ctx, VALUE obj) +{ + return rb_gc_new_location(obj); +} + +void +rb_iseq_update_references(rb_iseq_t *iseq) +{ + if (iseq->body) { + struct rb_iseq_constant_body *body = iseq->body; + + body->variable.coverage = rb_gc_new_location(body->variable.coverage); + body->variable.pc2branchindex = rb_gc_new_location(body->variable.pc2branchindex); + body->location.label = rb_gc_new_location(body->location.label); + body->location.base_label = rb_gc_new_location(body->location.base_label); + body->location.pathobj = rb_gc_new_location(body->location.pathobj); + if (body->local_iseq) { + body->local_iseq = (struct rb_iseq_struct *)rb_gc_new_location((VALUE)body->local_iseq); + } + if (body->parent_iseq) { + body->parent_iseq = (struct rb_iseq_struct *)rb_gc_new_location((VALUE)body->parent_iseq); + } + if(FL_TEST(iseq, ISEQ_MARKABLE_ISEQ)) { + rb_iseq_each_value(iseq, update_each_insn_value, NULL); + } + + if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { + int i, j; + + i = body->param.keyword->required_num; + + for (j = 0; i < body->param.keyword->num; i++, j++) { + VALUE obj = body->param.keyword->default_values[j]; + if (obj != Qundef) { + body->param.keyword->default_values[j] = rb_gc_new_location(obj); + } + } + } + + if (body->catch_table) { + const struct iseq_catch_table *table = body->catch_table; + unsigned int i; + for(i = 0; i < table->size; i++) { + struct iseq_catch_table_entry *entry; + entry = &table->entries[i]; + if (entry->iseq) { + entry->iseq = (rb_iseq_t *)rb_gc_new_location((VALUE)entry->iseq); + } + } + } + } +} + +static VALUE each_insn_value(void *ctx, VALUE obj) { - rb_gc_mark(obj); + rb_gc_mark_no_pin(obj); + return obj; } void @@ -224,12 +285,12 @@ rb_iseq_mark(const rb_iseq_t *iseq) rb_iseq_each_value(iseq, each_insn_value, NULL); } - rb_gc_mark(body->variable.coverage); - rb_gc_mark(body->variable.pc2branchindex); - rb_gc_mark(body->location.label); - rb_gc_mark(body->location.base_label); - rb_gc_mark(body->location.pathobj); - RUBY_MARK_UNLESS_NULL((VALUE)body->parent_iseq); + rb_gc_mark_no_pin(body->variable.coverage); + rb_gc_mark_no_pin(body->variable.pc2branchindex); + rb_gc_mark_no_pin(body->location.label); + rb_gc_mark_no_pin(body->location.base_label); + rb_gc_mark_no_pin(body->location.pathobj); + RUBY_MARK_NO_PIN_UNLESS_NULL((VALUE)body->parent_iseq); if (body->param.flags.has_kw && ISEQ_COMPILE_DATA(iseq) == NULL) { const struct rb_iseq_param_keyword *const keyword = body->param.keyword; @@ -252,7 +313,7 @@ rb_iseq_mark(const rb_iseq_t *iseq) const struct iseq_catch_table_entry *entry; entry = &table->entries[i]; if (entry->iseq) { - rb_gc_mark((VALUE)entry->iseq); + rb_gc_mark_no_pin((VALUE)entry->iseq); } } } @@ -263,11 +324,16 @@ rb_iseq_mark(const rb_iseq_t *iseq) } else if (FL_TEST_RAW(iseq, ISEQ_USE_COMPILE_DATA)) { const struct iseq_compile_data *const compile_data = ISEQ_COMPILE_DATA(iseq); + if (RTEST(compile_data->mark_ary)) { + rb_gc_mark(compile_data->mark_ary); + rb_gc_mark_values(RARRAY_LEN(compile_data->mark_ary), RARRAY_CONST_PTR(compile_data->mark_ary)); + } + RUBY_MARK_UNLESS_NULL(compile_data->err_info); + if (RTEST(compile_data->catch_table_ary)) { + rb_gc_mark(compile_data->catch_table_ary); + rb_gc_mark_values(RARRAY_LEN(compile_data->catch_table_ary), RARRAY_CONST_PTR(compile_data->catch_table_ary)); + } VM_ASSERT(compile_data != NULL); - - RUBY_MARK_UNLESS_NULL(compile_data->mark_ary); - RUBY_MARK_UNLESS_NULL(compile_data->err_info); - 
RUBY_MARK_UNLESS_NULL(compile_data->catch_table_ary); } else { /* executable */ diff --git a/iseq.h b/iseq.h index f3f269b572..3afb0cec4c 100644 --- a/iseq.h +++ b/iseq.h @@ -236,7 +236,7 @@ struct iseq_catch_table_entry { * CATCH_TYPE_REDO, CATCH_TYPE_NEXT: * NULL. */ - const rb_iseq_t *iseq; + rb_iseq_t *iseq; unsigned int start; unsigned int end; diff --git a/method.h b/method.h index 531acf72b8..4ac94454e9 100644 --- a/method.h +++ b/method.h @@ -40,9 +40,9 @@ typedef struct rb_scope_visi_struct { /*! CREF (Class REFerence) */ typedef struct rb_cref_struct { VALUE flags; - const VALUE refinements; - const VALUE klass; - struct rb_cref_struct * const next; + VALUE refinements; + VALUE klass; + struct rb_cref_struct * next; const rb_scope_visibility_t scope_visi; } rb_cref_t; @@ -50,10 +50,10 @@ typedef struct rb_cref_struct { typedef struct rb_method_entry_struct { VALUE flags; - const VALUE defined_class; + VALUE defined_class; struct rb_method_definition_struct * const def; ID called_id; - const VALUE owner; + VALUE owner; } rb_method_entry_t; typedef struct rb_callable_method_entry_struct { /* same fields with rb_method_entry_t */ @@ -123,8 +123,8 @@ typedef struct rb_iseq_struct rb_iseq_t; #endif typedef struct rb_method_iseq_struct { - const rb_iseq_t * const iseqptr; /*!< iseq pointer, should be separated from iseqval */ - rb_cref_t * const cref; /*!< class reference, should be marked */ + rb_iseq_t * iseqptr; /*!< iseq pointer, should be separated from iseqval */ + rb_cref_t * cref; /*!< class reference, should be marked */ } rb_method_iseq_t; /* check rb_add_method_iseq() when modify the fields */ typedef struct rb_method_cfunc_struct { @@ -135,20 +135,20 @@ typedef struct rb_method_cfunc_struct { typedef struct rb_method_attr_struct { ID id; - const VALUE location; /* should be marked */ + VALUE location; /* should be marked */ } rb_method_attr_t; typedef struct rb_method_alias_struct { - const struct rb_method_entry_struct * const original_me; /* original_me->klass is original owner */ + struct rb_method_entry_struct * original_me; /* original_me->klass is original owner */ } rb_method_alias_t; typedef struct rb_method_refined_struct { - const struct rb_method_entry_struct * const orig_me; - const VALUE owner; + struct rb_method_entry_struct * orig_me; + VALUE owner; } rb_method_refined_t; typedef struct rb_method_bmethod_struct { - const VALUE proc; /* should be marked */ + VALUE proc; /* should be marked */ struct rb_hook_list_struct *hooks; } rb_method_bmethod_t; @@ -172,7 +172,7 @@ PACKED_STRUCT_UNALIGNED(struct rb_method_definition_struct { rb_method_refined_t refined; rb_method_bmethod_t bmethod; - enum method_optimized_type optimize_type; + enum method_optimized_type optimize_type; } body; ID original_id; diff --git a/st.c b/st.c index ed235c674e..389591e5dd 100644 --- a/st.c +++ b/st.c @@ -1548,7 +1548,7 @@ st_update(st_table *tab, st_data_t key, different for ST_CHECK and when the current element is removed during traversing. 
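
   With st_foreach_with_replace(), FUNC may additionally return
   ST_REPLACE; REPLACE is then called with pointers to the current key
   and value and may rewrite both in place. A hypothetical callback
   pair in the style of gc.c's hash_foreach_replace/hash_replace_ref
   (the names below are illustrative, not part of this patch):

       static int
       want_heal(st_data_t key, st_data_t value, st_data_t arg, int error)
       {
           if (!SPECIAL_CONST_P((void *)value) && BUILTIN_TYPE(value) == T_MOVED) {
               return ST_REPLACE;
           }
           return ST_CONTINUE;
       }

       static int
       heal_ref(st_data_t *key, st_data_t *value, st_data_t arg, int existing)
       {
           *value = (st_data_t)rb_gc_new_location((VALUE)*value);
           return ST_CONTINUE;
       }

       st_foreach_with_replace(tbl, want_heal, heal_ref, 0);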
diff --git a/symbol.c b/symbol.c
index ae6003ef15..05457c422c 100644
--- a/symbol.c
+++ b/symbol.c
@@ -60,12 +60,7 @@ enum id_entry_type {
     ID_ENTRY_SIZE
 };
 
-static struct symbols {
-    rb_id_serial_t last_id;
-    st_table *str_sym;
-    VALUE ids;
-    VALUE dsymbol_fstr_hash;
-} global_symbols = {tNEXT_ID-1};
+rb_symbols_t global_symbols = {tNEXT_ID-1};
 
 static const struct st_hash_type symhash = {
     rb_str_hash_cmp,
diff --git a/symbol.h b/symbol.h
index 56568a91fc..3b9866d80f 100644
--- a/symbol.h
+++ b/symbol.h
@@ -54,6 +54,13 @@ id_type(ID id)
 
 typedef uint32_t rb_id_serial_t;
 
+typedef struct {
+    rb_id_serial_t last_id;
+    st_table *str_sym;
+    VALUE ids;
+    VALUE dsymbol_fstr_hash;
+} rb_symbols_t;
+
 static inline rb_id_serial_t
 rb_id_to_serial(ID id)
 {
diff --git a/test/-ext-/gc_compact/test_gc_compact.rb b/test/-ext-/gc_compact/test_gc_compact.rb
new file mode 100644
index 0000000000..ac18c6e7d7
--- /dev/null
+++ b/test/-ext-/gc_compact/test_gc_compact.rb
@@ -0,0 +1,87 @@
+# frozen_string_literal: true
+require 'test/unit'
+require '-test-/memory_location'
+
+class TestGCCompact < Test::Unit::TestCase
+  def test_works
+    assert Object.new.memory_location
+  end
+
+  # Returns the number of objects in +list+ whose memory_location no
+  # longer matches their object_id, i.e. the number that have moved.
+  def assert_object_ids(list)
+    same_count = list.find_all { |obj|
+      obj.memory_location == obj.object_id
+    }.count
+    list.count - same_count
+  end
+
+  def big_list
+    1000.times.map { Object.new } # likely next to each other
+  end
+
+  # Find an object that's allocated in a slot that had a previous
+  # tenant, and that tenant moved and is still alive
+  def find_object_in_recycled_slot(addresses)
+    new_object = nil
+
+    loop do
+      new_object = Object.new
+      if addresses.include? new_object.memory_location
+        break
+      end
+    end
+
+    new_object
+  end
+
+  def test_find_collided_object
+    list_of_objects = big_list
+
+    ids = list_of_objects.map(&:object_id) # capture ids before compaction
+    addresses = list_of_objects.map(&:memory_location)
+
+    # Nothing has moved yet, so every memory_location still equals its object_id
+    assert_equal 0, assert_object_ids(list_of_objects)
+
+    GC.compact
+
+    # Some should have moved
+    assert_operator assert_object_ids(list_of_objects), :>, 0
+
+    new_ids = list_of_objects.map(&:object_id)
+
+    # Object ids should not change after compaction
+    assert_equal ids, new_ids
+
+    new_tenant = find_object_in_recycled_slot(addresses)
+    assert new_tenant
+
+    # This is the object that used to be in new_tenant's position
+    previous_tenant = list_of_objects[addresses.index(new_tenant.memory_location)]
+
+    assert_not_equal previous_tenant.object_id, new_tenant.object_id
+
+    # Should be able to look up the new tenant by object_id
+    assert_equal new_tenant, ObjectSpace._id2ref(new_tenant.object_id)
+
+    # ...and the moved previous tenant by its old object_id
+    assert_equal previous_tenant, ObjectSpace._id2ref(previous_tenant.object_id)
+
+    int = (new_tenant.object_id >> 1)
+    # These two should be the same! but they are not :(
+    assert_equal int, ObjectSpace._id2ref(int.object_id)
+  end
+
+  def test_many_collisions
+    list_of_objects = big_list
+    ids = list_of_objects.map(&:object_id)
+    addresses = list_of_objects.map(&:memory_location)
+
+    GC.compact
+
+    new_tenants = 10.times.map {
+      find_object_in_recycled_slot(addresses)
+    }
+
+    assert_operator GC.stat(:object_id_collisions), :>, 0
+  end
+end
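The test pins down the user-visible contract: object_id stays stable across GC.compact while memory_location (the current address) may change, and _id2ref must keep working for both the old and the new tenant of a slot. Once objects move, an id can no longer be derived from the address alone, so conceptually the VM has to maintain a two-way id/address mapping at move time. A minimal sketch of that bookkeeping (table and helper names here are illustrative, not the exact internals):

    #include "ruby.h"
    #include "ruby/st.h"

    static st_table *id_to_obj;  /* stable object_id -> current address */
    static st_table *obj_to_id;  /* current address  -> stable object_id */

    /* Called for an object that compaction moves from `from` to `to`. */
    static void
    move_object_id(VALUE from, VALUE to)
    {
        st_data_t key = (st_data_t)from, id;

        if (st_delete(obj_to_id, &key, &id)) {
            st_insert(obj_to_id, (st_data_t)to, id); /* re-key by new address */
            st_insert(id_to_obj, id, (st_data_t)to); /* keep _id2ref() working */
        }
    }

The GC.stat(:object_id_collisions) counter read by test_many_collisions exists because a fresh object can be allocated into a slot whose previous tenant moved away but is still alive: the id the new object would derive from its address is already taken, so the VM must detect the collision and hand out a different id.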
diff --git a/variable.c b/variable.c
index b33725b39e..2cfce43e8c 100644
--- a/variable.c
+++ b/variable.c
@@ -1190,6 +1190,16 @@ rb_mark_generic_ivar(VALUE obj)
     }
 }
 
+void
+rb_mv_generic_ivar(VALUE rsrc, VALUE dst)
+{
+    st_data_t key = (st_data_t)rsrc;
+    struct gen_ivtbl *ivtbl;
+
+    if (st_delete(generic_iv_tbl, &key, (st_data_t *)&ivtbl))
+        st_insert(generic_iv_tbl, (st_data_t)dst, (st_data_t)ivtbl);
+}
+
 void
 rb_free_generic_ivar(VALUE obj)
 {
@@ -1940,7 +1950,7 @@ rb_mod_const_missing(VALUE klass, VALUE name)
 static void
 autoload_mark(void *ptr)
 {
-    rb_mark_tbl((st_table *)ptr);
+    rb_mark_tbl_no_pin((st_table *)ptr);
 }
 
 static void
@@ -1956,9 +1966,15 @@ autoload_memsize(const void *ptr)
     return st_memsize(tbl);
 }
 
+static void
+autoload_compact(void *ptr)
+{
+    rb_gc_update_tbl_refs((st_table *)ptr);
+}
+
 static const rb_data_type_t autoload_data_type = {
     "autoload",
-    {autoload_mark, autoload_free, autoload_memsize,},
+    {autoload_mark, autoload_free, autoload_memsize, autoload_compact,},
     0, 0, RUBY_TYPED_FREE_IMMEDIATELY
 };
 
@@ -2004,12 +2020,19 @@ struct autoload_data_i {
     struct list_head constants; /* <=> autoload_const.cnode */
 };
 
+static void
+autoload_i_compact(void *ptr)
+{
+    struct autoload_data_i *p = ptr;
+    p->feature = rb_gc_new_location(p->feature);
+}
+
 static void
 autoload_i_mark(void *ptr)
 {
     struct autoload_data_i *p = ptr;
 
-    rb_gc_mark(p->feature);
+    rb_gc_mark_no_pin(p->feature);
 
     /* allow GC to free us if no modules refer to this via autoload_const.ad */
     if (list_empty(&p->constants)) {
@@ -2036,7 +2059,7 @@ autoload_i_memsize(const void *ptr)
 
 static const rb_data_type_t autoload_data_i_type = {
     "autoload_i",
-    {autoload_i_mark, autoload_i_free, autoload_i_memsize,},
+    {autoload_i_mark, autoload_i_free, autoload_i_memsize, autoload_i_compact},
     0, 0, RUBY_TYPED_FREE_IMMEDIATELY
 };
 
@@ -2961,6 +2984,7 @@ rb_define_const(VALUE klass, const char *name, VALUE val)
     if (!rb_is_const_id(id)) {
	rb_warn("rb_define_const: invalid name `%s' for constant", name);
     }
+    rb_gc_register_mark_object(val);
     rb_const_set(klass, id, val);
 }
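autoload_compact() and autoload_i_compact() fill the new fourth slot in rb_data_type_t's function table, which with this patch is called during compaction so a typed Data object can chase its own references. For an extension type holding a single VALUE the pattern would look roughly like this (a hedged sketch; the wrapper struct and all names are invented for illustration):

    #include "ruby.h"

    struct wrapper { VALUE held; };

    static void
    wrapper_mark(void *ptr)
    {
        /* mark without pinning so `held` is allowed to move */
        rb_gc_mark_no_pin(((struct wrapper *)ptr)->held);
    }

    static void
    wrapper_compact(void *ptr)
    {
        struct wrapper *w = ptr;
        w->held = rb_gc_new_location(w->held); /* pick up the new address */
    }

    static const rb_data_type_t wrapper_type = {
        "wrapper",
        {wrapper_mark, RUBY_TYPED_DEFAULT_FREE, NULL, wrapper_compact,},
        0, 0, RUBY_TYPED_FREE_IMMEDIATELY
    };

The two callbacks have to agree: whatever is marked with rb_gc_mark_no_pin() may move and must be updated in the compact callback, whereas a plain rb_gc_mark() pins the referent and needs no fix-up.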
diff --git a/vm.c b/vm.c
index bbda6ee4c0..361b2b3799 100644
--- a/vm.c
+++ b/vm.c
@@ -2207,6 +2207,15 @@ rb_vm_call_cfunc(VALUE recv, VALUE (*func)(VALUE), VALUE arg,
 
 /* vm */
 
+void
+rb_vm_update_references(void *ptr)
+{
+    if (ptr) {
+        rb_vm_t *vm = ptr;
+        rb_update_st_references(vm->frozen_strings);
+    }
+}
+
 void
 rb_vm_mark(void *ptr)
 {
@@ -2215,12 +2224,30 @@ rb_vm_mark(void *ptr)
     if (ptr) {
 	rb_vm_t *vm = ptr;
 	rb_thread_t *th = 0;
+	long i, len;
+	const VALUE *obj_ary;
 
 	list_for_each(&vm->living_threads, th, vmlt_node) {
 	    rb_gc_mark(th->self);
 	}
 	rb_gc_mark(vm->thgroup_default);
 	rb_gc_mark(vm->mark_object_ary);
+
+	len = RARRAY_LEN(vm->mark_object_ary);
+	obj_ary = RARRAY_CONST_PTR(vm->mark_object_ary);
+	for (i=0; i < len; i++) {
+	    const VALUE *ptr;
+	    long j, jlen;
+
+	    rb_gc_mark(*obj_ary);
+	    jlen = RARRAY_LEN(*obj_ary);
+	    ptr = RARRAY_CONST_PTR(*obj_ary);
+	    for (j=0; j < jlen; j++) {
+		rb_gc_mark(*ptr++);
+	    }
+	    obj_ary++;
+	}
+
 	rb_gc_mark(vm->load_path);
 	rb_gc_mark(vm->load_path_snapshot);
 	RUBY_MARK_UNLESS_NULL(vm->load_path_check_cache);
@@ -2230,6 +2257,8 @@ rb_vm_mark(void *ptr)
 	rb_gc_mark(vm->top_self);
 	RUBY_MARK_UNLESS_NULL(vm->coverages);
 	rb_gc_mark(vm->defined_module_hash);
+	/* Prevent classes from moving */
+	rb_mark_tbl(rb_hash_tbl(vm->defined_module_hash, __FILE__, __LINE__));
 
 	if (vm->loading_table) {
 	    rb_mark_tbl(vm->loading_table);
@@ -2468,7 +2497,7 @@ rb_execution_context_mark(const rb_execution_context_t *ec)
 	rb_control_frame_t *cfp = ec->cfp;
 	rb_control_frame_t *limit_cfp = (void *)(ec->vm_stack + ec->vm_stack_size);
 
-	rb_gc_mark_values((long)(sp - p), p);
+	rb_gc_mark_stack_values((long)(sp - p), p);
 
 	while (cfp != limit_cfp) {
 	    const VALUE *ep = cfp->ep;
diff --git a/vm_core.h b/vm_core.h
index 7852ca4625..b65730392a 100644
--- a/vm_core.h
+++ b/vm_core.h
@@ -346,7 +346,7 @@ struct rb_iseq_constant_body {
     } type;              /* instruction sequence type */
 
     unsigned int iseq_size;
-    const VALUE *iseq_encoded; /* encoded iseq (insn addr and operands) */
+    VALUE *iseq_encoded; /* encoded iseq (insn addr and operands) */
 
     /**
      * parameter information
@@ -414,7 +414,7 @@ struct rb_iseq_constant_body {
 	    int bits_start;
 	    int rest_start;
 	    const ID *table;
-	    const VALUE *default_values;
+	    VALUE *default_values;
 	} *keyword;
     } param;
 
@@ -1028,7 +1028,7 @@ typedef struct {
 
 typedef struct {
     VALUE flags; /* imemo header */
-    const rb_iseq_t *iseq;
+    rb_iseq_t *iseq;
     const VALUE *ep;
     const VALUE *env;
     unsigned int env_size;
diff --git a/vm_eval.c b/vm_eval.c
index c537ecce31..47ed91a9e6 100644
--- a/vm_eval.c
+++ b/vm_eval.c
@@ -484,6 +484,7 @@ rb_type_str(enum ruby_value_type type)
 	case type_case(T_NODE);
 	case type_case(T_ICLASS);
 	case type_case(T_ZOMBIE);
+	case type_case(T_MOVED);
 	case T_MASK: break;
     }
 #undef type_case
diff --git a/vm_method.c b/vm_method.c
index fcb9f1967e..22a4e8ab56 100644
--- a/vm_method.c
+++ b/vm_method.c
@@ -673,7 +673,7 @@ void
 rb_add_method_iseq(VALUE klass, ID mid, const rb_iseq_t *iseq, rb_cref_t *cref, rb_method_visibility_t visi)
 {
     struct { /* should be same fields with rb_method_iseq_struct */
-	const rb_iseq_t *iseqptr;
+	rb_iseq_t *iseqptr;
 	rb_cref_t *cref;
     } iseq_body;
-- 
2.17.0
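The hunks above are the bookkeeping half of compaction: marking decides what is pinned, and every container learns to re-read its references afterwards. The heap-moving half lives in gc.c and is not shown here; one classic way to structure it, consistent with the pinned bits and T_MOVED forwarding objects this patch adds, is the two-finger algorithm, sketched over an abstract slot array rather than MRI's page-based heap (all names and types are simplified for illustration):

    #include <stddef.h>

    typedef struct { int live; int pinned; /* ... payload ... */ } slot_t;

    /* Two-finger compaction: a free cursor walks forward hunting for
     * holes while a scan cursor walks backward hunting for live,
     * unpinned objects; each move empties the old slot so a T_MOVED
     * forwarding object can be planted there. */
    static void
    two_finger_compact(slot_t *heap, size_t len)
    {
        size_t free_i, scan_i;

        if (len == 0) return;
        free_i = 0;
        scan_i = len - 1;

        while (free_i < scan_i) {
            while (free_i < scan_i && heap[free_i].live) free_i++;
            while (free_i < scan_i && (!heap[scan_i].live || heap[scan_i].pinned)) scan_i--;
            if (free_i < scan_i) {
                heap[free_i] = heap[scan_i];   /* move the object into the hole */
                heap[scan_i].live = 0;         /* old slot now holds the forwarding stub */
            }
        }
    }

Pinned objects simply stay put, which is why the marking changes matter: anything reachable from a machine stack or marked with plain rb_gc_mark(), such as the VM stack values now handled by rb_gc_mark_stack_values(), is excluded from moving rather than risking a stale raw pointer.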