From 7d7fedefa8b8584a778831279919b6a7c38ffb4e Mon Sep 17 00:00:00 2001
From: Eric Wong
Date: Sun, 9 Mar 2014 02:05:10 +0000
Subject: [PATCH 3/4] parse.y: switch to ihash, saves ~200K out-of-the-box

out-of-the-box: "ruby -e exit"
---
Reviewer revisions to the posted patch:
 * register_symid_str(): allocate the rb_idsym entry after rb_fstring(),
   so the entry is not leaked if that call raises.
 * rb_check_id(): drop the now-redundant (ID) cast, matching
   rb_check_id_cstr().
 * Add comments describing struct rb_idsym, the table callbacks and the
   two lookup helpers.
 (The post-image hash on the "index" line is from the original posting.)

 parse.y | 214 +++++++++++++++++++++++++++++++++++++++++++++++++++------------
 1 file changed, 175 insertions(+), 39 deletions(-)

diff --git a/parse.y b/parse.y
index 492fd3d..a7495b2 100644
--- a/parse.y
+++ b/parse.y
@@ -22,6 +22,7 @@
 #include "ruby/st.h"
 #include "ruby/encoding.h"
 #include "internal.h"
+#include "ihash.h"
 #include "node.h"
 #include "parse.h"
 #include "id.h"
@@ -10103,8 +10104,8 @@ static const struct {
 
 static struct symbols {
     ID last_id;
-    st_table *sym_id;
-    st_table *id_str;
+    struct rb_ihash_tbl *sym_id;
+    struct rb_ihash_tbl *id_str;
 #if ENABLE_SELECTOR_NAMESPACE
     st_table *ivar2_id;
     st_table *id_ivar2;
@@ -10113,9 +10114,124 @@ static struct symbols {
     int minor_marked;
 } global_symbols = {tLAST_TOKEN};
 
-static const struct st_hash_type symhash = {
-    rb_str_hash_cmp,
-    rb_str_hash,
+/*
+ * Allocated by register_symid_str(), one per registered ID/string pair.
+ * The same struct is linked into both tables: id_str finds it by id
+ * (through id_str_node) and sym_id finds it by symstr (through sym_id_node).
+ */
+struct rb_idsym {
+    struct rb_ihash_node id_str_node;
+    ID id;
+    struct rb_ihash_node sym_id_node;
+    VALUE symstr;
+    st_index_t hashval; /* rb_str_hash(symstr), set by register_symid_str */
+};
+
+/* recover the entry that embeds the given sym_id table node */
+static inline struct rb_idsym *
+rb_idsym_of_s(const struct rb_ihash_node *node)
+{
+    return RB_CONTAINER_OF(node, struct rb_idsym, sym_id_node);
+}
+
+/*
+ * sym_id comparator.  Entries whose cached hash values differ are
+ * reported as unequal (1) without comparing the strings; otherwise the
+ * result of rb_str_hash_cmp() on the two strings is returned.
+ */
+static int
+sym_id_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
+{
+    struct rb_idsym *is1 = rb_idsym_of_s(n1);
+    struct rb_idsym *is2 = rb_idsym_of_s(n2);
+
+    if (is1->hashval == is2->hashval) {
+	return rb_str_hash_cmp(is1->symstr, is2->symstr);
+    }
+    return 1;
+}
+
+/* sym_id hash function: returns the hash value cached in the entry */
+static st_index_t
+sym_id_hash(const struct rb_ihash_node *node)
+{
+    struct rb_idsym *is = rb_idsym_of_s(node);
+
+    return is->hashval; /* needs to be precomputed */
+}
+
+/* recover the entry that embeds the given id_str table node */
+static inline struct rb_idsym *
+rb_idsym_of_i(const struct rb_ihash_node *node)
+{
+    return RB_CONTAINER_OF(node, struct rb_idsym, id_str_node);
+}
+
+/* id_str comparator: 0 when both entries carry the same ID, 1 otherwise */
+static int
+id_str_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
+{
+    return rb_idsym_of_i(n1)->id != rb_idsym_of_i(n2)->id;
+}
+
+/* id_str hash function: the ID itself is used as the hash value */
+static st_index_t
+id_str_hash(const struct rb_ihash_node *node)
+{
+    return rb_idsym_of_i(node)->id;
+}
+
+/*
+ * Find the string registered for +id+.  On a hit, stores it in *str and
+ * returns 1; on a miss, returns 0 and leaves *str untouched.
+ */
+static int
+id_str_lookup(ID id, VALUE *str)
+{
+    struct rb_idsym finder;
+    struct rb_ihash_node *node;
+
+    /* id_str_hash/id_str_cmp read only the id field of the key */
+    finder.id = id;
+    node = rb_ihash_lookup(global_symbols.id_str, &finder.id_str_node);
+
+    if (node) {
+	*str = rb_idsym_of_i(node)->symstr;
+	return 1;
+    }
+    return 0;
+}
+
+/*
+ * Find the ID registered for the string +str+.  On a hit, stores it in
+ * *id and returns 1; on a miss, returns 0 and leaves *id untouched.
+ */
+static int
+sym_id_lookup(VALUE str, ID *id)
+{
+    struct rb_idsym finder;
+    struct rb_ihash_node *node;
+
+    /* sym_id_hash/sym_id_cmp read only symstr and hashval of the key */
+    finder.symstr = str;
+    finder.hashval = rb_str_hash(str);
+    node = rb_ihash_lookup(global_symbols.sym_id, &finder.sym_id_node);
+
+    if (node) {
+	*id = rb_idsym_of_s(node)->id;
+	return 1;
+    }
+    return 0;
+}
+
+static const struct rb_ihash_type sym_id_hash_type = {
+    sym_id_cmp,
+    sym_id_hash,
+};
+
+static const struct rb_ihash_type id_str_hash_type = {
+    id_str_cmp,
+    id_str_hash,
 };
 
 #if ENABLE_SELECTOR_NAMESPACE
@@ -10148,8 +10264,8 @@ static const struct st_hash_type ivar2_hash_type = {
 void
 Init_sym(void)
 {
-    global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
-    global_symbols.id_str = st_init_numtable_with_size(1000);
+    global_symbols.sym_id = rb_ihash_new(&sym_id_hash_type, 10);
+    global_symbols.id_str = rb_ihash_new(&id_str_hash_type, 10);
 #if ENABLE_SELECTOR_NAMESPACE
     global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000);
     global_symbols.id_ivar2 = st_init_numtable_with_size(1000);
@@ -10164,11 +10280,22 @@ Init_sym(void)
     Init_id();
 }
 
+/* rb_ihash_foreach callback: GC-mark the string of one id_str entry */
+static enum rb_ihash_next
+mark_symstr(struct rb_ihash_node *id_str_node, void *unused)
+{
+    struct rb_idsym *is = rb_idsym_of_i(id_str_node);
+    rb_gc_mark(is->symstr);
+    return RB_IHASH_CONTINUE;
+}
+
 void
 rb_gc_mark_symbols(int full_mark)
 {
     if (full_mark || global_symbols.minor_marked == 0) {
-	rb_mark_tbl(global_symbols.id_str);
+	if (global_symbols.id_str) {
+	    rb_ihash_foreach(&global_symbols.id_str, mark_symstr, 0);
+	}
 	rb_gc_mark_locations(global_symbols.op_sym,
 			     global_symbols.op_sym + numberof(global_symbols.op_sym));
 
@@ -10351,6 +10478,8 @@ register_symid(ID id, const char *name, long len, rb_encoding *enc)
 static ID
 register_symid_str(ID id, VALUE str)
 {
+    struct rb_idsym *is;
+
     OBJ_FREEZE(str);
     str = rb_fstring(str);
 
@@ -10358,8 +10487,14 @@ register_symid_str(ID id, VALUE str)
 	RUBY_DTRACE_SYMBOL_CREATE(RSTRING_PTR(str), rb_sourcefile(), rb_sourceline());
     }
 
-    st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
-    st_add_direct(global_symbols.id_str, id, (st_data_t)str);
+    /* allocate after rb_fstring() above so that the entry is not
+     * leaked if that call raises */
+    is = ALLOC(struct rb_idsym);
+    is->hashval = rb_str_hash(str);
+    is->symstr = str;
+    is->id = id;
+    rb_ihash_add_direct(&global_symbols.sym_id, &is->sym_id_node);
+    rb_ihash_add_direct(&global_symbols.id_str, &is->id_str_node);
     global_symbols.minor_marked = 0;
     return id;
 }
@@ -10398,14 +10533,14 @@ setup_fake_str(struct RString *fake_str, const char *name, long len)
 ID
 rb_intern3(const char *name, long len, rb_encoding *enc)
 {
-    st_data_t data;
+    ID id;
     struct RString fake_str;
     VALUE str = setup_fake_str(&fake_str, name, len);
     rb_enc_associate(str, enc);
     OBJ_FREEZE(str);
 
-    if (st_lookup(global_symbols.sym_id, str, &data))
-	return (ID)data;
+    if (sym_id_lookup(str, &id))
+	return id;
 
     str = rb_enc_str_new(name, len, enc); /* make true string */
     return intern_str(str);
@@ -10550,23 +10685,23 @@ rb_intern(const char *name)
 ID
 rb_intern_str(VALUE str)
 {
-    st_data_t id;
+    ID id;
 
-    if (st_lookup(global_symbols.sym_id, str, &id))
-	return (ID)id;
+    if (sym_id_lookup(str, &id))
+	return id;
     return intern_str(rb_str_dup(str));
 }
 
 VALUE
 rb_id2str(ID id)
 {
-    st_data_t data;
+    VALUE str;
 
     if (id < tLAST_TOKEN) {
 	int i = 0;
 
 	if (id < INT_MAX && rb_ispunct((int)id)) {
-	    VALUE str = global_symbols.op_sym[i = (int)id];
+	    str = global_symbols.op_sym[i = (int)id];
 	    if (!str) {
 		char name[2];
 		name[0] = (char)id;
@@ -10581,7 +10716,7 @@ rb_id2str(ID id)
 	}
 	for (i = 0; i < op_tbl_count; i++) {
 	    if (op_tbl[i].token == id) {
-		VALUE str = global_symbols.op_sym[i];
+		str = global_symbols.op_sym[i];
 		if (!str) {
 		    str = rb_usascii_str_new2(op_tbl[i].name);
 		    OBJ_FREEZE(str);
@@ -10594,8 +10729,7 @@ rb_id2str(ID id)
 	}
     }
 
-    if (st_lookup(global_symbols.id_str, id, &data)) {
-	VALUE str = (VALUE)data;
+    if (id_str_lookup(id, &str)) {
 	if (RBASIC(str)->klass == 0)
 	    RBASIC_SET_CLASS_RAW(str, rb_cString);
 	return str;
@@ -10603,7 +10737,6 @@ rb_id2str(ID id)
 
     if (is_attrset_id(id)) {
 	ID id_stem = (id & ~ID_SCOPE_MASK);
-	VALUE str;
 
 	do {
 	    if (!!(str = rb_id2str(id_stem | ID_LOCAL))) break;
@@ -10617,8 +10750,7 @@ rb_id2str(ID id)
 	str = rb_str_dup(str);
 	rb_str_cat(str, "=", 1);
 	register_symid_str(id, str);
-	if (st_lookup(global_symbols.id_str, id, &data)) {
-	    VALUE str = (VALUE)data;
+	if (id_str_lookup(id, &str)) {
 	    if (RBASIC(str)->klass == 0)
 		RBASIC_SET_CLASS_RAW(str, rb_cString);
 	    return str;
@@ -10642,11 +10774,15 @@ rb_make_internal_id(void)
     return next_id_base() | ID_INTERNAL;
 }
 
-static int
-symbols_i(VALUE sym, ID value, VALUE ary)
+/* rb_ihash_foreach callback: push the Symbol of one sym_id entry onto arg */
+static enum rb_ihash_next
+symbols_i(struct rb_ihash_node *sym_id_node, void *arg)
 {
-    rb_ary_push(ary, ID2SYM(value));
-    return ST_CONTINUE;
+    VALUE ary = (VALUE)arg;
+    struct rb_idsym *is = rb_idsym_of_s(sym_id_node);
+
+    rb_ary_push(ary, ID2SYM(is->id));
+    return RB_IHASH_CONTINUE;
 }
 
 /*
@@ -10670,7 +10806,7 @@ rb_sym_all_symbols(void)
 {
     VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries);
 
-    st_foreach(global_symbols.sym_id, symbols_i, ary);
+    rb_ihash_foreach(&global_symbols.sym_id, symbols_i, (void *)ary);
     return ary;
 }
 
@@ -10730,7 +10866,7 @@ rb_is_junk_id(ID id)
 ID
 rb_check_id(volatile VALUE *namep)
 {
-    st_data_t id;
+    ID id;
     VALUE tmp;
     VALUE name = *namep;
 
@@ -10750,8 +10886,8 @@ rb_check_id(volatile VALUE *namep)
 
     sym_check_asciionly(name);
 
-    if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
-	return (ID)id;
+    if (sym_id_lookup(name, &id))
+	return id;
 
     if (rb_is_attrset_name(name)) {
 	struct RString fake_str;
@@ -10760,7 +10896,7 @@ rb_check_id(volatile VALUE *namep)
 	rb_enc_copy(localname, name);
 	OBJ_FREEZE(localname);
 
-	if (st_lookup(global_symbols.sym_id, (st_data_t)localname, &id)) {
-	    return rb_id_attrset((ID)id);
+	if (sym_id_lookup(localname, &id)) {
+	    return rb_id_attrset(id);
 	}
 	RB_GC_GUARD(name);
@@ -10772,20 +10908,20 @@ rb_check_id(volatile VALUE *namep)
 ID
 rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
 {
-    st_data_t id;
+    ID id;
     struct RString fake_str;
     const VALUE name = setup_fake_str(&fake_str, ptr, len);
     rb_enc_associate(name, enc);
 
     sym_check_asciionly(name);
 
-    if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
-	return (ID)id;
+    if (sym_id_lookup(name, &id))
+	return id;
 
     if (rb_is_attrset_name(name)) {
 	fake_str.as.heap.len = len - 1;
-	if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) {
-	    return rb_id_attrset((ID)id);
+	if (sym_id_lookup(name, &id)) {
+	    return rb_id_attrset(id);
 	}
     }
 
-- 
1.9.0.rc3.13.gda73b5f