Project

General

Profile

Feature #9614 » 0003-parse.y-switch-to-ihash-saves-200K-out-of-the-box.patch

normalperson (Eric Wong), 03/09/2014 02:22 AM

View differences:

parse.y
#include "ruby/st.h"
#include "ruby/encoding.h"
#include "internal.h"
#include "ihash.h"
#include "node.h"
#include "parse.h"
#include "id.h"
......
static struct symbols {
ID last_id;
st_table *sym_id;
st_table *id_str;
struct rb_ihash_tbl *sym_id;
struct rb_ihash_tbl *id_str;
#if ENABLE_SELECTOR_NAMESPACE
st_table *ivar2_id;
st_table *id_ivar2;
......
int minor_marked;
} global_symbols = {tLAST_TOKEN};
static const struct st_hash_type symhash = {
rb_str_hash_cmp,
rb_str_hash,
/* this struct is stored in both sym_id and id_str tables */
struct rb_idsym {
/* node through which this entry is added to the id_str table */
struct rb_ihash_node id_str_node;
/* the ID of this entry; hashed and compared by the id_str callbacks */
ID id;
/* node through which this entry is added to the sym_id table */
struct rb_ihash_node sym_id_node;
/* string associated with the ID; compared by the sym_id callbacks */
VALUE symstr;
/* rb_str_hash() of symstr, stored so sym_id hashing need not recompute it */
st_index_t hashval;
};
/*
 * Map a node embedded as the sym_id_node member back to the
 * struct rb_idsym that contains it.
 */
static inline struct rb_idsym *
rb_idsym_of_s(const struct rb_ihash_node *node)
{
    struct rb_idsym *entry = RB_CONTAINER_OF(node, struct rb_idsym, sym_id_node);

    return entry;
}
static int
sym_id_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
{
struct rb_idsym *is1 = rb_idsym_of_s(n1);
struct rb_idsym *is2 = rb_idsym_of_s(n2);
if (is1->hashval == is2->hashval) {
return rb_str_hash_cmp(is1->symstr, is2->symstr);
}
return 1;
}
static st_index_t
sym_id_hash(const struct rb_ihash_node *node)
{
struct rb_idsym *is = rb_idsym_of_s(node);
return is->hashval; /* needs to be precomputed */
}
/*
 * Map a node embedded as the id_str_node member back to the
 * struct rb_idsym that contains it.
 */
static inline struct rb_idsym *
rb_idsym_of_i(const struct rb_ihash_node *node)
{
    struct rb_idsym *entry = RB_CONTAINER_OF(node, struct rb_idsym, id_str_node);

    return entry;
}
static int
id_str_cmp(const struct rb_ihash_node *n1, const struct rb_ihash_node *n2)
{
return rb_idsym_of_i(n1)->id != rb_idsym_of_i(n2)->id;
}
static st_index_t
id_str_hash(const struct rb_ihash_node *node)
{
return rb_idsym_of_i(node)->id;
}
/*
 * Look up +id+ in global_symbols.id_str.
 * On a hit, stores the entry's string in *str and returns 1.
 * On a miss, returns 0 and leaves *str untouched.
 */
static int
id_str_lookup(ID id, VALUE *str)
{
    struct rb_idsym key;
    struct rb_ihash_node *found;

    /* only the id field is filled in; the id_str callbacks read nothing else */
    key.id = id;
    found = rb_ihash_lookup(global_symbols.id_str, &key.id_str_node);
    if (!found)
        return 0;

    *str = rb_idsym_of_i(found)->symstr;
    return 1;
}
/*
 * Look up the string +str+ in global_symbols.sym_id.
 * On a hit, stores the entry's ID in *id and returns 1.
 * On a miss, returns 0 and leaves *id untouched.
 */
static int
sym_id_lookup(VALUE str, ID *id)
{
    struct rb_idsym key;
    struct rb_ihash_node *found;

    /* the sym_id callbacks read symstr and hashval, so both are set up front */
    key.symstr = str;
    key.hashval = rb_str_hash(str);
    found = rb_ihash_lookup(global_symbols.sym_id, &key.sym_id_node);
    if (!found)
        return 0;

    *id = rb_idsym_of_s(found)->id;
    return 1;
}
/*
 * Callback pair (compare, hash) for the string => ID table; both callbacks
 * reach the entry through its sym_id_node member.
 */
static const struct rb_ihash_type sym_id_hash_type = {
sym_id_cmp,
sym_id_hash,
};
/*
 * Callback pair (compare, hash) for the ID => string table; both callbacks
 * reach the entry through its id_str_node member.
 */
static const struct rb_ihash_type id_str_hash_type = {
id_str_cmp,
id_str_hash,
};
#if ENABLE_SELECTOR_NAMESPACE
......
void
Init_sym(void)
{
global_symbols.sym_id = st_init_table_with_size(&symhash, 1000);
global_symbols.id_str = st_init_numtable_with_size(1000);
global_symbols.sym_id = rb_ihash_new(&sym_id_hash_type, 10);
global_symbols.id_str = rb_ihash_new(&id_str_hash_type, 10);
#if ENABLE_SELECTOR_NAMESPACE
global_symbols.ivar2_id = st_init_table_with_size(&ivar2_hash_type, 1000);
global_symbols.id_ivar2 = st_init_numtable_with_size(1000);
......
Init_id();
}
static enum rb_ihash_next
mark_symstr(struct rb_ihash_node *id_str_node, void *unused)
{
struct rb_idsym *is = rb_idsym_of_i(id_str_node);
rb_gc_mark(is->symstr);
return RB_IHASH_CONTINUE;
}
void
rb_gc_mark_symbols(int full_mark)
{
if (full_mark || global_symbols.minor_marked == 0) {
rb_mark_tbl(global_symbols.id_str);
if (global_symbols.id_str) {
rb_ihash_foreach(&global_symbols.id_str, mark_symstr, 0);
}
rb_gc_mark_locations(global_symbols.op_sym,
global_symbols.op_sym + numberof(global_symbols.op_sym));
......
static ID
register_symid_str(ID id, VALUE str)
{
struct rb_idsym *is = ALLOC(struct rb_idsym);
OBJ_FREEZE(str);
str = rb_fstring(str);
......
RUBY_DTRACE_SYMBOL_CREATE(RSTRING_PTR(str), rb_sourcefile(), rb_sourceline());
}
st_add_direct(global_symbols.sym_id, (st_data_t)str, id);
st_add_direct(global_symbols.id_str, id, (st_data_t)str);
is->hashval = rb_str_hash(str);
is->symstr = str;
is->id = id;
rb_ihash_add_direct(&global_symbols.sym_id, &is->sym_id_node);
rb_ihash_add_direct(&global_symbols.id_str, &is->id_str_node);
global_symbols.minor_marked = 0;
return id;
}
......
ID
rb_intern3(const char *name, long len, rb_encoding *enc)
{
st_data_t data;
ID id;
struct RString fake_str;
VALUE str = setup_fake_str(&fake_str, name, len);
rb_enc_associate(str, enc);
OBJ_FREEZE(str);
if (st_lookup(global_symbols.sym_id, str, &data))
return (ID)data;
if (sym_id_lookup(str, &id))
return id;
str = rb_enc_str_new(name, len, enc); /* make true string */
return intern_str(str);
......
/*
 * Return the ID for +str+: when the sym_id table already has an entry for
 * it, that ID is returned; otherwise a duplicate of +str+ is handed to
 * intern_str() and its result is returned.
 *
 * NOTE(review): this is a diff view, so the st_data_t/st_lookup lines and
 * the ID/sym_id_lookup lines appear together; presumably only the
 * sym_id_lookup pair remains once the patch is applied -- confirm against
 * the patched file.
 */
ID
rb_intern_str(VALUE str)
{
st_data_t id;
ID id;
if (st_lookup(global_symbols.sym_id, str, &id))
return (ID)id;
if (sym_id_lookup(str, &id))
return id;
return intern_str(rb_str_dup(str));
}
VALUE
rb_id2str(ID id)
{
st_data_t data;
VALUE str;
if (id < tLAST_TOKEN) {
int i = 0;
if (id < INT_MAX && rb_ispunct((int)id)) {
VALUE str = global_symbols.op_sym[i = (int)id];
str = global_symbols.op_sym[i = (int)id];
if (!str) {
char name[2];
name[0] = (char)id;
......
}
for (i = 0; i < op_tbl_count; i++) {
if (op_tbl[i].token == id) {
VALUE str = global_symbols.op_sym[i];
str = global_symbols.op_sym[i];
if (!str) {
str = rb_usascii_str_new2(op_tbl[i].name);
OBJ_FREEZE(str);
......
}
}
if (st_lookup(global_symbols.id_str, id, &data)) {
VALUE str = (VALUE)data;
if (id_str_lookup(id, &str)) {
if (RBASIC(str)->klass == 0)
RBASIC_SET_CLASS_RAW(str, rb_cString);
return str;
......
if (is_attrset_id(id)) {
ID id_stem = (id & ~ID_SCOPE_MASK);
VALUE str;
do {
if (!!(str = rb_id2str(id_stem | ID_LOCAL))) break;
......
str = rb_str_dup(str);
rb_str_cat(str, "=", 1);
register_symid_str(id, str);
if (st_lookup(global_symbols.id_str, id, &data)) {
VALUE str = (VALUE)data;
if (id_str_lookup(id, &str)) {
if (RBASIC(str)->klass == 0)
RBASIC_SET_CLASS_RAW(str, rb_cString);
return str;
......
return next_id_base() | ID_INTERNAL;
}
/*
 * Iteration callback over the sym_id table: pushes ID2SYM() of the visited
 * entry's ID onto the array passed as the extra argument, then continues.
 *
 * NOTE(review): this is a diff view, so two variants are interleaved here --
 * one taking (VALUE sym, ID value, VALUE ary) and returning ST_CONTINUE, and
 * one taking an rb_ihash node plus a void * argument and returning
 * RB_IHASH_CONTINUE; presumably only the latter remains once the patch is
 * applied -- confirm against the patched file.
 */
static int
symbols_i(VALUE sym, ID value, VALUE ary)
static enum rb_ihash_next
symbols_i(struct rb_ihash_node *sym_id_node, void *arg)
{
rb_ary_push(ary, ID2SYM(value));
return ST_CONTINUE;
VALUE ary = (VALUE)arg;
struct rb_idsym *is = rb_idsym_of_s(sym_id_node);
rb_ary_push(ary, ID2SYM(is->id));
return RB_IHASH_CONTINUE;
}
/*
......
{
VALUE ary = rb_ary_new2(global_symbols.sym_id->num_entries);
st_foreach(global_symbols.sym_id, symbols_i, ary);
rb_ihash_foreach(&global_symbols.sym_id, symbols_i, (void *)ary);
return ary;
}
......
ID
rb_check_id(volatile VALUE *namep)
{
st_data_t id;
ID id;
VALUE tmp;
VALUE name = *namep;
......
sym_check_asciionly(name);
if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
return (ID)id;
if (sym_id_lookup(name, &id))
return id;
if (rb_is_attrset_name(name)) {
struct RString fake_str;
......
rb_enc_copy(localname, name);
OBJ_FREEZE(localname);
if (st_lookup(global_symbols.sym_id, (st_data_t)localname, &id)) {
if (sym_id_lookup(localname, &id)) {
return rb_id_attrset((ID)id);
}
RB_GC_GUARD(name);
......
ID
rb_check_id_cstr(const char *ptr, long len, rb_encoding *enc)
{
st_data_t id;
ID id;
struct RString fake_str;
const VALUE name = setup_fake_str(&fake_str, ptr, len);
rb_enc_associate(name, enc);
sym_check_asciionly(name);
if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id))
return (ID)id;
if (sym_id_lookup(name, &id))
return id;
if (rb_is_attrset_name(name)) {
fake_str.as.heap.len = len - 1;
if (st_lookup(global_symbols.sym_id, (st_data_t)name, &id)) {
return rb_id_attrset((ID)id);
if (sym_id_lookup(name, &id)) {
return rb_id_attrset(id);
}
}
(4-4/4)