Feature #12142 » strong_hash.patch
| hash.c | ||
|---|---|---|
|
long rb_objid_hash(st_index_t index);
|
||
|
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
|
static const st_index_t str_seed = 0xfa835867;
|
||
|
#else
|
||
|
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
|
||
|
#endif
|
||
|
static inline st_index_t
|
||
|
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
|
||
|
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
||
|
{
|
||
|
VALUE hval;
|
||
|
st_index_t hnum;
|
||
| ... | ... | |
|
hnum = rb_objid_hash((st_index_t)a);
|
||
|
}
|
||
|
else if (BUILTIN_TYPE(a) == T_STRING) {
|
||
|
hnum = rb_str_hash(a);
|
||
|
hnum = (strong_p
|
||
|
? rb_str_hash(a)
|
||
|
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
|
||
|
}
|
||
|
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
|
||
|
hnum = RSYMBOL(a)->hashval;
|
||
| ... | ... | |
|
return FIX2LONG(obj);
|
||
|
}
|
||
|
static inline st_index_t
|
||
|
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
|
return any_hash_general(a, FALSE, other_func);
|
||
|
}
|
||
|
static st_index_t
|
||
|
rb_any_hash(VALUE a)
|
||
|
{
|
||
|
rb_any_hash_weak(VALUE a) {
|
||
|
return any_hash_weak(a, obj_any_hash);
|
||
|
}
|
||
|
static inline st_index_t
|
||
|
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
|
return any_hash_general(a, TRUE, other_func);
|
||
|
}
|
||
|
static st_index_t
|
||
|
rb_any_hash(VALUE a) {
|
||
|
return any_hash(a, obj_any_hash);
|
||
|
}
|
||
| ... | ... | |
|
static const struct st_hash_type objhash = {
|
||
|
rb_any_cmp,
|
||
|
rb_any_hash_weak,
|
||
|
rb_any_hash,
|
||
|
};
|
||
| include/ruby/st.h | ||
|---|---|---|
|
struct st_hash_type {
|
||
|
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
|
||
|
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
|
||
|
/* The following is an optional func for stronger hash. When we
|
||
|
have many different keys with the same hash we can switch to
|
||
|
use it to prevent a denial attack with usage of hash table
|
||
|
collisions. */
|
||
|
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
|
||
|
};
|
||
|
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
|
||
| ... | ... | |
|
struct st_table {
|
||
|
/* Cached features of the table -- see st.c for more details. */
|
||
|
unsigned char entry_power, bin_power, size_ind;
|
||
|
/* True when we are rebuilding the table. */
|
||
|
unsigned char inside_rebuild_p;
|
||
|
/* How many times the table was rebuilt. */
|
||
|
unsigned int rebuilds_num;
|
||
|
/* Currently used hash function. */
|
||
|
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
|
||
|
const struct st_hash_type *type;
|
||
|
/* Number of entries currently in the table. */
|
||
|
st_index_t num_entries;
|
||
| st.c | ||
|---|---|---|
|
/* Return hash value of KEY for table TAB. */
|
||
|
static inline st_hash_t
|
||
|
do_hash(st_data_t key, st_table *tab) {
|
||
|
st_index_t h = (st_index_t)(tab->type->hash)(key);
|
||
|
st_index_t h = (st_index_t)(tab->curr_hash)(key);
|
||
|
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
|
st_hash_t hash = h;
|
||
|
#else
|
||
| ... | ... | |
|
static void
|
||
|
make_tab_empty(st_table *tab)
|
||
|
{
|
||
|
tab->curr_hash = tab->type->hash;
|
||
|
tab->num_entries = 0;
|
||
|
tab->rebuilds_num = 0;
|
||
|
tab->entries_start = tab->entries_bound = 0;
|
||
| ... | ... | |
|
tab->entry_power = n;
|
||
|
tab->bin_power = features[n].bin_power;
|
||
|
tab->size_ind = features[n].size_ind;
|
||
|
tab->inside_rebuild_p = FALSE;
|
||
|
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||
|
tab->bins = NULL;
|
||
|
else
|
||
| ... | ... | |
|
}
|
||
|
bound = tab->entries_bound;
|
||
|
entries = tab->entries;
|
||
|
tab->inside_rebuild_p = TRUE;
|
||
|
if ((2 * tab->num_entries <= get_allocated_entries(tab)
|
||
|
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|
||
|
|| (tab->entry_power > MINIMAL_POWER2
|
||
| ... | ... | |
|
else {
|
||
|
new_tab = st_init_table_with_size(tab->type,
|
||
|
2 * tab->num_entries - 1);
|
||
|
st_assert(new_tab->curr_hash == new_tab->type->hash);
|
||
|
new_tab->curr_hash = tab->curr_hash;
|
||
|
new_entries = new_tab->entries;
|
||
|
}
|
||
|
ni = 0;
|
||
| ... | ... | |
|
tab->entries_start = 0;
|
||
|
tab->entries_bound = tab->num_entries;
|
||
|
tab->rebuilds_num++;
|
||
|
tab->inside_rebuild_p = FALSE;
|
||
|
#ifdef ST_DEBUG
|
||
|
st_check(tab);
|
||
|
#endif
|
||
| ... | ... | |
|
}
|
||
|
}
|
||
|
/* Recalculate hashes of entries in table TAB. */
|
||
|
static void
|
||
|
reset_entry_hashes (st_table *tab)
|
||
|
{
|
||
|
st_index_t i, bound;
|
||
|
st_table_entry *entries, *curr_entry_ptr;
|
||
|
|
||
|
bound = tab->entries_bound;
|
||
|
entries = tab->entries;
|
||
|
for (i = tab->entries_start; i < bound; i++) {
|
||
|
curr_entry_ptr = &entries[i];
|
||
|
if (! DELETED_ENTRY_P(curr_entry_ptr))
|
||
|
curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
|
||
|
}
|
||
|
}
|
||
|
/* If we have the following number of collisions with different keys
|
||
|
but with the same hash during finding a bin for new entry
|
||
|
inclusions, possibly a denial attack is going on. Start to use a
|
||
|
stronger hash. */
|
||
|
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
|
||
|
/* Return index of table TAB bin for HASH_VALUE and KEY through
|
||
|
BIN_IND and the pointed value as the function result. Reserve the
|
||
|
bin for inclusion of the corresponding entry into the table if it
|
||
| ... | ... | |
|
st_index_t entry_index;
|
||
|
st_index_t first_deleted_bin_ind;
|
||
|
st_table_entry *entries;
|
||
|
int hit;
|
||
|
|
||
|
st_assert(tab != NULL && tab->bins != NULL
|
||
|
&& tab->entries_bound <= get_allocated_entries(tab)
|
||
|
&& tab->entries_start <= tab->entries_bound);
|
||
|
repeat:
|
||
|
ind = hash_bin(curr_hash_value, tab);
|
||
|
#ifdef QUADRATIC_PROBE
|
||
|
d = 1;
|
||
| ... | ... | |
|
FOUND_BIN;
|
||
|
first_deleted_bin_ind = UNDEFINED_BIN_IND;
|
||
|
entries = tab->entries;
|
||
|
hit = 0;
|
||
|
for (;;) {
|
||
|
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
|
||
|
if (EMPTY_BIN_P(entry_index)) {
|
||
| ... | ... | |
|
} else if (! DELETED_BIN_P(entry_index)) {
|
||
|
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
|
||
|
break;
|
||
|
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
|
||
|
hit++;
|
||
|
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
|
||
|
&& tab->curr_hash != tab->type->strong_hash
|
||
|
&& tab->type->strong_hash != NULL
|
||
|
&& ! tab->inside_rebuild_p) {
|
||
|
tab->curr_hash = tab->type->strong_hash;
|
||
|
*hash_value = curr_hash_value = do_hash(key, tab);
|
||
|
reset_entry_hashes(tab);
|
||
|
rebuild_table(tab);
|
||
|
goto repeat;
|
||
|
}
|
||
|
}
|
||
|
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
|
||
|
first_deleted_bin_ind = ind;
|
||
|
#ifdef QUADRATIC_PROBE
|
||