Feature #12142 » strong_hash.patch
hash.c | ||
---|---|---|
long rb_objid_hash(st_index_t index);
|
||
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
static const st_index_t str_seed = 0xfa835867;
|
||
#else
|
||
static const st_index_t str_seed = 0xc42b5e2e6480b23bULL;
|
||
#endif
|
||
static inline st_index_t
|
||
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
|
||
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
||
{
|
||
VALUE hval;
|
||
st_index_t hnum;
|
||
... | ... | |
hnum = rb_objid_hash((st_index_t)a);
|
||
}
|
||
else if (BUILTIN_TYPE(a) == T_STRING) {
|
||
hnum = rb_str_hash(a);
|
||
hnum = (strong_p
|
||
? rb_str_hash(a)
|
||
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed));
|
||
}
|
||
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
|
||
hnum = RSYMBOL(a)->hashval;
|
||
... | ... | |
return FIX2LONG(obj);
|
||
}
|
||
static inline st_index_t
|
||
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
return any_hash_general(a, FALSE, other_func);
|
||
}
|
||
static st_index_t
|
||
rb_any_hash(VALUE a)
|
||
{
|
||
rb_any_hash_weak(VALUE a) {
|
||
return any_hash_weak(a, obj_any_hash);
|
||
}
|
||
static inline st_index_t
|
||
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
return any_hash_general(a, TRUE, other_func);
|
||
}
|
||
static st_index_t
|
||
rb_any_hash(VALUE a) {
|
||
return any_hash(a, obj_any_hash);
|
||
}
|
||
... | ... | |
static const struct st_hash_type objhash = {
|
||
rb_any_cmp,
|
||
rb_any_hash_weak,
|
||
rb_any_hash,
|
||
};
|
||
include/ruby/st.h | ||
---|---|---|
struct st_hash_type {
|
||
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
|
||
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
|
||
/* The following is an optional func for stronger hash. When we
|
||
have many different keys with the same hash we can switch to
|
||
use it to prevent a denial attack with usage of hash table
|
||
collisions. */
|
||
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
|
||
};
|
||
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
|
||
... | ... | |
struct st_table {
|
||
/* Cached features of the table -- see st.c for more details. */
|
||
unsigned char entry_power, bin_power, size_ind;
|
||
/* True when we are rebuilding the table. */
|
||
unsigned char inside_rebuild_p;
|
||
/* How many times the table was rebuilt. */
|
||
unsigned int rebuilds_num;
|
||
/* Currently used hash function. */
|
||
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
|
||
const struct st_hash_type *type;
|
||
/* Number of entries currently in the table. */
|
||
st_index_t num_entries;
|
st.c | ||
---|---|---|
/* Return hash value of KEY for table TAB. */
|
||
static inline st_hash_t
|
||
do_hash(st_data_t key, st_table *tab) {
|
||
st_index_t h = (st_index_t)(tab->type->hash)(key);
|
||
st_index_t h = (st_index_t)(tab->curr_hash)(key);
|
||
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
st_hash_t hash = h;
|
||
#else
|
||
... | ... | |
static void
|
||
make_tab_empty(st_table *tab)
|
||
{
|
||
tab->curr_hash = tab->type->hash;
|
||
tab->num_entries = 0;
|
||
tab->rebuilds_num = 0;
|
||
tab->entries_start = tab->entries_bound = 0;
|
||
... | ... | |
tab->entry_power = n;
|
||
tab->bin_power = features[n].bin_power;
|
||
tab->size_ind = features[n].size_ind;
|
||
tab->inside_rebuild_p = FALSE;
|
||
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||
tab->bins = NULL;
|
||
else
|
||
... | ... | |
}
|
||
bound = tab->entries_bound;
|
||
entries = tab->entries;
|
||
tab->inside_rebuild_p = TRUE;
|
||
if ((2 * tab->num_entries <= get_allocated_entries(tab)
|
||
&& REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab))
|
||
|| (tab->entry_power > MINIMAL_POWER2
|
||
... | ... | |
else {
|
||
new_tab = st_init_table_with_size(tab->type,
|
||
2 * tab->num_entries - 1);
|
||
st_assert(new_tab->curr_hash == new_tab->type->hash);
|
||
new_tab->curr_hash = tab->curr_hash;
|
||
new_entries = new_tab->entries;
|
||
}
|
||
ni = 0;
|
||
... | ... | |
tab->entries_start = 0;
|
||
tab->entries_bound = tab->num_entries;
|
||
tab->rebuilds_num++;
|
||
tab->inside_rebuild_p = FALSE;
|
||
#ifdef ST_DEBUG
|
||
st_check(tab);
|
||
#endif
|
||
... | ... | |
}
|
||
}
|
||
/* Recalculate hashes of entries in table TAB. */
|
||
static void
|
||
reset_entry_hashes (st_table *tab)
|
||
{
|
||
st_index_t i, bound;
|
||
st_table_entry *entries, *curr_entry_ptr;
|
||
|
||
bound = tab->entries_bound;
|
||
entries = tab->entries;
|
||
for (i = tab->entries_start; i < bound; i++) {
|
||
curr_entry_ptr = &entries[i];
|
||
if (! DELETED_ENTRY_P(curr_entry_ptr))
|
||
curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab);
|
||
}
|
||
}
|
||
/* If we have the following number of collisions with different keys
|
||
but with the same hash during finding a bin for new entry
|
||
inclusions, possibly a denial attack is going on. Start to use a
|
||
stronger hash. */
|
||
#define HIT_THRESHOULD_FOR_STRONG_HASH 10
|
||
/* Return index of table TAB bin for HASH_VALUE and KEY through
|
||
BIN_IND and the pointed value as the function result. Reserve the
|
||
bin for inclusion of the corresponding entry into the table if it
|
||
... | ... | |
st_index_t entry_index;
|
||
st_index_t first_deleted_bin_ind;
|
||
st_table_entry *entries;
|
||
int hit;
|
||
|
||
st_assert(tab != NULL && tab->bins != NULL
|
||
&& tab->entries_bound <= get_allocated_entries(tab)
|
||
&& tab->entries_start <= tab->entries_bound);
|
||
repeat:
|
||
ind = hash_bin(curr_hash_value, tab);
|
||
#ifdef QUADRATIC_PROBE
|
||
d = 1;
|
||
... | ... | |
FOUND_BIN;
|
||
first_deleted_bin_ind = UNDEFINED_BIN_IND;
|
||
entries = tab->entries;
|
||
hit = 0;
|
||
for (;;) {
|
||
entry_index = get_bin(tab->bins, get_size_ind(tab), ind);
|
||
if (EMPTY_BIN_P(entry_index)) {
|
||
... | ... | |
} else if (! DELETED_BIN_P(entry_index)) {
|
||
if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key))
|
||
break;
|
||
if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) {
|
||
hit++;
|
||
if (hit > HIT_THRESHOULD_FOR_STRONG_HASH
|
||
&& tab->curr_hash != tab->type->strong_hash
|
||
&& tab->type->strong_hash != NULL
|
||
&& ! tab->inside_rebuild_p) {
|
||
tab->curr_hash = tab->type->strong_hash;
|
||
*hash_value = curr_hash_value = do_hash(key, tab);
|
||
reset_entry_hashes(tab);
|
||
rebuild_table(tab);
|
||
goto repeat;
|
||
}
|
||
}
|
||
} else if (first_deleted_bin_ind == UNDEFINED_BIN_IND)
|
||
first_deleted_bin_ind = ind;
|
||
#ifdef QUADRATIC_PROBE
|