Bug #13002 ยป switching_hash_removal.patch
| hash.c | ||
|---|---|---|
|     /* normalize -0.0 to 0.0 */ | ||
|     if (d == 0.0) d = 0.0; | ||
| #if SIZEOF_INT == SIZEOF_VOIDP | ||
|     return rb_memhash(&d, sizeof(d)); | ||
|     return rb_hash_start(rb_memhash(&d, sizeof(d))); | ||
| #else | ||
|     { | ||
| 	union {double d; uint64_t i;} u; | ||
| 	u.d = d; | ||
| 	return rb_objid_hash(u.i); | ||
| 	return rb_objid_hash(rb_hash_start(u.i)); | ||
|     } | ||
| #endif | ||
| } | ||
| #if SIZEOF_INT == SIZEOF_VOIDP | ||
| static const st_index_t str_seed = 0xfa835867; | ||
| #else | ||
| static const st_index_t str_seed = 0xc42b5e2e6480b23bULL; | ||
| #endif | ||
| static inline st_index_t | ||
| any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE)) | ||
| any_hash(VALUE a, st_index_t (*other_func)(VALUE)) | ||
| { | ||
|     VALUE hval; | ||
|     st_index_t hnum; | ||
| ... | ... | |
| 	hnum = rb_objid_hash((st_index_t)a); | ||
|     } | ||
|     else if (BUILTIN_TYPE(a) == T_STRING) { | ||
| 	hnum = (strong_p | ||
| 		? rb_str_hash(a) | ||
| 		: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), str_seed)); | ||
| 	hnum = rb_str_hash(a); | ||
|     } | ||
|     else if (BUILTIN_TYPE(a) == T_SYMBOL) { | ||
| 	hnum = RSYMBOL(a)->hashval; | ||
| ... | ... | |
|     return FIX2LONG(obj); | ||
| } | ||
| static inline st_index_t | ||
| any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) { | ||
|     return any_hash_general(a, FALSE, other_func); | ||
| } | ||
| static st_index_t | ||
| rb_any_hash_weak(VALUE a) { | ||
|     return any_hash_weak(a, obj_any_hash); | ||
| } | ||
| static inline st_index_t | ||
| any_hash(VALUE a, st_index_t (*other_func)(VALUE)) { | ||
|     return any_hash_general(a, TRUE, other_func); | ||
| } | ||
| static st_index_t | ||
| rb_any_hash(VALUE a) { | ||
| rb_any_hash(VALUE a) | ||
| { | ||
|     return any_hash(a, obj_any_hash); | ||
| } | ||
| ... | ... | |
| } | ||
| static inline uint64_t | ||
| key64_hash (uint64_t key, uint32_t seed) | ||
| key64_hash(uint64_t key, uint32_t seed) | ||
| { | ||
|     return mult_and_mix(key + seed, prime1); | ||
| } | ||
| ... | ... | |
| long | ||
| rb_objid_hash(st_index_t index) | ||
| { | ||
|     return (long)key64_hash(index, (uint32_t)prime2); | ||
|     return (long)key64_hash(rb_hash_start(index), (uint32_t)prime2); | ||
| } | ||
| static st_index_t | ||
| ... | ... | |
| static const struct st_hash_type objhash = { | ||
|     rb_any_cmp, | ||
|     rb_any_hash_weak, | ||
|     rb_any_hash, | ||
| }; | ||
| ... | ... | |
|     } | ||
| #endif | ||
|     return (st_index_t) key64_hash((st_index_t)n, (uint32_t) prime2); | ||
|     return (st_index_t) key64_hash(rb_hash_start((st_index_t)n), (uint32_t) prime2); | ||
| } | ||
| static const struct st_hash_type identhash = { | ||
| include/ruby/st.h | ||
|---|---|---|
| struct st_hash_type { | ||
|     int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */ | ||
|     st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */ | ||
|     /* The following is an optional func for stronger hash.  When we | ||
|        have many different keys with the same hash we can switch to | ||
|        use it to prevent a denial attack with usage of hash table | ||
|        collisions. */ | ||
|     st_index_t (*strong_hash)(ANYARGS /*st_data_t*/); | ||
| }; | ||
| #if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P) | ||
| ... | ... | |
| struct st_table { | ||
|     /* Cached features of the table -- see st.c for more details.  */ | ||
|     unsigned char entry_power, bin_power, size_ind; | ||
|     /* True when we are rebuilding the table.  */ | ||
|     unsigned char inside_rebuild_p; | ||
|     /* How many times the table was rebuilt.  */ | ||
|     unsigned int rebuilds_num; | ||
|     /* Currently used hash function.  */ | ||
|     st_index_t (*curr_hash)(ANYARGS /*st_data_t*/); | ||
|     const struct st_hash_type *type; | ||
|     /* Number of entries currently in the table.  */ | ||
|     st_index_t num_entries; | ||
| st.c | ||
|---|---|---|
| /* Return hash value of KEY for table TAB.  */ | ||
| static inline st_hash_t | ||
| do_hash(st_data_t key, st_table *tab) { | ||
|     st_index_t h = (st_index_t)(tab->curr_hash)(key); | ||
|     st_index_t h = (st_index_t)(tab->type->hash)(key); | ||
| #if SIZEOF_INT == SIZEOF_VOIDP | ||
|     st_hash_t hash = h; | ||
| #else | ||
| ... | ... | |
| static void | ||
| make_tab_empty(st_table *tab) | ||
| { | ||
|     tab->curr_hash = tab->type->hash; | ||
|     tab->num_entries = 0; | ||
|     tab->entries_start = tab->entries_bound = 0; | ||
|     if (tab->bins != NULL) | ||
| ... | ... | |
|     tab->entry_power = n; | ||
|     tab->bin_power = features[n].bin_power; | ||
|     tab->size_ind = features[n].size_ind; | ||
|     tab->inside_rebuild_p = FALSE; | ||
|     if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS) | ||
|         tab->bins = NULL; | ||
|     else | ||
| ... | ... | |
|     st_assert(tab != NULL); | ||
|     bound = tab->entries_bound; | ||
|     entries = tab->entries; | ||
|     tab->inside_rebuild_p = TRUE; | ||
|     if ((2 * tab->num_entries <= get_allocated_entries(tab) | ||
| 	 && REBUILD_THRESHOLD * tab->num_entries > get_allocated_entries(tab)) | ||
| 	|| tab->num_entries < (1 << MINIMAL_POWER2)) { | ||
| ... | ... | |
|     else { | ||
|         new_tab = st_init_table_with_size(tab->type, | ||
| 					  2 * tab->num_entries - 1); | ||
| 	st_assert(new_tab->curr_hash == new_tab->type->hash); | ||
| 	new_tab->curr_hash = tab->curr_hash; | ||
| 	new_entries = new_tab->entries; | ||
|     } | ||
|     ni = 0; | ||
| ... | ... | |
|     tab->entries_start = 0; | ||
|     tab->entries_bound = tab->num_entries; | ||
|     tab->rebuilds_num++; | ||
|     tab->inside_rebuild_p = FALSE; | ||
| #ifdef ST_DEBUG | ||
|     st_check(tab); | ||
| #endif | ||
| ... | ... | |
|     } | ||
| } | ||
| /* Recalculate hashes of entries in table TAB.  */ | ||
| static void | ||
| reset_entry_hashes (st_table *tab) | ||
| { | ||
|     st_index_t i, bound; | ||
|     st_table_entry *entries, *curr_entry_ptr; | ||
|     bound = tab->entries_bound; | ||
|     entries = tab->entries; | ||
|     for (i = tab->entries_start; i < bound; i++) { | ||
| 	curr_entry_ptr = &entries[i]; | ||
| 	if (! DELETED_ENTRY_P(curr_entry_ptr)) | ||
| 	    curr_entry_ptr->hash = do_hash(curr_entry_ptr->key, tab); | ||
|     } | ||
| } | ||
| /* If we have the following number of collisions with different keys | ||
|    but with the same hash during finding a bin for new entry | ||
|    inclusions, possibly a denial attack is going on.  Start to use a | ||
|    stronger hash.  */ | ||
| #define HIT_THRESHOULD_FOR_STRONG_HASH 10 | ||
| /* Return index of table TAB bin for HASH_VALUE and KEY through | ||
|    BIN_IND and the pointed value as the function result.  Reserve the | ||
|    bin for inclusion of the corresponding entry into the table if it | ||
| ... | ... | |
|     st_index_t entry_index; | ||
|     st_index_t first_deleted_bin_ind; | ||
|     st_table_entry *entries; | ||
|     int hit; | ||
|     st_assert(tab != NULL && tab->bins != NULL | ||
| 	      && tab->entries_bound <= get_allocated_entries(tab) | ||
| 	      && tab->entries_start <= tab->entries_bound); | ||
|   repeat: | ||
|     ind = hash_bin(curr_hash_value, tab); | ||
| #ifdef QUADRATIC_PROBE | ||
|     d = 1; | ||
| ... | ... | |
|     FOUND_BIN; | ||
|     first_deleted_bin_ind = UNDEFINED_BIN_IND; | ||
|     entries = tab->entries; | ||
|     hit = 0; | ||
|     for (;;) { | ||
|         entry_index = get_bin(tab->bins, get_size_ind(tab), ind); | ||
|         if (EMPTY_BIN_P(entry_index)) { | ||
| ... | ... | |
|         } else if (! DELETED_BIN_P(entry_index)) { | ||
|             if (PTR_EQUAL(tab, &entries[entry_index - ENTRY_BASE], curr_hash_value, key)) | ||
|                 break; | ||
| 	    if (curr_hash_value == entries[entry_index - ENTRY_BASE].hash) { | ||
| 	        hit++; | ||
| 		if (hit > HIT_THRESHOULD_FOR_STRONG_HASH | ||
| 		    && tab->curr_hash != tab->type->strong_hash | ||
| 		    && tab->type->strong_hash != NULL | ||
| 		    && ! tab->inside_rebuild_p) { | ||
| 		    tab->curr_hash = tab->type->strong_hash; | ||
| 		    *hash_value = curr_hash_value = do_hash(key, tab); | ||
| 		    reset_entry_hashes(tab); | ||
| 		    rebuild_table(tab); | ||
| 		    goto repeat; | ||
| 		} | ||
| 	    } | ||
|         } else if (first_deleted_bin_ind == UNDEFINED_BIN_IND) | ||
|             first_deleted_bin_ind = ind; | ||
| #ifdef QUADRATIC_PROBE | ||