Feature #12142 » st-march31.patch
benchmark/bm_bighash.rb | ||
---|---|---|
h = {}; 20000000.times {|n| h[n] = n }
|
benchmark/bm_hash_aref_dsym.rb | ||
---|---|---|
h = {}
|
||
syms = ('a'..'z').map { |s| s.to_sym }
|
||
syms.each { |s| h[s] = 1 }
|
||
200_000.times { syms.each { |s| h[s] } }
|
||
400_000.times { syms.each { |s| h[s] } }
|
benchmark/bm_hash_aref_fix.rb | ||
---|---|---|
h = {}
|
||
nums = (1..26).to_a
|
||
nums.each { |i| h[i] = i }
|
||
200_000.times { nums.each { |s| h[s] } }
|
||
800_000.times { nums.each { |s| h[s] } }
|
benchmark/bm_hash_aref_flo.rb | ||
---|---|---|
h = {}
|
||
strs = [*1..10000].map! {|i| i.fdiv(10)}
|
||
strs.each { |s| h[s] = s }
|
||
50.times { strs.each { |s| h[s] } }
|
||
500.times { strs.each { |s| h[s] } }
|
benchmark/bm_hash_aref_miss.rb | ||
---|---|---|
strs = ('a'..'z').to_a.map!(&:freeze)
|
||
strs.each { |s| h[s] = s }
|
||
strs = ('A'..'Z').to_a
|
||
200_000.times { strs.each { |s| h[s] } }
|
||
500_000.times { strs.each { |s| h[s] } }
|
benchmark/bm_hash_aref_str.rb | ||
---|---|---|
h = {}
|
||
strs = ('a'..'z').to_a.map!(&:freeze)
|
||
strs.each { |s| h[s] = s }
|
||
200_000.times { strs.each { |s| h[s] } }
|
||
500_000.times { strs.each { |s| h[s] } }
|
benchmark/bm_hash_aref_sym.rb | ||
---|---|---|
syms.map!(&:to_sym)
|
||
end
|
||
syms.each { |s| h[s] = s }
|
||
200_000.times { syms.each { |s| h[s] } }
|
||
500_000.times { syms.each { |s| h[s] } }
|
benchmark/bm_hash_aref_sym_long.rb | ||
---|---|---|
syms.map!(&:to_sym)
|
||
end
|
||
syms.each { |s| h[s] = s }
|
||
200_000.times { syms.each { |s| h[s] } }
|
||
500_000.times { syms.each { |s| h[s] } }
|
benchmark/bm_hash_flatten.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
1000.times do
|
||
2000.times do
|
||
h.flatten
|
||
end
|
benchmark/bm_hash_ident_flo.rb | ||
---|---|---|
h = {}.compare_by_identity
|
||
strs = (1..10000).to_a.map!(&:to_f)
|
||
strs.each { |s| h[s] = s }
|
||
50.times { strs.each { |s| h[s] } }
|
||
500.times { strs.each { |s| h[s] } }
|
benchmark/bm_hash_ident_num.rb | ||
---|---|---|
h = {}.compare_by_identity
|
||
nums = (1..26).to_a
|
||
nums.each { |n| h[n] = n }
|
||
200_000.times { nums.each { |n| h[n] } }
|
||
500_000.times { nums.each { |n| h[n] } }
|
benchmark/bm_hash_ident_obj.rb | ||
---|---|---|
h = {}.compare_by_identity
|
||
objs = 26.times.map { Object.new }
|
||
objs.each { |o| h[o] = o }
|
||
200_000.times { objs.each { |o| h[o] } }
|
||
500_000.times { objs.each { |o| h[o] } }
|
benchmark/bm_hash_ident_str.rb | ||
---|---|---|
h = {}.compare_by_identity
|
||
strs = ('a'..'z').to_a
|
||
strs.each { |s| h[s] = s }
|
||
200_000.times { strs.each { |s| h[s] } }
|
||
500_000.times { strs.each { |s| h[s] } }
|
benchmark/bm_hash_ident_sym.rb | ||
---|---|---|
h = {}.compare_by_identity
|
||
syms = ('a'..'z').to_a.map(&:to_sym)
|
||
syms.each { |s| h[s] = s }
|
||
200_000.times { syms.each { |s| h[s] } }
|
||
500_000.times { syms.each { |s| h[s] } }
|
benchmark/bm_hash_keys.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
5000.times do
|
||
10000.times do
|
||
h.keys
|
||
end
|
benchmark/bm_hash_shift.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
50000.times do
|
||
1000000.times do
|
||
k, v = h.shift
|
||
h[k] = v
|
||
end
|
benchmark/bm_hash_shift_u16.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
300000.times do
|
||
1000000.times do
|
||
k, v = h.shift
|
||
h[k] = v
|
||
end
|
benchmark/bm_hash_shift_u24.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
300000.times do
|
||
1000000.times do
|
||
k, v = h.shift
|
||
h[k] = v
|
||
end
|
benchmark/bm_hash_shift_u32.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
300000.times do
|
||
1000000.times do
|
||
k, v = h.shift
|
||
h[k] = v
|
||
end
|
benchmark/bm_hash_small2.rb | ||
---|---|---|
1000000.times.map{|i| a={}; 2.times{|j| a[j]=j}; a}
|
benchmark/bm_hash_small4.rb | ||
---|---|---|
1000000.times.map{|i| a={}; 4.times{|j| a[j]=j}; a}
|
benchmark/bm_hash_small8.rb | ||
---|---|---|
1000000.times.map{|i| a={}; 8.times{|j| a[j]=j}; a}
|
benchmark/bm_hash_to_proc.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
5000.times do |i|
|
||
500000.times do |i|
|
||
[i].map(&h)
|
||
end
|
benchmark/bm_hash_values.rb | ||
---|---|---|
h[i] = nil
|
||
end
|
||
5000.times do
|
||
10000.times do
|
||
h.values
|
||
end
|
ext/-test-/st/foreach/foreach.c | ||
---|---|---|
if (c->nr == 0) {
|
||
st_data_t i;
|
||
if (!c->tbl->entries_packed) rb_bug("should be packed\n");
|
||
if (c->tbl->bins == NULL) rb_bug("should be packed\n");
|
||
/* force unpacking during iteration: */
|
||
for (i = 1; i < expect_size; i++)
|
||
st_add_direct(c->tbl, i, i);
|
||
if (c->tbl->entries_packed) rb_bug("should be unpacked\n");
|
||
if (c->tbl->bins != NULL) rb_bug("should be unpacked\n");
|
||
}
|
||
if (key != c->nr) {
|
||
... | ... | |
st_add_direct(tbl, 0, 0);
|
||
if (!tbl->entries_packed) rb_bug("should still be packed\n");
|
||
if (tbl->bins == NULL) rb_bug("should still be packed\n");
|
||
st_foreach_check(tbl, unp_fec_i, (st_data_t)&c, -1);
|
||
... | ... | |
(VALUE)c.nr, (VALUE)expect_size);
|
||
}
|
||
if (tbl->entries_packed) rb_bug("should be unpacked\n");
|
||
if (tbl->bins != NULL) rb_bug("should be unpacked\n");
|
||
st_free_table(tbl);
|
||
... | ... | |
st_add_direct(tbl, 0, 0);
|
||
if (!tbl->entries_packed) rb_bug("should still be packed\n");
|
||
if (tbl->bins == NULL) rb_bug("should still be packed\n");
|
||
st_foreach(tbl, unp_fe_i, (st_data_t)&c);
|
||
... | ... | |
(VALUE)c.nr, (VALUE)expect_size);
|
||
}
|
||
if (tbl->entries_packed) rb_bug("should be unpacked\n");
|
||
if (tbl->bins != NULL) rb_bug("should be unpacked\n");
|
||
st_free_table(tbl);
|
||
hash.c | ||
---|---|---|
long rb_objid_hash(st_index_t index);
|
||
static st_index_t rb_num_hash_start(st_index_t n);
|
||
static st_index_t
|
||
any_hash(VALUE a, st_index_t (*other_func)(VALUE))
|
||
obj_any_hash(VALUE obj)
|
||
{
|
||
obj = rb_hash(obj);
|
||
return FIX2LONG(obj);
|
||
}
|
||
/* Prime number (79087987342985798987987) mod 32/64 used for hash
|
||
calculations. */
|
||
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
static const st_index_t jauquet_prime_mod = 2053222611; /* mod 32 */
|
||
#else
|
||
static const st_index_t jauquet_prime_mod = 6795498992951210195ULL; /* mod 64 */
|
||
#endif
|
||
static inline st_index_t
|
||
any_hash_general(VALUE a, int strong_p, st_index_t (*other_func)(VALUE))
|
||
{
|
||
VALUE hval;
|
||
st_index_t hnum;
|
||
... | ... | |
hnum = rb_objid_hash((st_index_t)a);
|
||
}
|
||
else if (BUILTIN_TYPE(a) == T_STRING) {
|
||
hnum = rb_str_hash(a);
|
||
hnum = (strong_p
|
||
? rb_str_hash(a)
|
||
: st_hash(RSTRING_PTR(a), RSTRING_LEN(a), jauquet_prime_mod));
|
||
}
|
||
else if (BUILTIN_TYPE(a) == T_SYMBOL) {
|
||
hnum = RSYMBOL(a)->hashval;
|
||
... | ... | |
}
|
||
static st_index_t
|
||
obj_any_hash(VALUE obj)
|
||
{
|
||
obj = rb_hash(obj);
|
||
return FIX2LONG(obj);
|
||
any_hash_weak(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
return any_hash_general(a, FALSE, other_func);
|
||
}
|
||
static st_index_t
|
||
rb_any_hash_weak(VALUE a) {
|
||
return any_hash_weak(a, obj_any_hash);
|
||
}
|
||
static st_index_t
|
||
any_hash(VALUE a, st_index_t (*other_func)(VALUE)) {
|
||
return any_hash_general(a, TRUE, other_func);
|
||
}
|
||
static st_index_t
|
||
... | ... | |
static st_index_t
|
||
rb_num_hash_start(st_index_t n)
|
||
{
|
||
/*
|
||
* This hash function is lightly-tuned for Ruby. Further tuning
|
||
* should be possible. Notes:
|
||
*
|
||
* - (n >> 3) alone is great for heap objects and OK for fixnum,
|
||
* however symbols perform poorly.
|
||
* - (n >> (RUBY_SPECIAL_SHIFT+3)) was added to make symbols hash well,
|
||
* n.b.: +3 to remove most ID scope, +1 worked well initially, too
|
||
* n.b.: +1 (instead of 3) worked well initially, too
|
||
* - (n << 16) was finally added to avoid losing bits for fixnums
|
||
* - avoid expensive modulo instructions, it is currently only
|
||
* shifts and bitmask operations.
|
||
*/
|
||
return (n >> (RUBY_SPECIAL_SHIFT + 3) ^ (n << 16)) ^ (n >> 3);
|
||
/* Use a simple multiplicative hashing. It is not high quality
|
||
hash function but it is very fast and we can spent the saved
|
||
time on collission processing. Mix low and high bits not to
|
||
ignore high bits after the multiplication. */
|
||
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
return ((n >> 16) ^ (n & 0xffff)) * jauquet_prime_mod;
|
||
#else
|
||
return ((n >> 32) ^ (n & 0xffffffff)) * jauquet_prime_mod;
|
||
#endif
|
||
}
|
||
long
|
||
rb_objid_hash(st_index_t index)
|
||
{
|
||
st_index_t hnum = rb_num_hash_start(index);
|
||
hnum = rb_hash_start(hnum);
|
||
hnum = rb_hash_uint(hnum, (st_index_t)rb_any_hash);
|
||
hnum = rb_hash_end(hnum);
|
||
return hnum;
|
||
return rb_num_hash_start(index);
|
||
}
|
||
static st_index_t
|
||
... | ... | |
static const struct st_hash_type objhash = {
|
||
rb_any_cmp,
|
||
rb_any_hash_weak,
|
||
rb_any_hash,
|
||
};
|
||
include/ruby/st.h | ||
---|---|---|
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
|
||
/* This is a public domain general purpose hash table package
|
||
originally written by Peter Moore @ UCB.
|
||
/* @(#) st.h 5.1 89/12/14 */
|
||
The hash table data strutures were redesigned and the package was
|
||
rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
|
||
#ifndef RUBY_ST_H
|
||
#define RUBY_ST_H 1
|
||
... | ... | |
typedef struct st_table st_table;
|
||
typedef st_data_t st_index_t;
|
||
/* Maximal value of unsigned integer type st_index_t. */
|
||
#define MAX_ST_INDEX_VAL (~(st_index_t) 0)
|
||
|
||
typedef int st_compare_func(st_data_t, st_data_t);
|
||
typedef st_index_t st_hash_func(st_data_t);
|
||
... | ... | |
struct st_hash_type {
|
||
int (*compare)(ANYARGS /*st_data_t, st_data_t*/); /* st_compare_func* */
|
||
st_index_t (*hash)(ANYARGS /*st_data_t*/); /* st_hash_func* */
|
||
/* The following is an optional func for stronger hash. When we
|
||
have many different keys with the same hash we can switch to
|
||
use it to prevent a denial attack with usage of hash table
|
||
collisions. */
|
||
st_index_t (*strong_hash)(ANYARGS /*st_data_t*/);
|
||
};
|
||
#define ST_INDEX_BITS (sizeof(st_index_t) * CHAR_BIT)
|
||
... | ... | |
# define ST_DATA_COMPATIBLE_P(type) 0
|
||
#endif
|
||
typedef struct st_table_entry st_table_entry;
|
||
struct st_table_entry; /* defined in st.c */
|
||
struct st_table {
|
||
/* Cached features of the table -- see st.c for more details. */
|
||
unsigned char entry_power, bin_power, size_ind;
|
||
/* How many times the table was rebuilt. */
|
||
unsigned int rebuilds_num;
|
||
/* Currently used hash function. */
|
||
st_index_t (*curr_hash)(ANYARGS /*st_data_t*/);
|
||
const struct st_hash_type *type;
|
||
st_index_t num_bins;
|
||
unsigned int entries_packed : 1;
|
||
#ifdef __GNUC__
|
||
/*
|
||
* C spec says,
|
||
* A bit-field shall have a type that is a qualified or unqualified
|
||
* version of _Bool, signed int, unsigned int, or some other
|
||
* implementation-defined type. It is implementation-defined whether
|
||
* atomic types are permitted.
|
||
* In short, long and long long bit-field are implementation-defined
|
||
* feature. Therefore we want to suppress a warning explicitly.
|
||
*/
|
||
__extension__
|
||
#endif
|
||
st_index_t num_entries : ST_INDEX_BITS - 1;
|
||
union {
|
||
struct {
|
||
struct st_table_entry **bins;
|
||
void *private_list_head[2];
|
||
} big;
|
||
struct {
|
||
struct st_packed_entry *entries;
|
||
st_index_t real_entries;
|
||
} packed;
|
||
} as;
|
||
/* Number of entries currently in the table. */
|
||
st_index_t num_entries;
|
||
/* Array of bins used for access by keys. */
|
||
st_index_t *bins;
|
||
/* Start and bound index of entries in array entries.
|
||
entries_starts and entries_bound are in interval
|
||
[0,allocated_entries]. */
|
||
st_index_t entries_start, entries_bound;
|
||
/* Array of size 2^entry_power. */
|
||
st_table_entry *entries;
|
||
};
|
||
#define st_is_member(table,key) st_lookup((table),(key),(st_data_t *)0)
|
||
... | ... | |
int st_update(st_table *table, st_data_t key, st_update_callback_func *func, st_data_t arg);
|
||
int st_foreach(st_table *, int (*)(ANYARGS), st_data_t);
|
||
int st_foreach_check(st_table *, int (*)(ANYARGS), st_data_t, st_data_t);
|
||
int st_reverse_foreach(st_table *, int (*)(ANYARGS), st_data_t);
|
||
st_index_t st_keys(st_table *table, st_data_t *keys, st_index_t size);
|
||
st_index_t st_keys_check(st_table *table, st_data_t *keys, st_index_t size, st_data_t never);
|
||
st_index_t st_values(st_table *table, st_data_t *values, st_index_t size);
|
||
st_index_t st_values_check(st_table *table, st_data_t *values, st_index_t size, st_data_t never);
|
||
void st_add_direct(st_table *, st_data_t, st_data_t);
|
||
void st_free_table(st_table *);
|
||
size_t st_memsize(const st_table *);
|
||
void st_cleanup_safe(st_table *, st_data_t);
|
||
void st_clear(st_table *);
|
||
st_table *st_copy(st_table *);
|
||
... | ... | |
int st_locale_insensitive_strncasecmp(const char *s1, const char *s2, size_t n);
|
||
#define st_strcasecmp st_locale_insensitive_strcasecmp
|
||
#define st_strncasecmp st_locale_insensitive_strncasecmp
|
||
size_t st_memsize(const st_table *);
|
||
st_index_t st_hash(const void *ptr, size_t len, st_index_t h);
|
||
st_index_t st_hash_uint32(st_index_t h, uint32_t i);
|
||
st_index_t st_hash_uint(st_index_t h, st_index_t i);
|
||
... | ... | |
st_index_t st_hash_start(st_index_t h);
|
||
#define st_hash_start(h) ((st_index_t)(h))
|
||
st_index_t st_hash_index(st_index_t k);
|
||
st_index_t st_hash_double(double d);
|
||
RUBY_SYMBOL_EXPORT_END
|
||
#if defined(__cplusplus)
|
st.c | ||
---|---|---|
/* This is a public domain general purpose hash table package written by Peter Moore @ UCB. */
|
||
/* static char sccsid[] = "@(#) st.c 5.1 89/12/14 Crucible"; */
|
||
/* This is a public domain general purpose hash table package
|
||
originally written by Peter Moore @ UCB.
|
||
The hash table data structures were redesigned and the package was
|
||
rewritten by Vladimir Makarov <vmakarov@redhat.com>. */
|
||
/* The original package implemented classic bucket-based hash tables
|
||
with entries doubly linked for an access by their insertion order.
|
||
To decrease pointer chasing and as a consequence to improve a data
|
||
locality the current implementation is based on storing entries in
|
||
an array and using hash tables with open addressing. The current
|
||
entries are more compact in comparison with the original ones and
|
||
this also improves the data locality.
|
||
The hash table has two arrays called *bins* and *entries*.
|
||
bins:
|
||
-------
|
||
| | entries array:
|
||
|-------| --------------------------------
|
||
| index | | | entry: | | |
|
||
|-------| | | | | |
|
||
| ... | | ... | hash | ... | ... |
|
||
|-------| | | key | | |
|
||
| empty | | | record | | |
|
||
|-------| --------------------------------
|
||
| ... | ^ ^
|
||
|-------| |_ entries start |_ entries bound
|
||
|deleted|
|
||
-------
|
||
o The entry array contains table entries in the same order as they
|
||
were inserted.
|
||
When the first entry is deleted, a variable containing index of
|
||
the current first entry (*entries start*) is changed. In all
|
||
other cases of the deletion, we just mark the entry as deleted by
|
||
using a reserved hash value.
|
||
Such organization of the entry storage makes operations of the
|
||
table shift and the entries traversal very fast.
|
||
o The bins provide access to the entries by their keys. The
|
||
key hash is mapped to a bin containing *index* of the
|
||
corresponding entry in the entry array.
|
||
The bin array size is always power of two, it makes mapping very
|
||
fast by using the corresponding lower bits of the hash.
|
||
Generally it is not a good idea to ignore some part of the hash.
|
||
But alternative approach is worse. For example, we could use a
|
||
modulo operation for mapping and a prime number for the size of
|
||
the bin array. Unfortunately, the modulo operation for big
|
||
64-bit numbers are extremely slow (it takes more than 100 cycles
|
||
on modern Intel CPUs).
|
||
Still other bits of the hash value are used when the mapping
|
||
results in a collision. In this case we use a secondary hash
|
||
value which is a result of a function of the collision bin
|
||
index and the original hash value. The function choice
|
||
guarantees that we can traverse all bins and finally find the
|
||
corresponding bin as after several iterations the function
|
||
becomes a full cycle linear congruential generator because it
|
||
satisfies requirements of the Hull-Dobell theorem.
|
||
When an entry is removed from the table besides marking the
|
||
hash in the corresponding entry described above, we also mark
|
||
the bin by a special value in order to find entries which had
|
||
a collision with the removed entries.
|
||
There are two reserved values for the bins. One denotes an
|
||
empty bin, another one denotes a bin for a deleted entry.
|
||
o The length of the bin array is at least two times more than the
|
||
entry array length. This keeps the table load factor healthy.
|
||
The trigger of rebuilding the table is always a case when we can
|
||
not insert an entry anymore at the entries bound. We could
|
||
change the entries bound too in case of deletion but than we need
|
||
a special code to count bins with corresponding deleted entries
|
||
and reset the bin values when there are too many bins
|
||
corresponding deleted entries
|
||
Table rebuilding is done by creation of a new entry array and
|
||
bins of an appropriate size. We also try to reuse the arrays
|
||
in some cases by compacting the array and removing deleted
|
||
entries.
|
||
o To save memory very small tables have no allocated arrays
|
||
bins. We use a linear search for an access by a key.
|
||
o To save more memory we use 8-, 16-, 32- and 64- bit indexes in
|
||
bins depending on the current hash table size.
|
||
This implementation speeds up the Ruby hash table benchmarks in
|
||
average by more 40% on Intel Haswell CPU.
|
||
*/
|
||
#ifdef NOT_RUBY
|
||
#include "regint.h"
|
||
... | ... | |
#include <stdlib.h>
|
||
#endif
|
||
#include <string.h>
|
||
#include "ccan/list/list.h"
|
||
#include <assert.h>
|
||
#ifdef __GNUC__
|
||
#define PREFETCH(addr, write_p) __builtin_prefetch(addr, write_p)
|
||
#define EXPECT(expr, val) __builtin_expect(expr, val)
|
||
#define ATTRIBUTE_UNUSED __attribute__((unused))
|
||
#else
|
||
#define PREFETCH(addr, write_p)
|
||
#define EXPECT(expr, val) (expr)
|
||
#define ATTRIBUTE_UNUSED
|
||
#endif
|
||
typedef struct st_table_entry st_table_entry;
|
||
#ifdef ST_DEBUG
|
||
#define st_assert(cond) assert(cond)
|
||
#else
|
||
#define st_assert(cond) ((void)(0 && (cond)))
|
||
#endif
|
||
/* The type of hashes. */
|
||
typedef st_index_t st_hash_t;
|
||
struct st_table_entry {
|
||
st_index_t hash;
|
||
st_hash_t hash;
|
||
st_data_t key;
|
||
st_data_t record;
|
||
st_table_entry *next;
|
||
struct list_node olist;
|
||
};
|
||
typedef struct st_packed_entry {
|
||
st_index_t hash;
|
||
st_data_t key, val;
|
||
} st_packed_entry;
|
||
#ifndef STATIC_ASSERT
|
||
#define STATIC_ASSERT(name, expr) typedef int static_assert_##name##_check[(expr) ? 1 : -1]
|
||
#endif
|
||
#define ST_DEFAULT_MAX_DENSITY 5
|
||
#define ST_DEFAULT_INIT_TABLE_SIZE 16
|
||
#define ST_DEFAULT_PACKED_TABLE_SIZE 18
|
||
#define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
|
||
#define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
|
||
STATIC_ASSERT(st_packed_entry, sizeof(st_packed_entry) == sizeof(st_table_entry*[PACKED_UNIT]));
|
||
STATIC_ASSERT(st_packed_bins, sizeof(st_packed_entry[MAX_PACKED_HASH]) <= sizeof(st_table_entry*[ST_DEFAULT_PACKED_TABLE_SIZE]));
|
||
/*
|
||
* DEFAULT_MAX_DENSITY is the default for the largest we allow the
|
||
* average number of items per bin before increasing the number of
|
||
* bins
|
||
*
|
||
* DEFAULT_INIT_TABLE_SIZE is the default for the number of bins
|
||
* allocated initially
|
||
*
|
||
*/
|
||
#define type_numhash st_hashtype_num
|
||
const struct st_hash_type st_hashtype_num = {
|
||
st_numcmp,
|
||
... | ... | |
strcasehash,
|
||
};
|
||
static void rehash(st_table *);
|
||
/* Value used to catch uninitialized entries/bins during debugging.
|
||
There is a possibility for a false alarm, but its probability is
|
||
extremely small. */
|
||
#define ST_INIT_VAL 0xafafafafafafafaf
|
||
#define ST_INIT_VAL_BYTE 0xafa
|
||
#ifdef RUBY
|
||
#undef malloc
|
||
#undef realloc
|
||
#undef calloc
|
||
#undef free
|
||
#define malloc xmalloc
|
||
#define calloc xcalloc
|
||
#define realloc xrealloc
|
||
#define free(x) xfree(x)
|
||
#define malloc ruby_xmalloc
|
||
#define calloc ruby_xcalloc
|
||
#define realloc ruby_xrealloc
|
||
#define free ruby_xfree
|
||
#endif
|
||
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
|
||
#include <stdlib.h>
|
||
#define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
|
||
#define hash_pos(h,n) ((h) & (n - 1))
|
||
#define EQUAL(tab,x,y) ((x) == (y) || (*(tab)->type->compare)((x),(y)) == 0)
|
||
#define PTR_EQUAL(tab, ptr, hash_val, key) \
|
||
((ptr)->hash == (hash_val) && EQUAL((tab), (key), (ptr)->key))
|
||
/* Features of a table. */
|
||
struct st_features {
|
||
/* Power of 2 used for number of allocated entries. */
|
||
unsigned char entry_power;
|
||
/* Power of 2 used for number of allocated bins. Depending on the
|
||
table size, the number of bins is 2-4 times more than the
|
||
number of entries. */
|
||
unsigned char bin_power;
|
||
/* Enumeration of sizes of bins (8-bit, 16-bit etc). */
|
||
unsigned char size_ind;
|
||
/* Bins are packed in words of type st_index_t. The following is
|
||
a size of bins counted by words. */
|
||
st_index_t bins_words;
|
||
};
|
||
/* preparation for possible allocation improvements */
|
||
#define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
|
||
#define st_free_entry(entry) free(entry)
|
||
#define st_alloc_table() (st_table *)malloc(sizeof(st_table))
|
||
#define st_dealloc_table(table) free(table)
|
||
#define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
|
||
#define st_free_bins(bins, size) free(bins)
|
||
static inline st_table_entry**
|
||
st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
|
||
{
|
||
bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
|
||
MEMZERO(bins, st_table_entry*, newsize);
|
||
return bins;
|
||
}
|
||
/* Shortcut */
|
||
#define bins as.big.bins
|
||
#define real_entries as.packed.real_entries
|
||
/* preparation for possible packing improvements */
|
||
#define PACKED_BINS(table) ((table)->as.packed.entries)
|
||
#define PACKED_ENT(table, i) PACKED_BINS(table)[i]
|
||
#define PKEY(table, i) PACKED_ENT((table), (i)).key
|
||
#define PVAL(table, i) PACKED_ENT((table), (i)).val
|
||
#define PHASH(table, i) PACKED_ENT((table), (i)).hash
|
||
#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
|
||
#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
|
||
#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
|
||
/* this function depends much on packed layout, so that it placed here */
|
||
static inline void
|
||
remove_packed_entry(st_table *table, st_index_t i)
|
||
{
|
||
table->real_entries--;
|
||
table->num_entries--;
|
||
if (i < table->real_entries) {
|
||
MEMMOVE(&PACKED_ENT(table, i), &PACKED_ENT(table, i+1),
|
||
st_packed_entry, table->real_entries - i);
|
||
}
|
||
}
|
||
/* Features of all possible size tables. */
|
||
#if SIZEOF_ST_INDEX_T == 8
|
||
#define MAX_POWER2 62
|
||
struct st_features features[] = {
|
||
{0, 2, 0, 0x0},
|
||
{1, 3, 0, 0x1},
|
||
{2, 4, 0, 0x2},
|
||
{3, 5, 0, 0x4},
|
||
{4, 6, 0, 0x8},
|
||
{5, 7, 0, 0x10},
|
||
{6, 8, 0, 0x20},
|
||
{7, 9, 0, 0x40},
|
||
{8, 10, 1, 0x100},
|
||
{9, 11, 1, 0x200},
|
||
{10, 12, 1, 0x400},
|
||
{11, 13, 1, 0x800},
|
||
{12, 14, 1, 0x1000},
|
||
{13, 15, 1, 0x2000},
|
||
{14, 16, 1, 0x4000},
|
||
{15, 17, 1, 0x8000},
|
||
{16, 18, 2, 0x20000},
|
||
{17, 19, 2, 0x40000},
|
||
{18, 20, 2, 0x80000},
|
||
{19, 21, 2, 0x100000},
|
||
{20, 22, 2, 0x200000},
|
||
{21, 23, 2, 0x400000},
|
||
{22, 24, 2, 0x800000},
|
||
{23, 25, 2, 0x1000000},
|
||
{24, 26, 2, 0x2000000},
|
||
{25, 27, 2, 0x4000000},
|
||
{26, 28, 2, 0x8000000},
|
||
{27, 29, 2, 0x10000000},
|
||
{28, 30, 2, 0x20000000},
|
||
{29, 31, 2, 0x40000000},
|
||
{30, 32, 2, 0x80000000},
|
||
{31, 33, 2, 0x100000000},
|
||
{32, 33, 3, 0x200000000},
|
||
{33, 34, 3, 0x400000000},
|
||
{34, 35, 3, 0x800000000},
|
||
{35, 36, 3, 0x1000000000},
|
||
{36, 37, 3, 0x2000000000},
|
||
{37, 38, 3, 0x4000000000},
|
||
{38, 39, 3, 0x8000000000},
|
||
{39, 40, 3, 0x10000000000},
|
||
{40, 41, 3, 0x20000000000},
|
||
{41, 42, 3, 0x40000000000},
|
||
{42, 43, 3, 0x80000000000},
|
||
{43, 44, 3, 0x100000000000},
|
||
{44, 45, 3, 0x200000000000},
|
||
{45, 46, 3, 0x400000000000},
|
||
{46, 47, 3, 0x800000000000},
|
||
{47, 48, 3, 0x1000000000000},
|
||
{48, 49, 3, 0x2000000000000},
|
||
{49, 50, 3, 0x4000000000000},
|
||
{50, 51, 3, 0x8000000000000},
|
||
{51, 52, 3, 0x10000000000000},
|
||
{52, 53, 3, 0x20000000000000},
|
||
{53, 54, 3, 0x40000000000000},
|
||
{54, 55, 3, 0x80000000000000},
|
||
{55, 56, 3, 0x100000000000000},
|
||
{56, 57, 3, 0x200000000000000},
|
||
{57, 58, 3, 0x400000000000000},
|
||
{58, 59, 3, 0x800000000000000},
|
||
{59, 60, 3, 0x1000000000000000},
|
||
{60, 61, 3, 0x2000000000000000},
|
||
{61, 62, 3, 0x4000000000000000},
|
||
{62, 63, 3, 0x8000000000000000},
|
||
};
|
||
static inline void
|
||
remove_safe_packed_entry(st_table *table, st_index_t i, st_data_t never)
|
||
{
|
||
table->num_entries--;
|
||
PKEY_SET(table, i, never);
|
||
PVAL_SET(table, i, never);
|
||
PHASH_SET(table, i, 0);
|
||
}
|
||
#else
|
||
#define MAX_POWER2 30
|
||
struct st_features features[] = {
|
||
{0, 2, 0, 0x1},
|
||
{1, 3, 0, 0x2},
|
||
{2, 4, 0, 0x4},
|
||
{3, 5, 0, 0x8},
|
||
{4, 6, 0, 0x10},
|
||
{5, 7, 0, 0x20},
|
||
{6, 8, 0, 0x40},
|
||
{7, 9, 0, 0x80},
|
||
{8, 10, 1, 0x200},
|
||
{9, 11, 1, 0x400},
|
||
{10, 12, 1, 0x800},
|
||
{11, 13, 1, 0x1000},
|
||
{12, 14, 1, 0x2000},
|
||
{13, 15, 1, 0x4000},
|
||
{14, 16, 1, 0x8000},
|
||
{15, 17, 1, 0x10000},
|
||
{16, 17, 2, 0x20000},
|
||
{17, 18, 2, 0x40000},
|
||
{18, 19, 2, 0x80000},
|
||
{19, 20, 2, 0x100000},
|
||
{20, 21, 2, 0x200000},
|
||
{21, 22, 2, 0x400000},
|
||
{22, 23, 2, 0x800000},
|
||
{23, 24, 2, 0x1000000},
|
||
{24, 25, 2, 0x2000000},
|
||
{25, 26, 2, 0x4000000},
|
||
{26, 27, 2, 0x8000000},
|
||
{27, 28, 2, 0x10000000},
|
||
{28, 29, 2, 0x20000000},
|
||
{29, 30, 2, 0x40000000},
|
||
{30, 31, 2, 0x80000000},
|
||
};
|
||
static st_index_t
|
||
next_pow2(st_index_t x)
|
||
{
|
||
x |= x >> 1;
|
||
x |= x >> 2;
|
||
x |= x >> 4;
|
||
x |= x >> 8;
|
||
x |= x >> 16;
|
||
#if SIZEOF_ST_INDEX_T == 8
|
||
x |= x >> 32;
|
||
#endif
|
||
return x + 1;
|
||
/* The reserved hash value and its substitution. */
|
||
#define RESERVED_HASH_VAL (~(st_hash_t) 0)
|
||
#define RESERVED_HASH_SUBSTITUTION_VAL ((st_hash_t) 0)
|
||
/* Return hash value of KEY for table TAB. */
|
||
static inline st_hash_t
|
||
do_hash(st_data_t key, st_table *tab) {
|
||
st_index_t h = (st_index_t)(tab->curr_hash)(key);
|
||
#if SIZEOF_INT == SIZEOF_VOIDP
|
||
st_hash_t hash = h;
|
||
#else
|
||
st_hash_t hash = h;
|
||
#endif
|
||
/* RESERVED_HASH_VAL is used for a deleted entry. Map it into
|
||
another value. Such mapping should be extremely rare. */
|
||
return hash == RESERVED_HASH_VAL ? RESERVED_HASH_SUBSTITUTION_VAL : hash;
|
||
}
|
||
static st_index_t
|
||
new_size(st_index_t size)
|
||
{
|
||
st_index_t n;
|
||
/* Power of 2 defining the minimal number of allocated entries. */
|
||
#define MINIMAL_POWER2 3
|
||
#if MINIMAL_POWER2 < 2
|
||
#error "MINIMAL_POWER2 should be >= 2"
|
||
#endif
|
||
/* If the power2 of the allocated `entries` is less than the following
|
||
value, don't allocate bins and use a linear search. */
|
||
#define MAX_POWER2_FOR_TABLES_WITHOUT_BINS 3
|
||
if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
|
||
return size;
|
||
/* Return smallest n >= MINIMAL_POWER2 such 2^n > SIZE. */
|
||
static int
|
||
get_power2(st_index_t size) {
|
||
unsigned int n;
|
||
n = next_pow2(size);
|
||
if (n > size)
|
||
return n;
|
||
for (n = 0; size != 0; n++)
|
||
size >>= 1;
|
||
if (n <= MAX_POWER2)
|
||
return n < MINIMAL_POWER2 ? MINIMAL_POWER2 : n;
|
||
#ifndef NOT_RUBY
|
||
/* Ran out of the table entries */
|
||
rb_raise(rb_eRuntimeError, "st_table too big");
|
||
#endif
|
||
return -1; /* should raise exception */
|
||
/* should raise exception */
|
||
return -1;
|
||
}
|
||
/* Return value of N-th bin in array BINS of table with bins size
|
||
index S. */
|
||
static inline st_index_t
|
||
get_bin(st_index_t *bins, int s, st_index_t n) {
|
||
return (s == 0 ? ((unsigned char *) bins)[n]
|
||
: s == 1 ? ((unsigned short *) bins)[n]
|
||
: s == 2 ? ((unsigned int *) bins)[n]
|
||
: ((st_index_t *) bins)[n]);
|
||
}
|
||
/* Set up N-th bin in array BINS of table with bins size index S to
|
||
value V. */
|
||
static inline void
|
||
set_bin(st_index_t *bins, int s, st_index_t n, st_index_t v) {
|
||
if (s == 0) ((unsigned char *) bins)[n] = v;
|
||
else if (s == 1) ((unsigned short *) bins)[n] = v;
|
||
else if (s == 2) ((unsigned int *) bins)[n] = v;
|
||
else ((st_index_t *) bins)[n] = v;
|
||
}
|
||
/* These macros define reserved values for empty table bin and table
|
||
bin which contains a deleted entry. We will never use such values
|
||
for an entry index in bins. */
|
||
#define EMPTY_BIN 0
|
||
#define DELETED_BIN 1
|
||
/* Base of a real entry index in the bins. */
|
||
#define ENTRY_BASE 2
|
||
/* Mark I-th bin of table TAB as empty, in other words not
|
||
corresponding to any entry. */
|
||
#define MARK_BIN_EMPTY(tab, i) (set_bin((tab)->bins, get_size_ind(tab), i, EMPTY_BIN))
|
||
/* Values used for not found entry and bin with given
|
||
characteristics. */
|
||
#define UNDEFINED_ENTRY_IND (~(st_index_t) 0)
|
||
#define UNDEFINED_BIN_IND (~(st_index_t) 0)
|
||
/* Mark I-th bin of table TAB as corresponding to a deleted table
|
||
entry. Update number of entries in the table and number of bins
|
||
corresponding to deleted entries. */
|
||
#define MARK_BIN_DELETED(tab, i) \
|
||
do { \
|
||
st_assert (i != UNDEFINED_BIN_IND); \
|
||
st_assert(! IND_EMPTY_OR_DELETED_BIN_P(tab, i)); \
|
||
set_bin((tab)->bins, get_size_ind(tab), i, DELETED_BIN); \
|
||
} while (0)
|
||
/* Macros to check that value B is used empty bins and bins
|
||
corresponding deleted entries. */
|
||
#define EMPTY_BIN_P(b) ((b) == EMPTY_BIN)
|
||
#define DELETED_BIN_P(b) ((b) == DELETED_BIN)
|
||
#define EMPTY_OR_DELETED_BIN_P(b) ((b) <= DELETED_BIN)
|
||
/* Macros to check empty bins and bins corresponding to deleted
|
||
entries. Bins are given by their index I in table TAB. */
|
||
#define IND_EMPTY_BIN_P(tab, i) (EMPTY_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
||
#define IND_DELETED_BIN_P(tab, i) (DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
||
#define IND_EMPTY_OR_DELETED_BIN_P(tab, i) (EMPTY_OR_DELETED_BIN_P(get_bin((tab)->bins, get_size_ind(tab), i)))
|
||
/* Macros for marking and checking deleted entries given by their
|
||
pointer E_PTR. */
|
||
#define MARK_ENTRY_DELETED(e_ptr) ((e_ptr)->hash = RESERVED_HASH_VAL)
|
||
#define DELETED_ENTRY_P(e_ptr) ((e_ptr)->hash == RESERVED_HASH_VAL)
|
||
/* Return bin size index of table TAB. */
|
||
static inline st_index_t
|
||
get_size_ind(const st_table *tab) {
|
||
return tab->size_ind;
|
||
}
|
||
/* Return the number of allocated bins of table TAB. */
|
||
static inline st_index_t
|
||
get_bins_num(const st_table *tab) {
|
||
return 1<<tab->bin_power;
|
||
}
|
||
/* Return mask for a bin index in table TAB. */
|
||
static inline st_index_t
|
||
bins_mask(const st_table *tab) {
|
||
return get_bins_num(tab) - 1;
|
||
}
|
||
/* Return the index of table TAB bin corresponding to
|
||
HASH_VALUE. */
|
||
static inline st_index_t
|
||
hash_bin(st_hash_t hash_value, st_table *tab) {
|
||
return hash_value & bins_mask(tab);
|
||
}
|
||
/* Return the number of allocated entries of table TAB. */
|
||
static inline st_index_t
|
||
get_allocated_entries (const st_table *tab) {
|
||
return 1<<tab->entry_power;
|
||
}
|
||
/* Return size of the allocated bins of table TAB. */
|
||
static inline st_index_t
|
||
bins_size(const st_table *tab) {
|
||
return features[tab->entry_power].bins_words * sizeof (st_index_t);
|
||
}
|
||
/* Mark all bins of table TAB as empty. */
|
||
static void
|
||
initialize_bins(st_table *tab) {
|
||
memset(tab->bins, 0, bins_size(tab));
|
||
}
|
||
/* Make table TAB empty. */
|
||
static void
|
||
make_tab_empty(st_table *tab)
|
||
{
|
||
tab->curr_hash = tab->type->hash;
|
||
tab->num_entries = 0;
|
||
tab->rebuilds_num = 0;
|
||
tab->entries_start = tab->entries_bound = 0;
|
||
if (tab->bins != NULL)
|
||
initialize_bins(tab);
|
||
}
|
||
#ifdef ST_DEBUG
|
||
/* Check the table T consistency. It can be extremely slow. So use
|
||
it only for debugging. */
|
||
static void
|
||
st_check(st_table *tab) {
|
||
st_index_t d, e, i, n, p;
|
||
for (p = get_allocated_entries(tab), i = 0; p > 1; i++, p>>=1)
|
||
;
|
||
p = i;
|
||
assert (p >= MINIMAL_POWER2);
|
||
assert (tab->entries_bound <= get_allocated_entries(tab)
|
||
&& tab->entries_start <= tab->entries_bound);
|
||
n = 0;
|
||
return;
|
||
if (tab->entries_bound != 0)
|
||
for (i = tab->entries_start; i < tab->entries_bound; i++) {
|
||
assert (tab->entries[i].hash != (st_hash_t) ST_INIT_VAL
|
||
&& tab->entries[i].key != ST_INIT_VAL
|
||
&& tab->entries[i].record != ST_INIT_VAL);
|
||
if (! DELETED_ENTRY_P(&tab->entries[i]))
|
||
n++;
|
||
}
|
||
assert (n == tab->num_entries);
|
||
if (tab->bins == NULL)
|
||
assert (p <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
|
||
else {
|
||
assert (p > MAX_POWER2_FOR_TABLES_WITHOUT_BINS);
|
||
for (n = d = i = 0; i < get_bins_num(tab); i++) {
|
||
assert (get_bin(tab->bins, tab->size_ind, i) != ST_INIT_VAL);
|
||
if (IND_DELETED_BIN_P(tab, i)) {
|
||
d++;
|
||
continue;
|
||
}
|
||
else if (IND_EMPTY_BIN_P(tab, i))
|
||
continue;
|
||
n++;
|
||
e = get_bin(tab->bins, tab->size_ind, i) - ENTRY_BASE;
|
||
assert (tab->entries_start <= e && e < tab->entries_bound);
|
||
assert (! DELETED_ENTRY_P(&tab->entries[e]));
|
||
assert (tab->entries[e].hash != (st_hash_tab) ST_INIT_VAL
|
||
&& tab->entries[e].key != ST_INIT_VAL
|
||
&& tab->entries[e].record != ST_INIT_VAL);
|
||
}
|
||
assert (n == tab->num_entries);
|
||
}
|
||
}
|
||
#endif
|
||
#ifdef HASH_LOG
|
||
#ifdef HAVE_UNISTD_H
|
||
... | ... | |
static struct {
|
||
int all, total, num, str, strcase;
|
||
} collision;
|
||
/* Flag switching off output of package statistics at the end of
|
||
program. */
|
||
static int init_st = 0;
|
||
/* Output overall number of table searches and collisions into a
|
||
temporary file. */
|
||
static void
|
||
stat_col(void)
|
||
{
|
||
char fname[10+sizeof(long)*3];
|
||
FILE *f = fopen((snprintf(fname, sizeof(fname), "/tmp/col%ld", (long)getpid()), fname), "w");
|
||
fprintf(f, "collision: %d / %d (%6.2f)\n", collision.all, collision.total,
|
||
((double)collision.all / (collision.total)) * 100);
|
||
((double)collision.all / (collision.total)) * 100);
|
||
fprintf(f, "num: %d, str: %d, strcase: %d\n", collision.num, collision.str, collision.strcase);
|
||
fclose(f);
|
||
}
|
||
#endif
|
||
static struct list_head *
|
||
st_head(const st_table *tbl)
|
||
{
|
||
uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
|
||
return (struct list_head *)addr;
|
||
}
|
||
st_table*
|
||
st_init_table_with_size(const struct st_hash_type *type, st_index_t size)
|
||
{
|
||
st_table *tbl;
|
||
/* Create and return table with TYPE which can hold at least SIZE
|
||
entries. The real number of entries which the table can hold is
|
||
the nearest power of two for SIZE. */
|
||
st_table *
|
||
st_init_table_with_size(const struct st_hash_type *type, st_index_t size) {
|
||
st_table *tab;
|
||
int n;
|
||
|
||
#ifdef HASH_LOG
|
||
# if HASH_LOG+0 < 0
|
||
#if HASH_LOG+0 < 0
|
||
{
|
||
const char *e = getenv("ST_HASH_LOG");
|
||
if (!e || !*e) init_st = 1;
|
||
const char *e = getenv("ST_HASH_LOG");
|
||
if (!e || !*e) init_st = 1;
|
||
}
|
||
# endif
|
||
#endif
|
||
if (init_st == 0) {
|
||
init_st = 1;
|
||
atexit(stat_col);
|
||
init_st = 1;
|
||
atexit(stat_col);
|
||
}
|
||
#endif
|
||
tbl = st_alloc_table();
|
||
tbl->type = type;
|
||
tbl->num_entries = 0;
|
||
tbl->entries_packed = size <= MAX_PACKED_HASH;
|
||
if (tbl->entries_packed) {
|
||
size = ST_DEFAULT_PACKED_TABLE_SIZE;
|
||
tbl->real_entries = 0;
|
||
}
|
||
else {
|
||
size = new_size(size); /* round up to power-of-two */
|
||
list_head_init(st_head(tbl));
|
||
}
|
||
tbl->num_bins = size;
|
||
tbl->bins = st_alloc_bins(size);
|
||
return tbl;
|
||
|
||
n = get_power2(size);
|
||
tab = (st_table *) malloc(sizeof (st_table));
|
||
tab->type = type;
|
||
tab->entry_power = n;
|
||
tab->bin_power = features[n].bin_power;
|
||
tab->size_ind = features[n].size_ind;
|
||
if (n <= MAX_POWER2_FOR_TABLES_WITHOUT_BINS)
|
||
tab->bins = NULL;
|
||
else
|
||
tab->bins = (st_index_t *) malloc(bins_size(tab));
|
||
tab->entries = (st_table_entry *) malloc(get_allocated_entries(tab)
|
||
* sizeof(st_table_entry));
|
||
#ifdef ST_DEBUG
|
||
memset(tab->entries, ST_INIT_VAL_BYTE,
|
||
get_allocated_entries(tab) * sizeof(st_table_entry));
|
||
if (tab->bins != NULL)
|
||
memset(tab->bins, ST_INIT_VAL_BYTE, bins_size(tab));
|
||
#endif
|
||
make_tab_empty(tab);
|
||
#ifdef ST_DEBUG
|
||
st_check(tab);
|
||
#endif
|
||
return tab;
|
||
}
|
||
st_table*
|
||
st_init_table(const struct st_hash_type *type)
|
||
{
|
||
/* Create and return table with TYPE which can hold a minimal number
|
||
of entries (see comments for get_power2). */
|
||
st_table *
|
||
st_init_table(const struct st_hash_type *type) {
|
||
return st_init_table_with_size(type, 0);
|
||
}
|
||
st_table*
|
||
st_init_numtable(void)
|
||
{
|
||
/* Create and return table which can hold a minimal number of
|
||
numbers. */
|
||
st_table *
|
||
st_init_numtable(void) {
|
||
return st_init_table(&type_numhash);
|
||
}
|
||
st_table*
|
||
st_init_numtable_with_size(st_index_t size)
|
||
{
|
||
/* Create and return table which can hold SIZE numbers. */
|
||
st_table *
|
||
st_init_numtable_with_size(st_index_t size) {
|
||
return st_init_table_with_size(&type_numhash, size);
|
||
}
|
||
st_table*
|
||
st_init_strtable(void)
|
||
{
|
||
/* Create and return table which can hold a minimal number of
|
||
strings. */
|
||
st_table *
|
||
st_init_strtable(void) {
|
||
return st_init_table(&type_strhash);
|
||
}
|
||
st_table*
|
||
st_init_strtable_with_size(st_index_t size)
|
||
{
|
||
/* Create and return table which can hold SIZE strings. */
|
||
st_table *
|
||
st_init_strtable_with_size(st_index_t size) {
|
||
return st_init_table_with_size(&type_strhash, size);
|
||
}
|
||
st_table*
|
||
st_init_strcasetable(void)
|
||
{
|
||
/* Create and return table which can hold a minimal number of strings
|
||
whose character case is ignored. */
|
||
st_table *
|
||
st_init_strcasetable(void) {
|
||
return st_init_table(&type_strcasehash);
|
||
}
|
||
st_table*
|
||
st_init_strcasetable_with_size(st_index_t size)
|
||
{
|
||
/* Create and return table which can hold SIZE strings whose character
|
||
case is ignored. */
|
||
st_table *
|
||
st_init_strcasetable_with_size(st_index_t size) {
|
||
return st_init_table_with_size(&type_strcasehash, size);
|
||
}
|
||
/* Make table TAB empty. */
|
||
void
|
||
st_clear(st_table *table)
|
||
{
|
||
register st_table_entry *ptr = 0, *next;
|
||
if (table->entries_packed) {
|
||
table->num_entries = 0;
|
||
table->real_entries = 0;
|
||
return;
|
||
}
|
||
list_for_each_safe(st_head(table), ptr, next, olist) {
|
||
/* list_del is not needed */
|
||
st_free_entry(ptr);
|
||
}
|
||
table->num_entries = 0;
|
||
MEMZERO(table->bins, st_table_entry*, table->num_bins);
|
||
list_head_init(st_head(table));
|
||
st_clear(st_table *tab) {
|
||
make_tab_empty(tab);
|
||
#ifdef ST_DEBUG
|
||
st_check(tab);
|
||
#endif
|
||
}
|
||
/* Free table TAB space. */
|
||
void
|
||
st_free_table(st_table *table)
|
||
{
|
||
st_clear(table);
|
||
st_free_bins(table->bins, table->num_bins);
|
||
st_dealloc_table(table);
|
||
st_free_table(st_table *tab) {
|
||
if (tab->bins != NULL)
|
||
free(tab->bins);
|
||
free(tab->entries);
|
||
free(tab);
|
||
}
|
||
/* Return byte size of memory allocted for table TAB. */
|
||
size_t
|
||
st_memsize(const st_table *table)
|
||
{
|
||
if (table->entries_packed) {
|
||
return table->num_bins * sizeof (void *) + sizeof(st_table);
|
||
}
|
||
else {
|
||
return table->num_entries * sizeof(struct st_table_entry) + table->num_bins * sizeof (void *) + sizeof(st_table);
|
||
}
|
||
st_memsize(const st_table *tab) {
|
||
return(sizeof(st_table)
|
||
+ (tab->bins == NULL ? 0 : bins_size(tab))
|
||
+ get_allocated_entries(tab) * sizeof(st_table_entry));
|
||
}
|
||
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
|
||
((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
|
||
static st_index_t
|
||
find_table_entry_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
|
||
static st_index_t
|
||
find_table_bin_ind(st_table *tab, st_hash_t hash_value, st_data_t key);
|
||
static st_index_t
|
||
find_table_bin_ind_direct(st_table *table, st_hash_t hash_value, st_data_t key);
|
||
static st_index_t
|
||
find_table_bin_ptr_and_reserve(st_table *tab, st_hash_t *hash_value,
|
||
st_data_t key, st_index_t *bin_ind);
|
||
#ifdef HASH_LOG
|
||
static void
|
||
count_collision(const struct st_hash_type *type)
|
||
{
|
||
collision.all++;
|
||
if (type == &type_numhash) {
|
||
collision.num++;
|
||
}
|
||
count_collision(const struct st_hash_type *type) {
|
||
collision.all++;
|
||
if (type == &type_numhash) {
|
||
collision.num++;
|
||
}
|
||
else if (type == &type_strhash) {
|
||
collision.strcase++;
|
||
collision.strcase++;
|
||
}
|
||
else if (type == &type_strcasehash) {
|
||
collision.str++;
|
||
}
|
||
collision.str++;
|
||
}
|
||
}
|
||
#define COLLISION (collision_check ? count_collision(table->type) : (void)0)
|
||
#define FOUND_ENTRY (collision_check ? collision.total++ : (void)0)
|
||
#define collision_check 1
|
||
#define COLLISION (collision_check ? count_collision(tab->type) : (void)0)
|
||
#define FOUND_BIN (collision_check ? collision.total++ : (void)0)
|
||
#else
|
||
#define COLLISION
|
||
#define FOUND_ENTRY
|
||
#define FOUND_BIN
|
||
#endif
|
||
#define FIND_ENTRY(table, ptr, hash_val, bin_pos) \
|
||
((ptr) = find_entry((table), key, (hash_val), ((bin_pos) = hash_pos(hash_val, (table)->num_bins))))
|
||
/* If the number of entries in the table is at least REBUILD_THRESHOLD
|
||
times less than the entry array length, decrease the table
|
||
size. */
|
||
#define REBUILD_THRESHOLD 4
|
||
static st_table_entry *
|
||
find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
|
||
st_index_t bin_pos)
|
||
{
|
||
register st_table_entry *ptr = table->bins[bin_pos];
|
||
FOUND_ENTRY;
|
||
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
|
||
COLLISION;
|
||
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
|
||
ptr = ptr->next;
|
||
}
|
||
ptr = ptr->next;
|
||
}
|
||
return ptr;
|
||
}
|
||
static inline st_index_t
|
||
find_packed_index_from(const st_table *table, st_index_t hash_val,
|
||
st_data_t key, st_index_t i)
|
||
{
|
||
while (i < table->real_entries &&
|
||
(PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
|
||
i++;
|
||
}
|
||
return i;
|
||
}
|
||
static inline st_index_t
|
||
find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
|
||
{
|
||
return find_packed_index_from(table, hash_val, key, 0);
|
||
}
|
||
#define collision_check 0
|
||
int
|
||
st_lookup(st_table *table, register st_data_t key, st_data_t *value)
|
||
{
|
||
st_index_t hash_val;
|
||
register st_table_entry *ptr;
|
||
#if REBUILD_THRESHOLD < 2
|
||
#error "REBUILD_THRESHOLD should be >= 2"
|
||
#endif
|
||
hash_val = do_hash(key, table);
|
||
static int inside_table_rebuild_p = FALSE;
|
||
if (table->entries_packed) {
|
||
st_index_t i = find_packed_index(table, hash_val, key);
|
||
if (i < table->real_entries) {
|
||
if (value != 0) *value = PVAL(table, i);
|