Project

General

Profile

Bug #13019 ยป 0001-st.c-fix-st_hash-functions.patch

funny_falcon (Yura Sokolov), 12/09/2016 02:02 PM

View differences:

include/ruby/st.h
63 63
    st_index_t (*hash)(ANYARGS /*st_data_t*/);        /* st_hash_func* */
64 64
};
65 65

  
66
#define ST_INDEX_BITS (SIZEOF_ST_INDEX_T * CHAR_BIT)
67

  
66 68
#if defined(HAVE_BUILTIN___BUILTIN_CHOOSE_EXPR) && defined(HAVE_BUILTIN___BUILTIN_TYPES_COMPATIBLE_P)
67 69
# define ST_DATA_COMPATIBLE_P(type) \
68 70
   __builtin_choose_expr(__builtin_types_compatible_p(type, st_data_t), 1, 0)
st.c
1609 1609
		st_data_t never ATTRIBUTE_UNUSED) {
1610 1610
    return st_general_values(tab, values, size);
1611 1611
}
1612
/*
1613
 * hash_32 - 32 bit Fowler/Noll/Vo FNV-1a hash code
1614
 *
1615
 * @(#) $Hash32: Revision: 1.1 $
1616
 * @(#) $Hash32: Id: hash_32a.c,v 1.1 2003/10/03 20:38:53 chongo Exp $
1617
 * @(#) $Hash32: Source: /usr/local/src/cmd/fnv/RCS/hash_32a.c,v $
1618
 *
1619
 ***
1620
 *
1621
 * Fowler/Noll/Vo hash
1622
 *
1623
 * The basis of this hash algorithm was taken from an idea sent
1624
 * as reviewer comments to the IEEE POSIX P1003.2 committee by:
1625
 *
1626
 *      Phong Vo (http://www.research.att.com/info/kpv/)
1627
 *      Glenn Fowler (http://www.research.att.com/~gsf/)
1628
 *
1629
 * In a subsequent ballot round:
1630
 *
1631
 *      Landon Curt Noll (http://www.isthe.com/chongo/)
1632
 *
1633
 * improved on their algorithm.  Some people tried this hash
1634
 * and found that it worked rather well.  In an EMail message
1635
 * to Landon, they named it the ``Fowler/Noll/Vo'' or FNV hash.
1636
 *
1637
 * FNV hashes are designed to be fast while maintaining a low
1638
 * collision rate. The FNV speed allows one to quickly hash lots
1639
 * of data while maintaining a reasonable collision rate.  See:
1640
 *
1641
 *      http://www.isthe.com/chongo/tech/comp/fnv/index.html
1642
 *
1643
 * for more details as well as other forms of the FNV hash.
1644
 ***
1645
 *
1646
 * To use the recommended 32 bit FNV-1a hash, pass FNV1_32A_INIT as the
1647
 * Fnv32_t hashval argument to fnv_32a_buf() or fnv_32a_str().
1648
 *
1649
 ***
1650
 *
1651
 * Please do not copyright this code.  This code is in the public domain.
1652
 *
1653
 * LANDON CURT NOLL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE,
1654
 * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO
1655
 * EVENT SHALL LANDON CURT NOLL BE LIABLE FOR ANY SPECIAL, INDIRECT OR
1656
 * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF
1657
 * USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR
1658
 * OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR
1659
 * PERFORMANCE OF THIS SOFTWARE.
1660
 *
1661
 * By:
1662
 *	chongo <Landon Curt Noll> /\oo/\
1663
 *      http://www.isthe.com/chongo/
1664
 *
1665
 * Share and Enjoy!	:-)
1666
 */
1667 1612

  
1668
/*
1669
 * 32 bit FNV-1 and FNV-1a non-zero initial basis
1670
 *
1671
 * The FNV-1 initial basis is the FNV-0 hash of the following 32 octets:
1672
 *
1673
 *              chongo <Landon Curt Noll> /\../\
1674
 *
1675
 * NOTE: The \'s above are not back-slashing escape characters.
1676
 * They are literal ASCII  backslash 0x5c characters.
1677
 *
1678
 * NOTE: The FNV-1a initial basis is the same value as FNV-1 by definition.
1679
 */
1680 1613
#define FNV1_32A_INIT 0x811c9dc5
1681 1614

  
1682 1615
/*
......
1684 1617
 */
1685 1618
#define FNV_32_PRIME 0x01000193
1686 1619

  
1687
#ifdef ST_USE_FNV1
1688
static st_index_t
1689
strhash(st_data_t arg)
1690
{
1691
    register const char *string = (const char *)arg;
1692
    register st_index_t hval = FNV1_32A_INIT;
1693

  
1694
    /*
1695
     * FNV-1a hash each octet in the buffer
1696
     */
1697
    while (*string) {
1698
	/* xor the bottom with the current octet */
1699
	hval ^= (unsigned int)*string++;
1700

  
1701
	/* multiply by the 32 bit FNV magic prime mod 2^32 */
1702
	hval *= FNV_32_PRIME;
1703
    }
1704
    return hval;
1705
}
1706
#else
1707

  
1708 1620
#ifndef UNALIGNED_WORD_ACCESS
1709 1621
# if defined(__i386) || defined(__i386__) || defined(_M_IX86) || \
1710 1622
     defined(__x86_64) || defined(__x86_64__) || defined(_M_AMD64) || \
......
1717 1629
# define UNALIGNED_WORD_ACCESS 0
1718 1630
#endif
1719 1631

  
1720
/* MurmurHash described in http://murmurhash.googlepages.com/ */
1721
#ifndef MURMUR
1722
#define MURMUR 2
1723
#endif
1632
/* This hash function is quite simplified MurmurHash3
1633
 * Simplification is legal, cause most of magic still happens in finalizator.
1634
 * And finalizator is almost the same as in MurmurHash3 */
1635
#define BIG_CONSTANT(x,y) ((st_index_t)(x)<<32|(st_index_t)(y))
1636
#define ROTL(x,n) ((x)<<(n)|(x)>>(SIZEOF_ST_INDEX_T*CHAR_BIT-(n)))
1724 1637

  
1725
#define MurmurMagic_1 (st_index_t)0xc6a4a793
1726
#define MurmurMagic_2 (st_index_t)0x5bd1e995
1727
#if MURMUR == 1
1728
#define MurmurMagic MurmurMagic_1
1729
#elif MURMUR == 2
1730
#if SIZEOF_ST_INDEX_T > 4
1731
#define MurmurMagic ((MurmurMagic_1 << 32) | MurmurMagic_2)
1638
#if ST_INDEX_BITS <= 32
1639
#define C1 (st_index_t)0xcc9e2d51
1640
#define C2 (st_index_t)0x1b873593
1732 1641
#else
1733
#define MurmurMagic MurmurMagic_2
1734
#endif
1642
#define C1 BIG_CONSTANT(0x87c37b91,0x114253d5);
1643
#define C2 BIG_CONSTANT(0x4cf5ad43,0x2745937f);
1735 1644
#endif
1736

  
1737 1645
static inline st_index_t
1738
murmur(st_index_t h, st_index_t k, int r)
1646
murmur_step(st_index_t h, st_index_t k)
1739 1647
{
1740
    const st_index_t m = MurmurMagic;
1741
#if MURMUR == 1
1742
    h += k;
1743
    h *= m;
1744
    h ^= h >> r;
1745
#elif MURMUR == 2
1746
    k *= m;
1747
    k ^= k >> r;
1748
    k *= m;
1749

  
1750
    h *= m;
1751
    h ^= k;
1648
#if ST_INDEX_BITS <= 32
1649
#define r1 (17)
1650
#define r2 (11)
1651
#else
1652
#define r1 (33)
1653
#define r2 (24)
1752 1654
#endif
1655
    k *= C1;
1656
    h ^= ROTL(k, r1);
1657
    h *= C2;
1658
    h = ROTL(h, r2);
1753 1659
    return h;
1754 1660
}
1661
#undef r1
1662
#undef r2
1755 1663

  
1756 1664
static inline st_index_t
1757 1665
murmur_finish(st_index_t h)
1758 1666
{
1759
#if MURMUR == 1
1760
    h = murmur(h, 0, 10);
1761
    h = murmur(h, 0, 17);
1762
#elif MURMUR == 2
1763
    h ^= h >> 13;
1764
    h *= MurmurMagic;
1765
    h ^= h >> 15;
1766
#endif
1667
#if ST_INDEX_BITS <= 32
1668
#define r1 (16)
1669
#define r2 (13)
1670
#define r3 (16)
1671
    const st_index_t c1 = 0x85ebca6b;
1672
    const st_index_t c2 = 0xc2b2ae35;
1673
#else
1674
/* values are taken from Mix13 on http://zimbry.blogspot.ru/2011/09/better-bit-mixing-improving-on.html */
1675
#define r1 (30)
1676
#define r2 (27)
1677
#define r3 (31)
1678
    const st_index_t c1 = BIG_CONSTANT(0xbf58476d,0x1ce4e5b9);
1679
    const st_index_t c2 = BIG_CONSTANT(0x94d049bb,0x133111eb);
1680
#endif
1681
#if ST_INDEX_BITS > 64
1682
    h ^= h >> 64;
1683
    h *= c2;
1684
    h ^= h >> 65;
1685
#endif
1686
    h ^= h >> r1;
1687
    h *= c1;
1688
    h ^= h >> r2;
1689
    h *= c2;
1690
    h ^= h >> r3;
1767 1691
    return h;
1768 1692
}
1769

  
1770
#define murmur_step(h, k) murmur((h), (k), 16)
1771

  
1772
#if MURMUR == 1
1773
#define murmur1(h) murmur_step((h), 16)
1774
#else
1775
#define murmur1(h) murmur_step((h), 24)
1776
#endif
1693
#undef r1
1694
#undef r2
1695
#undef r3
1777 1696

  
1778 1697
st_index_t
1779 1698
st_hash(const void *ptr, size_t len, st_index_t h)
1780 1699
{
1781 1700
    const char *data = ptr;
1782 1701
    st_index_t t = 0;
1783

  
1784
    h += 0xdeadbeef;
1702
    size_t l = len;
1785 1703

  
1786 1704
#define data_at(n) (st_index_t)((unsigned char)data[(n)])
1787 1705
#define UNALIGNED_ADD_4 UNALIGNED_ADD(2); UNALIGNED_ADD(1); UNALIGNED_ADD(0)
......
1859 1777
	    t = (t >> sr) | (d << sl);
1860 1778
#endif
1861 1779

  
1862
#if MURMUR == 2
1863 1780
	    if (len < (size_t)align) goto skip_tail;
1864
#endif
1865 1781
	    h = murmur_step(h, t);
1866 1782
	    data += pack;
1867 1783
	    len -= pack;
......
1879 1795

  
1880 1796
    t = 0;
1881 1797
    switch (len) {
1798
#if UNALIGNED_WORD_ACCESS && SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8
1799
    /* in this case byteorder doesn't really matter */
1800
#if SIZEOF_ST_INDEX_T > 4
1801
	case 7: t |= data_at(6) << 48;
1802
	case 6: t |= data_at(5) << 40;
1803
	case 5: t |= data_at(4) << 32;
1804
	case 4:
1805
	    t |= (st_index_t)*(uint32_t*)data;
1806
	    goto skip_tail;
1807
#endif
1808
	case 3: t |= data_at(2) << 16;
1809
	case 2: t |= data_at(1) << 8;
1810
	case 1: t |= data_at(0);
1811
#else
1882 1812
#ifdef WORDS_BIGENDIAN
1883 1813
# define UNALIGNED_ADD(n) case (n) + 1: \
1884 1814
	t |= data_at(n) << CHAR_BIT*(SIZEOF_ST_INDEX_T - (n) - 1)
......
1888 1818
#endif
1889 1819
	UNALIGNED_ADD_ALL;
1890 1820
#undef UNALIGNED_ADD
1891
#if MURMUR == 1
1892
	h = murmur_step(h, t);
1893
#elif MURMUR == 2
1894
# if !UNALIGNED_WORD_ACCESS
1821
#endif
1822
#if !UNALIGNED_WORD_ACCESS || (SIZEOF_ST_INDEX_T <= 8 && CHAR_BIT == 8)
1895 1823
      skip_tail:
1896
# endif
1897
	h ^= t;
1898
	h *= MurmurMagic;
1899 1824
#endif
1825
	h ^= t; h -= ROTL(t, 7);
1826
	h *= C2;
1900 1827
    }
1828
    h ^= l;
1901 1829

  
1902 1830
    return murmur_finish(h);
1903 1831
}
......
1905 1833
st_index_t
1906 1834
st_hash_uint32(st_index_t h, uint32_t i)
1907 1835
{
1908
    return murmur_step(h + i, 16);
1836
    return murmur_step(h, i);
1909 1837
}
1910 1838

  
1911 1839
st_index_t
1912 1840
st_hash_uint(st_index_t h, st_index_t i)
1913 1841
{
1914
    st_index_t v = 0;
1915
    h += i;
1916
#ifdef WORDS_BIGENDIAN
1917
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
1918
    v = murmur1(v + (h >> 12*8));
1919
#endif
1920
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1921
    v = murmur1(v + (h >> 8*8));
1922
#endif
1923
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
1924
    v = murmur1(v + (h >> 4*8));
1925
#endif
1926
#endif
1927
    v = murmur1(v + h);
1928
#ifndef WORDS_BIGENDIAN
1929
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 4*8
1930
    v = murmur1(v + (h >> 4*8));
1931
#endif
1842
    i += h;
1843
/* no matter if it is BigEndian or LittleEndian,
1844
 * we hash just integers */
1932 1845
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 8*8
1933
    v = murmur1(v + (h >> 8*8));
1934
#endif
1935
#if SIZEOF_ST_INDEX_T*CHAR_BIT > 12*8
1936
    v = murmur1(v + (h >> 12*8));
1846
    h = murmur_step(h, i >> 8*8);
1937 1847
#endif
1938
#endif
1939
    return v;
1848
    h = murmur_step(h, i);
1849
    return h;
1940 1850
}
1941 1851

  
1942 1852
st_index_t
1943 1853
st_hash_end(st_index_t h)
1944 1854
{
1945
    h = murmur_step(h, 10);
1946
    h = murmur_step(h, 17);
1855
    h = murmur_finish(h);
1947 1856
    return h;
1948 1857
}
1949 1858

  
......
1960 1869
    register const char *string = (const char *)arg;
1961 1870
    return st_hash(string, strlen(string), FNV1_32A_INIT);
1962 1871
}
1963
#endif
1964 1872

  
1965 1873
int
1966 1874
st_locale_insensitive_strcasecmp(const char *s1, const char *s2)
1967
-