diff options
| author | vasko <[email protected]> | 2026-03-05 11:17:14 +0300 |
|---|---|---|
| committer | vasko <[email protected]> | 2026-03-05 13:11:05 +0300 |
| commit | d0739058c7dbc0e2e2b8aa6c5bc399522d5e6fca (patch) | |
| tree | 84434155cffc87b80bdf7f2e8d4d8693552efe26 /library/cpp/yt | |
| parent | dca9e7af12a14e97bc954627d386a2266eb1a8be (diff) | |
[yt] make SplitMix & HashCombine bit-independent
add realization of hash-functions for 32-bit platforms
commit_hash:3247a0524d3b66d759bf5ebd598be84c8dfb5837
Diffstat (limited to 'library/cpp/yt')
| -rw-r--r-- | library/cpp/yt/misc/hash-inl.h | 46 | ||||
| -rw-r--r-- | library/cpp/yt/misc/hash.h | 2 | ||||
| -rw-r--r-- | library/cpp/yt/misc/unittests/hash_ut.cpp | 4 |
3 files changed, 39 insertions, 13 deletions
diff --git a/library/cpp/yt/misc/hash-inl.h b/library/cpp/yt/misc/hash-inl.h index bd4af300a32..b9009b9072b 100644 --- a/library/cpp/yt/misc/hash-inl.h +++ b/library/cpp/yt/misc/hash-inl.h @@ -10,24 +10,50 @@ namespace NYT { //////////////////////////////////////////////////////////////////////////////// -inline size_t SplitMix64(size_t value) +inline size_t SplitMix(size_t value) { - static_assert(sizeof(size_t) == 8, "size_t must be 64 bit."); + // GOLDEN_GAMMA is the odd integer closest to (2 ^ {bit_depth}) / phi, + // where phi = (1 + sqrt(5))/2 is the golden ratio. + // Defined in https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf + // + // The second part of algorithm is finalization mix and uses some magic constants + // For 32bit platforms it taken from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L70-L74 + // For 64bit from http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html +#ifdef _64_ + constexpr size_t GOLDEN_GAMMA = 0x9e3779b97f4a7c15ULL; + constexpr size_t C1 = 0xbf58476d1ce4e5b9ULL; + constexpr size_t C2 = 0x94d049bb133111ebULL; + constexpr size_t S0 = 30; + constexpr size_t S1 = 27; + constexpr size_t S2 = 31; +#else + constexpr size_t GOLDEN_GAMMA = 0x9e3779b9UL; + constexpr size_t C1 = 0x85ebca6bUL; + constexpr size_t C2 = 0xc2b2ae35UL; + constexpr size_t S0 = 16; + constexpr size_t S1 = 13; + constexpr size_t S2 = 16; +#endif - value += 0x9e3779b97f4a7c15ULL; - value = (value ^ (value >> 30)) * 0xbf58476d1ce4e5b9ULL; - value = (value ^ (value >> 27)) * 0x94d049bb133111ebULL; - return value ^ (value >> 31); + value += GOLDEN_GAMMA; + value = (value ^ (value >> S0)) * C1; + value = (value ^ (value >> S1)) * C2; + return value ^ (value >> S2); } //////////////////////////////////////////////////////////////////////////////// inline void HashCombine(size_t& h, size_t k) { - static_assert(sizeof(size_t) == 8, "size_t must be 64 bit."); - - const size_t m = 0xc6a4a7935bd1e995ULL; - const int r = 47; + // Combine step from MurmurHash https://github.com/abrandoned/murmur2/blob/master/MurmurHash2.c + // 64bit constants are taken from MurmurHash64A, 32bit — MurmurHash2 +#ifdef _64_ + constexpr size_t m = 0xc6a4a7935bd1e995ULL; + constexpr size_t r = 47; +#else + constexpr size_t m = 0x5bd1e995; + constexpr size_t r = 24; +#endif k *= m; k ^= k >> r; diff --git a/library/cpp/yt/misc/hash.h b/library/cpp/yt/misc/hash.h index e2b6f72029b..2281ed707f3 100644 --- a/library/cpp/yt/misc/hash.h +++ b/library/cpp/yt/misc/hash.h @@ -13,7 +13,7 @@ namespace NYT { //! as opposed to raw collision minimization. //! This is also SplitMix64 PRNG. //! Cf. |http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html|, |boost::random::splitmix64|. -size_t SplitMix64(size_t value); +size_t SplitMix(size_t value); //////////////////////////////////////////////////////////////////////////////// diff --git a/library/cpp/yt/misc/unittests/hash_ut.cpp b/library/cpp/yt/misc/unittests/hash_ut.cpp index a8e60faaae0..0695d32d47a 100644 --- a/library/cpp/yt/misc/unittests/hash_ut.cpp +++ b/library/cpp/yt/misc/unittests/hash_ut.cpp @@ -17,8 +17,8 @@ TEST(THashTest, NaNSafeHash) TEST(THashTest, SplitMix64Test) { - EXPECT_EQ(SplitMix64(0), 0xe220a8397b1dcdafULL); - EXPECT_EQ(SplitMix64(12345), 0x22118258a9d111a0ULL); + EXPECT_EQ(SplitMix(0), 0xe220a8397b1dcdafULL); + EXPECT_EQ(SplitMix(12345), 0x22118258a9d111a0ULL); } //////////////////////////////////////////////////////////////////////////////// |
