summaryrefslogtreecommitdiffstats
path: root/library/cpp/yt
diff options
context:
space:
mode:
authorvasko <[email protected]>2026-03-05 11:17:14 +0300
committervasko <[email protected]>2026-03-05 13:11:05 +0300
commitd0739058c7dbc0e2e2b8aa6c5bc399522d5e6fca (patch)
tree84434155cffc87b80bdf7f2e8d4d8693552efe26 /library/cpp/yt
parentdca9e7af12a14e97bc954627d386a2266eb1a8be (diff)
[yt] make SplitMix & HashCombine bit-independent
add realization of hash-functions for 32-bit platforms commit_hash:3247a0524d3b66d759bf5ebd598be84c8dfb5837
Diffstat (limited to 'library/cpp/yt')
-rw-r--r--library/cpp/yt/misc/hash-inl.h46
-rw-r--r--library/cpp/yt/misc/hash.h2
-rw-r--r--library/cpp/yt/misc/unittests/hash_ut.cpp4
3 files changed, 39 insertions, 13 deletions
diff --git a/library/cpp/yt/misc/hash-inl.h b/library/cpp/yt/misc/hash-inl.h
index bd4af300a32..b9009b9072b 100644
--- a/library/cpp/yt/misc/hash-inl.h
+++ b/library/cpp/yt/misc/hash-inl.h
@@ -10,24 +10,50 @@ namespace NYT {
////////////////////////////////////////////////////////////////////////////////
-inline size_t SplitMix64(size_t value)
+inline size_t SplitMix(size_t value)
{
- static_assert(sizeof(size_t) == 8, "size_t must be 64 bit.");
+ // GOLDEN_GAMMA is the odd integer closest to (2 ^ {bit_depth}) / phi,
+ // where phi = (1 + sqrt(5))/2 is the golden ratio.
+ // Defined in https://gee.cs.oswego.edu/dl/papers/oopsla14.pdf
+ //
+ // The second part of algorithm is finalization mix and uses some magic constants
+ // For 32bit platforms it taken from https://github.com/aappleby/smhasher/blob/master/src/MurmurHash3.cpp#L70-L74
+ // For 64bit from http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html
+#ifdef _64_
+ constexpr size_t GOLDEN_GAMMA = 0x9e3779b97f4a7c15ULL;
+ constexpr size_t C1 = 0xbf58476d1ce4e5b9ULL;
+ constexpr size_t C2 = 0x94d049bb133111ebULL;
+ constexpr size_t S0 = 30;
+ constexpr size_t S1 = 27;
+ constexpr size_t S2 = 31;
+#else
+ constexpr size_t GOLDEN_GAMMA = 0x9e3779b9UL;
+ constexpr size_t C1 = 0x85ebca6bUL;
+ constexpr size_t C2 = 0xc2b2ae35UL;
+ constexpr size_t S0 = 16;
+ constexpr size_t S1 = 13;
+ constexpr size_t S2 = 16;
+#endif
- value += 0x9e3779b97f4a7c15ULL;
- value = (value ^ (value >> 30)) * 0xbf58476d1ce4e5b9ULL;
- value = (value ^ (value >> 27)) * 0x94d049bb133111ebULL;
- return value ^ (value >> 31);
+ value += GOLDEN_GAMMA;
+ value = (value ^ (value >> S0)) * C1;
+ value = (value ^ (value >> S1)) * C2;
+ return value ^ (value >> S2);
}
////////////////////////////////////////////////////////////////////////////////
inline void HashCombine(size_t& h, size_t k)
{
- static_assert(sizeof(size_t) == 8, "size_t must be 64 bit.");
-
- const size_t m = 0xc6a4a7935bd1e995ULL;
- const int r = 47;
+ // Combine step from MurmurHash https://github.com/abrandoned/murmur2/blob/master/MurmurHash2.c
+ // 64bit constants are taken from MurmurHash64A, 32bit — MurmurHash2
+#ifdef _64_
+ constexpr size_t m = 0xc6a4a7935bd1e995ULL;
+ constexpr size_t r = 47;
+#else
+ constexpr size_t m = 0x5bd1e995;
+ constexpr size_t r = 24;
+#endif
k *= m;
k ^= k >> r;
diff --git a/library/cpp/yt/misc/hash.h b/library/cpp/yt/misc/hash.h
index e2b6f72029b..2281ed707f3 100644
--- a/library/cpp/yt/misc/hash.h
+++ b/library/cpp/yt/misc/hash.h
@@ -13,7 +13,7 @@ namespace NYT {
//! as opposed to raw collision minimization.
//! This is also SplitMix64 PRNG.
//! Cf. |http://zimbry.blogspot.com/2011/09/better-bit-mixing-improving-on.html|, |boost::random::splitmix64|.
-size_t SplitMix64(size_t value);
+size_t SplitMix(size_t value);
////////////////////////////////////////////////////////////////////////////////
diff --git a/library/cpp/yt/misc/unittests/hash_ut.cpp b/library/cpp/yt/misc/unittests/hash_ut.cpp
index a8e60faaae0..0695d32d47a 100644
--- a/library/cpp/yt/misc/unittests/hash_ut.cpp
+++ b/library/cpp/yt/misc/unittests/hash_ut.cpp
@@ -17,8 +17,8 @@ TEST(THashTest, NaNSafeHash)
TEST(THashTest, SplitMix64Test)
{
- EXPECT_EQ(SplitMix64(0), 0xe220a8397b1dcdafULL);
- EXPECT_EQ(SplitMix64(12345), 0x22118258a9d111a0ULL);
+ EXPECT_EQ(SplitMix(0), 0xe220a8397b1dcdafULL);
+ EXPECT_EQ(SplitMix(12345), 0x22118258a9d111a0ULL);
}
////////////////////////////////////////////////////////////////////////////////