aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorVitaly Stoyan <vvvv@ydb.tech>2024-04-26 17:37:57 +0300
committerGitHub <noreply@github.com>2024-04-26 17:37:57 +0300
commit3221bc68fac443f7857f3c8aed1279bcfe6cfe25 (patch)
tree0d7e7d0244df0a2bf2167ae7886209e98ac2d4d0
parent23529c87ee732870364782b446f902d33e9baac7 (diff)
downloadydb-3221bc68fac443f7857f3c8aed1279bcfe6cfe25.tar.gz
Improved spreading of hash (#4145)
-rw-r--r--ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h2
-rw-r--r--ydb/library/yql/minikql/comp_nodes/ut/mkql_rh_hash_ut.cpp30
2 files changed, 30 insertions, 2 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h b/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
index ae3e9e4fa50..bc26285e8e2 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
@@ -214,7 +214,7 @@ private:
Y_FORCE_INLINE char* MakeIterator(const ui64 hash, char* data, ui64 capacity) {
// https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
- ui64 bucket = ((SelfHash ^ hash) * 11400714819323198485llu) & (capacity - 1);
+ ui64 bucket = (((unsigned __int128)(SelfHash ^ hash) * 11400714819323198485llu) >> 64) & (capacity - 1);
char* ptr = data + AsDeriv().GetCellSize() * bucket;
return ptr;
}
diff --git a/ydb/library/yql/minikql/comp_nodes/ut/mkql_rh_hash_ut.cpp b/ydb/library/yql/minikql/comp_nodes/ut/mkql_rh_hash_ut.cpp
index 91cf6e3c77e..797344a37df 100644
--- a/ydb/library/yql/minikql/comp_nodes/ut/mkql_rh_hash_ut.cpp
+++ b/ydb/library/yql/minikql/comp_nodes/ut/mkql_rh_hash_ut.cpp
@@ -264,7 +264,35 @@ Y_UNIT_TEST_SUITE(TMiniKQLRobinHoodHashTest) {
}
UNIT_ASSERT(h.empty());
- }
+ }
+
+ Y_UNIT_TEST(Power2Collisions) {
+ TRobinHoodHashSet<ui64> rh;
+ for (ui64 i = 0; i < 10000; ++i) {
+ auto k = i << 32;
+ bool isNew;
+ auto iter = rh.Insert(k, isNew);
+ UNIT_ASSERT_VALUES_EQUAL(rh.GetKey(iter), k);
+ if (isNew) {
+ rh.CheckGrow();
+ }
+
+ UNIT_ASSERT_VALUES_EQUAL(i + 1, rh.GetSize());
+ }
+
+ i32 maxDistance = 0;
+ for (auto it = rh.Begin(); it != rh.End(); rh.Advance(it)) {
+ if (!rh.IsValid(it)) {
+ continue;
+ }
+
+ auto distance = rh.GetPSL(it).Distance;
+ maxDistance = Max(maxDistance, distance);
+ }
+
+ Cerr << "maxDistance: " << maxDistance << "\n";
+ UNIT_ASSERT(maxDistance < 10);
+ }
}
}