aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorvvvv <vvvv@ydb.tech>2023-08-17 17:37:55 +0300
committervvvv <vvvv@ydb.tech>2023-08-17 18:38:57 +0300
commitb1143dd2f477c0c1cb2b44317c7dbaaffa587776 (patch)
treebc96f74cc8613e5355cbb8aefea66ff6821cc414
parent609fcbed594087e35e347f1d03918d12436be2e6 (diff)
downloadydb-b1143dd2f477c0c1cb2b44317c7dbaaffa587776.tar.gz
Use power of 2 hash size + fibonacci hashing
измерения на q32 (remote) trunk 17.06813s pr 16.27866s
-rw-r--r--ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h10
1 files changed, 5 insertions, 5 deletions
diff --git a/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h b/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
index 2cc74026a5..23699b1c53 100644
--- a/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
+++ b/ydb/library/yql/minikql/comp_nodes/mkql_rh_hash.h
@@ -7,8 +7,6 @@
#include <util/digest/city.h>
#include <util/generic/scope.h>
-#include <ydb/library/yql/minikql/primes.h>
-
namespace NKikimr {
namespace NMiniKQL {
@@ -56,10 +54,11 @@ protected:
explicit TRobinHoodHashBase(const ui64 initialCapacity, THash hash, TEqual equal)
: HashLocal(std::move(hash))
, EqualLocal(std::move(equal))
- , Capacity(FindNearestPrime(initialCapacity))
+ , Capacity(initialCapacity)
, Allocator()
, SelfHash(GetSelfHash(this))
{
+ Y_ENSURE((Capacity & (Capacity - 1)) == 0);
}
~TRobinHoodHashBase() {
@@ -165,7 +164,8 @@ private:
Y_FORCE_INLINE char* InsertImpl(TKey key, const ui64 hash, bool& isNew, ui64 capacity, char* data, char* dataEnd) {
isNew = false;
TPSLStorage psl(hash);
- ui64 bucket = (SelfHash ^ hash) % capacity;
+ // https://probablydance.com/2018/06/16/fibonacci-hashing-the-optimization-that-the-world-forgot-or-a-better-alternative-to-integer-modulo/
+ ui64 bucket = ((SelfHash ^ hash) * 11400714819323198485llu) & (capacity - 1);
char* ptr = data + AsDeriv().GetCellSize() * bucket;
char* returnPtr;
typename TDeriv::TPayloadStore tmpPayload;
@@ -235,7 +235,7 @@ private:
} else {
growFactor = 2;
}
- auto newCapacity = FindNearestPrime(Capacity * growFactor);
+ auto newCapacity = Capacity * growFactor;
char *newData, *newDataEnd;
Allocate(newCapacity, newData, newDataEnd);
Y_DEFER {