diff options
author | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-17 12:04:09 +0300 |
---|---|---|
committer | arcadia-devtools <arcadia-devtools@yandex-team.ru> | 2022-02-17 12:04:09 +0300 |
commit | 2c8e314f8fff8633fe2cf026badfbf6180845ae0 (patch) | |
tree | c3b650d13934ec1315e3660d60fd2275f09b03a7 /library/cpp/digest/murmur | |
parent | a49ae9d891c35087b242c854f69880fd9fecbddd (diff) | |
download | ydb-2c8e314f8fff8633fe2cf026badfbf6180845ae0.tar.gz |
intermediate changes
ref:d5f945ecdc1f5af1ad57e12787c6b8ed1a9f0f12
Diffstat (limited to 'library/cpp/digest/murmur')
-rw-r--r-- | library/cpp/digest/murmur/murmur.cpp | 1 | ||||
-rw-r--r-- | library/cpp/digest/murmur/murmur.h | 100 | ||||
-rw-r--r-- | library/cpp/digest/murmur/murmur_ut.cpp | 61 | ||||
-rw-r--r-- | library/cpp/digest/murmur/ut/ya.make | 15 | ||||
-rw-r--r-- | library/cpp/digest/murmur/ya.make | 9 |
5 files changed, 186 insertions, 0 deletions
diff --git a/library/cpp/digest/murmur/murmur.cpp b/library/cpp/digest/murmur/murmur.cpp new file mode 100644 index 0000000000..7c7c1c0ff7 --- /dev/null +++ b/library/cpp/digest/murmur/murmur.cpp @@ -0,0 +1 @@ +#include "murmur.h" diff --git a/library/cpp/digest/murmur/murmur.h b/library/cpp/digest/murmur/murmur.h new file mode 100644 index 0000000000..cbf2886412 --- /dev/null +++ b/library/cpp/digest/murmur/murmur.h @@ -0,0 +1,100 @@ +#pragma once + +#include <util/system/defaults.h> +#include <util/system/unaligned_mem.h> + +/* + * https://sites.google.com/site/murmurhash/ + */ + +namespace NMurmurPrivate { + template <size_t> + struct TMurmurHash2ATraits; + + template <> + struct TMurmurHash2ATraits<32> { + using TValue = ui32; + static const TValue Multiplier = 0x5bd1e995; + enum { R1 = 24, + R2 = 13, + R3 = 15 }; + }; + + template <> + struct TMurmurHash2ATraits<64> { + using TValue = ui64; + static const TValue Multiplier = ULL(0xc6a4a7935bd1e995); + enum { R1 = 47, + R2 = 47, + R3 = 47 }; + }; +} + +template <class T> +class TMurmurHash2A { +private: + using TTraits = typename NMurmurPrivate::TMurmurHash2ATraits<8 * sizeof(T)>; + using TValue = typename TTraits::TValue; + +public: + inline TMurmurHash2A(TValue seed = 0) + : Hash(seed) + { + } + + inline TMurmurHash2A& Update(const void* buf, size_t len) noexcept { + Size += len; + + MixTail(buf, len); + + while (len >= sizeof(TValue)) { + Hash = Mix(Hash, ReadUnaligned<TValue>(buf)); + buf = static_cast<const char*>(buf) + sizeof(TValue); + len -= sizeof(TValue); + } + + MixTail(buf, len); + + return *this; + } + + inline TValue Value() const noexcept { + TValue hash = Mix(Mix(Hash, Tail), (TValue)Size); + + hash ^= hash >> TTraits::R2; + hash *= TTraits::Multiplier; + hash ^= hash >> TTraits::R3; + + return hash; + } + +private: + static inline TValue Mix(TValue h, TValue k) noexcept { + k *= TTraits::Multiplier; + k ^= k >> TTraits::R1; + k *= TTraits::Multiplier; + h *= TTraits::Multiplier; + h ^= k; + return h; + } + + inline void MixTail(const void*& buf, size_t& len) noexcept { + while (len && (len < sizeof(TValue) || Count)) { + Tail |= (TValue) * ((const unsigned char*&)buf)++ << (Count++ * 8); + + --len; + + if (Count == sizeof(TValue)) { + Hash = Mix(Hash, Tail); + Tail = 0; + Count = 0; + } + } + } + +private: + TValue Hash = 0; + TValue Tail = 0; + size_t Count = 0; + size_t Size = 0; +}; diff --git a/library/cpp/digest/murmur/murmur_ut.cpp b/library/cpp/digest/murmur/murmur_ut.cpp new file mode 100644 index 0000000000..3447980c8c --- /dev/null +++ b/library/cpp/digest/murmur/murmur_ut.cpp @@ -0,0 +1,61 @@ +#include "murmur.h" + +#include <library/cpp/testing/unittest/registar.h> + +class TMurmurHashTest: public TTestBase { +private: + UNIT_TEST_SUITE(TMurmurHashTest); + UNIT_TEST(TestHash2A32) + UNIT_TEST(TestHash2A64) + UNIT_TEST(TestUnaligned) + UNIT_TEST_SUITE_END(); + +private: + inline void TestHash2A32() { + ui8 buf[256]; + + for (size_t i = 0; i < 256; ++i) { + buf[i] = i; + } + + Test2A<ui32>(buf, 256, 0, 97, 178525084UL); + Test2A<ui32>(buf, 256, 128, 193, 178525084UL); + Test2A<ui32>(buf, 255, 0, 97, 2459858906UL); + Test2A<ui32>(buf, 255, 128, 193, 2459858906UL); + } + + inline void TestHash2A64() { + ui8 buf[256]; + + for (size_t i = 0; i < 256; ++i) { + buf[i] = i; + } + + Test2A<ui64>(buf, 256, 0, 97, ULL(15099340606808450747)); + Test2A<ui64>(buf, 256, 128, 193, ULL(15099340606808450747)); + Test2A<ui64>(buf, 255, 0, 97, ULL(8331973280124075880)); + Test2A<ui64>(buf, 255, 128, 193, ULL(8331973280124075880)); + } + + inline void TestUnaligned() { + ui8 buf[257]; + for (size_t i = 0; i < 256; ++i) { + buf[i + 1] = i; + } + Test2A<ui64>(buf + 1, 256, 0, 97, ULL(15099340606808450747)); + Test2A<ui64>(buf + 1, 256, 128, 193, ULL(15099340606808450747)); + Test2A<ui64>(buf + 1, 255, 0, 97, ULL(8331973280124075880)); + Test2A<ui64>(buf + 1, 255, 128, 193, ULL(8331973280124075880)); + } + +private: + template <class T> + static inline void Test2A(const ui8* data, size_t len, size_t split1, size_t split2, T expected) { + const T value1 = TMurmurHash2A<T>().Update(data, split1).Update(data + split1, len - split1).Value(); + const T value2 = TMurmurHash2A<T>().Update(data, split2).Update(data + split2, len - split2).Value(); + UNIT_ASSERT_VALUES_EQUAL(value1, value2); + UNIT_ASSERT_VALUES_EQUAL(value1, expected); + } +}; + +UNIT_TEST_SUITE_REGISTRATION(TMurmurHashTest); diff --git a/library/cpp/digest/murmur/ut/ya.make b/library/cpp/digest/murmur/ut/ya.make new file mode 100644 index 0000000000..3405399779 --- /dev/null +++ b/library/cpp/digest/murmur/ut/ya.make @@ -0,0 +1,15 @@ +UNITTEST() + +OWNER(elantsev) + +PEERDIR( + ADDINCL library/cpp/digest/murmur +) + +SRCDIR(library/cpp/digest/murmur) + +SRCS( + murmur_ut.cpp +) + +END() diff --git a/library/cpp/digest/murmur/ya.make b/library/cpp/digest/murmur/ya.make new file mode 100644 index 0000000000..5647cc75e1 --- /dev/null +++ b/library/cpp/digest/murmur/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +OWNER(elantsev) + +SRCS( + murmur.cpp +) + +END() |