aboutsummaryrefslogtreecommitdiffstats
path: root/library/cpp/digest/murmur
diff options
context:
space:
mode:
authorarcadia-devtools <arcadia-devtools@yandex-team.ru>2022-02-17 12:04:09 +0300
committerarcadia-devtools <arcadia-devtools@yandex-team.ru>2022-02-17 12:04:09 +0300
commit2c8e314f8fff8633fe2cf026badfbf6180845ae0 (patch)
treec3b650d13934ec1315e3660d60fd2275f09b03a7 /library/cpp/digest/murmur
parenta49ae9d891c35087b242c854f69880fd9fecbddd (diff)
downloadydb-2c8e314f8fff8633fe2cf026badfbf6180845ae0.tar.gz
intermediate changes
ref:d5f945ecdc1f5af1ad57e12787c6b8ed1a9f0f12
Diffstat (limited to 'library/cpp/digest/murmur')
-rw-r--r--library/cpp/digest/murmur/murmur.cpp1
-rw-r--r--library/cpp/digest/murmur/murmur.h100
-rw-r--r--library/cpp/digest/murmur/murmur_ut.cpp61
-rw-r--r--library/cpp/digest/murmur/ut/ya.make15
-rw-r--r--library/cpp/digest/murmur/ya.make9
5 files changed, 186 insertions, 0 deletions
diff --git a/library/cpp/digest/murmur/murmur.cpp b/library/cpp/digest/murmur/murmur.cpp
new file mode 100644
index 0000000000..7c7c1c0ff7
--- /dev/null
+++ b/library/cpp/digest/murmur/murmur.cpp
@@ -0,0 +1 @@
+#include "murmur.h"
diff --git a/library/cpp/digest/murmur/murmur.h b/library/cpp/digest/murmur/murmur.h
new file mode 100644
index 0000000000..cbf2886412
--- /dev/null
+++ b/library/cpp/digest/murmur/murmur.h
@@ -0,0 +1,100 @@
+#pragma once
+
+#include <util/system/defaults.h>
+#include <util/system/unaligned_mem.h>
+
+/*
+ * https://sites.google.com/site/murmurhash/
+ */
+
+namespace NMurmurPrivate {
+ template <size_t>
+ struct TMurmurHash2ATraits;
+
+ template <>
+ struct TMurmurHash2ATraits<32> {
+ using TValue = ui32;
+ static const TValue Multiplier = 0x5bd1e995;
+ enum { R1 = 24,
+ R2 = 13,
+ R3 = 15 };
+ };
+
+ template <>
+ struct TMurmurHash2ATraits<64> {
+ using TValue = ui64;
+ static const TValue Multiplier = ULL(0xc6a4a7935bd1e995);
+ enum { R1 = 47,
+ R2 = 47,
+ R3 = 47 };
+ };
+}
+
+template <class T>
+class TMurmurHash2A {
+private:
+ using TTraits = typename NMurmurPrivate::TMurmurHash2ATraits<8 * sizeof(T)>;
+ using TValue = typename TTraits::TValue;
+
+public:
+ inline TMurmurHash2A(TValue seed = 0)
+ : Hash(seed)
+ {
+ }
+
+ inline TMurmurHash2A& Update(const void* buf, size_t len) noexcept {
+ Size += len;
+
+ MixTail(buf, len);
+
+ while (len >= sizeof(TValue)) {
+ Hash = Mix(Hash, ReadUnaligned<TValue>(buf));
+ buf = static_cast<const char*>(buf) + sizeof(TValue);
+ len -= sizeof(TValue);
+ }
+
+ MixTail(buf, len);
+
+ return *this;
+ }
+
+ inline TValue Value() const noexcept {
+ TValue hash = Mix(Mix(Hash, Tail), (TValue)Size);
+
+ hash ^= hash >> TTraits::R2;
+ hash *= TTraits::Multiplier;
+ hash ^= hash >> TTraits::R3;
+
+ return hash;
+ }
+
+private:
+ static inline TValue Mix(TValue h, TValue k) noexcept {
+ k *= TTraits::Multiplier;
+ k ^= k >> TTraits::R1;
+ k *= TTraits::Multiplier;
+ h *= TTraits::Multiplier;
+ h ^= k;
+ return h;
+ }
+
+ inline void MixTail(const void*& buf, size_t& len) noexcept {
+ while (len && (len < sizeof(TValue) || Count)) {
+ Tail |= (TValue) * ((const unsigned char*&)buf)++ << (Count++ * 8);
+
+ --len;
+
+ if (Count == sizeof(TValue)) {
+ Hash = Mix(Hash, Tail);
+ Tail = 0;
+ Count = 0;
+ }
+ }
+ }
+
+private:
+ TValue Hash = 0;
+ TValue Tail = 0;
+ size_t Count = 0;
+ size_t Size = 0;
+};
diff --git a/library/cpp/digest/murmur/murmur_ut.cpp b/library/cpp/digest/murmur/murmur_ut.cpp
new file mode 100644
index 0000000000..3447980c8c
--- /dev/null
+++ b/library/cpp/digest/murmur/murmur_ut.cpp
@@ -0,0 +1,61 @@
+#include "murmur.h"
+
+#include <library/cpp/testing/unittest/registar.h>
+
+class TMurmurHashTest: public TTestBase {
+private:
+ UNIT_TEST_SUITE(TMurmurHashTest);
+ UNIT_TEST(TestHash2A32)
+ UNIT_TEST(TestHash2A64)
+ UNIT_TEST(TestUnaligned)
+ UNIT_TEST_SUITE_END();
+
+private:
+ inline void TestHash2A32() {
+ ui8 buf[256];
+
+ for (size_t i = 0; i < 256; ++i) {
+ buf[i] = i;
+ }
+
+ Test2A<ui32>(buf, 256, 0, 97, 178525084UL);
+ Test2A<ui32>(buf, 256, 128, 193, 178525084UL);
+ Test2A<ui32>(buf, 255, 0, 97, 2459858906UL);
+ Test2A<ui32>(buf, 255, 128, 193, 2459858906UL);
+ }
+
+ inline void TestHash2A64() {
+ ui8 buf[256];
+
+ for (size_t i = 0; i < 256; ++i) {
+ buf[i] = i;
+ }
+
+ Test2A<ui64>(buf, 256, 0, 97, ULL(15099340606808450747));
+ Test2A<ui64>(buf, 256, 128, 193, ULL(15099340606808450747));
+ Test2A<ui64>(buf, 255, 0, 97, ULL(8331973280124075880));
+ Test2A<ui64>(buf, 255, 128, 193, ULL(8331973280124075880));
+ }
+
+ inline void TestUnaligned() {
+ ui8 buf[257];
+ for (size_t i = 0; i < 256; ++i) {
+ buf[i + 1] = i;
+ }
+ Test2A<ui64>(buf + 1, 256, 0, 97, ULL(15099340606808450747));
+ Test2A<ui64>(buf + 1, 256, 128, 193, ULL(15099340606808450747));
+ Test2A<ui64>(buf + 1, 255, 0, 97, ULL(8331973280124075880));
+ Test2A<ui64>(buf + 1, 255, 128, 193, ULL(8331973280124075880));
+ }
+
+private:
+ template <class T>
+ static inline void Test2A(const ui8* data, size_t len, size_t split1, size_t split2, T expected) {
+ const T value1 = TMurmurHash2A<T>().Update(data, split1).Update(data + split1, len - split1).Value();
+ const T value2 = TMurmurHash2A<T>().Update(data, split2).Update(data + split2, len - split2).Value();
+ UNIT_ASSERT_VALUES_EQUAL(value1, value2);
+ UNIT_ASSERT_VALUES_EQUAL(value1, expected);
+ }
+};
+
+UNIT_TEST_SUITE_REGISTRATION(TMurmurHashTest);
diff --git a/library/cpp/digest/murmur/ut/ya.make b/library/cpp/digest/murmur/ut/ya.make
new file mode 100644
index 0000000000..3405399779
--- /dev/null
+++ b/library/cpp/digest/murmur/ut/ya.make
@@ -0,0 +1,15 @@
+UNITTEST()
+
+OWNER(elantsev)
+
+PEERDIR(
+ ADDINCL library/cpp/digest/murmur
+)
+
+SRCDIR(library/cpp/digest/murmur)
+
+SRCS(
+ murmur_ut.cpp
+)
+
+END()
diff --git a/library/cpp/digest/murmur/ya.make b/library/cpp/digest/murmur/ya.make
new file mode 100644
index 0000000000..5647cc75e1
--- /dev/null
+++ b/library/cpp/digest/murmur/ya.make
@@ -0,0 +1,9 @@
+LIBRARY()
+
+OWNER(elantsev)
+
+SRCS(
+ murmur.cpp
+)
+
+END()