aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authora11ax <a11ax@yandex-team.com>2024-11-20 02:29:59 +0300
committera11ax <a11ax@yandex-team.com>2024-11-20 02:40:36 +0300
commit1f50694cfb5c440be984de5cae781b8d5948fea6 (patch)
treee333a908be139d081bf2478409567248fbfc58de
parent7963b1e3e8537150d193c39a9d7a008fc82c6fc8 (diff)
downloadydb-1f50694cfb5c440be984de5cae781b8d5948fea6.tar.gz
/util: streaming CityHash
commit_hash:bd4032fbf4c4ece089ad071747194b38df6c5edd
-rw-r--r--util/digest/city.cpp62
-rw-r--r--util/digest/city_streaming.h21
2 files changed, 83 insertions, 0 deletions
diff --git a/util/digest/city.cpp b/util/digest/city.cpp
index c0c7c1a2e9..0976169447 100644
--- a/util/digest/city.cpp
+++ b/util/digest/city.cpp
@@ -28,6 +28,7 @@
// compromising on hash quality.
#include "city.h"
+#include "city_streaming.h"
using uint8 = ui8;
using uint32 = ui32;
@@ -307,3 +308,64 @@ uint128 CityHash128(const char* s, size_t len) noexcept {
return CityHash128WithSeed(s, len, uint128(k0, k1));
}
}
+
+TStreamingCityHash64::TStreamingCityHash64(size_t len, const char *head64, const char *tail64) {
+ Y_ASSERT(len > 64);
+ x = UNALIGNED_LOAD64(head64);
+ y = UNALIGNED_LOAD64(tail64 + 48) ^ k1;
+ z = UNALIGNED_LOAD64(tail64 + 8) ^ k0;
+ v = WeakHashLen32WithSeeds(tail64, len, y);
+ w = WeakHashLen32WithSeeds(tail64 + 32, len * k1, k0);
+ z += ShiftMix(v.second) * k1;
+ x = Rotate(z + x, 39) * k1;
+ y = Rotate(y, 33) * k1;
+ Rest64_ = (len - 1) / 64;
+ UnalignBufSz_ = 0;
+}
+
+void TStreamingCityHash64::Process(const char *s, size_t avail) {
+ if (Y_UNLIKELY(!Rest64_)) return;
+ if (UnalignBufSz_) {
+ if (UnalignBufSz_ + avail < 64) {
+ memcpy(&UnalignBuf_[UnalignBufSz_], s, avail);
+ UnalignBufSz_ += avail;
+ return;
+ } else {
+ memcpy(&UnalignBuf_[UnalignBufSz_], s, 64 - UnalignBufSz_);
+ x = Rotate(x + y + v.first + UNALIGNED_LOAD64(UnalignBuf_ + 16), 37) * k1;
+ y = Rotate(y + v.second + UNALIGNED_LOAD64(UnalignBuf_ + 48), 42) * k1;
+ x ^= w.second;
+ y ^= v.first;
+ z = Rotate(z ^ w.first, 33);
+ v = WeakHashLen32WithSeeds(UnalignBuf_, v.second * k1, x + w.first);
+ w = WeakHashLen32WithSeeds(UnalignBuf_ + 32, z + w.second, y);
+ DoSwap(z, x);
+ s += 64 - UnalignBufSz_;
+ avail -= 64 - UnalignBufSz_;
+ Rest64_--;
+ UnalignBufSz_ = 0;
+ }
+ }
+ while(Rest64_ && avail >= 64) {
+ x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1;
+ y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1;
+ x ^= w.second;
+ y ^= v.first;
+ z = Rotate(z ^ w.first, 33);
+ v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first);
+ w = WeakHashLen32WithSeeds(s + 32, z + w.second, y);
+ DoSwap(z, x);
+ s += 64;
+ avail -= 64;
+ Rest64_--;
+ }
+ if (Rest64_ && avail) {
+ memcpy(UnalignBuf_, s, avail);
+ UnalignBufSz_ = avail;
+ }
+}
+
+uint64 TStreamingCityHash64::operator() () {
+ return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z,
+ HashLen16(v.second, w.second) + x);
+}
diff --git a/util/digest/city_streaming.h b/util/digest/city_streaming.h
new file mode 100644
index 0000000000..f9c5209e0a
--- /dev/null
+++ b/util/digest/city_streaming.h
@@ -0,0 +1,21 @@
+#pragma once
+#include <util/digest/city.h>
+
+/**
+ * (partially) streaming version of CityHash64 for large data.
+ * You need to know length and first/last 64 bytes.
+ * Those bytes should be passed twice: in constructor and thru process().
+ * Length must be STRICTLY larger than 64 bytes.
+ * XXX: Dont use CityHash64 if you can use something else and need streaming
+ */
+class TStreamingCityHash64 {
+ ui64 x, y, z;
+ std::pair<ui64, ui64> v, w;
+ char UnalignBuf_[64];
+ size_t UnalignBufSz_, Rest64_;
+
+public:
+ TStreamingCityHash64(size_t len, const char* head64, const char* tail64);
+ void Process(const char* s, size_t avail);
+ ui64 operator()();
+};