diff options
author | a11ax <a11ax@yandex-team.com> | 2024-11-20 02:29:59 +0300 |
---|---|---|
committer | a11ax <a11ax@yandex-team.com> | 2024-11-20 02:40:36 +0300 |
commit | 1f50694cfb5c440be984de5cae781b8d5948fea6 (patch) | |
tree | e333a908be139d081bf2478409567248fbfc58de | |
parent | 7963b1e3e8537150d193c39a9d7a008fc82c6fc8 (diff) | |
download | ydb-1f50694cfb5c440be984de5cae781b8d5948fea6.tar.gz |
/util: streaming CityHash
commit_hash:bd4032fbf4c4ece089ad071747194b38df6c5edd
-rw-r--r-- | util/digest/city.cpp | 62 | ||||
-rw-r--r-- | util/digest/city_streaming.h | 21 |
2 files changed, 83 insertions, 0 deletions
diff --git a/util/digest/city.cpp b/util/digest/city.cpp index c0c7c1a2e9..0976169447 100644 --- a/util/digest/city.cpp +++ b/util/digest/city.cpp @@ -28,6 +28,7 @@ // compromising on hash quality. #include "city.h" +#include "city_streaming.h" using uint8 = ui8; using uint32 = ui32; @@ -307,3 +308,64 @@ uint128 CityHash128(const char* s, size_t len) noexcept { return CityHash128WithSeed(s, len, uint128(k0, k1)); } } + +TStreamingCityHash64::TStreamingCityHash64(size_t len, const char *head64, const char *tail64) { + Y_ASSERT(len > 64); + x = UNALIGNED_LOAD64(head64); + y = UNALIGNED_LOAD64(tail64 + 48) ^ k1; + z = UNALIGNED_LOAD64(tail64 + 8) ^ k0; + v = WeakHashLen32WithSeeds(tail64, len, y); + w = WeakHashLen32WithSeeds(tail64 + 32, len * k1, k0); + z += ShiftMix(v.second) * k1; + x = Rotate(z + x, 39) * k1; + y = Rotate(y, 33) * k1; + Rest64_ = (len - 1) / 64; + UnalignBufSz_ = 0; +} + +void TStreamingCityHash64::Process(const char *s, size_t avail) { + if (Y_UNLIKELY(!Rest64_)) return; + if (UnalignBufSz_) { + if (UnalignBufSz_ + avail < 64) { + memcpy(&UnalignBuf_[UnalignBufSz_], s, avail); + UnalignBufSz_ += avail; + return; + } else { + memcpy(&UnalignBuf_[UnalignBufSz_], s, 64 - UnalignBufSz_); + x = Rotate(x + y + v.first + UNALIGNED_LOAD64(UnalignBuf_ + 16), 37) * k1; + y = Rotate(y + v.second + UNALIGNED_LOAD64(UnalignBuf_ + 48), 42) * k1; + x ^= w.second; + y ^= v.first; + z = Rotate(z ^ w.first, 33); + v = WeakHashLen32WithSeeds(UnalignBuf_, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(UnalignBuf_ + 32, z + w.second, y); + DoSwap(z, x); + s += 64 - UnalignBufSz_; + avail -= 64 - UnalignBufSz_; + Rest64_--; + UnalignBufSz_ = 0; + } + } + while(Rest64_ && avail >= 64) { + x = Rotate(x + y + v.first + UNALIGNED_LOAD64(s + 16), 37) * k1; + y = Rotate(y + v.second + UNALIGNED_LOAD64(s + 48), 42) * k1; + x ^= w.second; + y ^= v.first; + z = Rotate(z ^ w.first, 33); + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y); + DoSwap(z, x); + s += 64; + avail -= 64; + Rest64_--; + } + if (Rest64_ && avail) { + memcpy(UnalignBuf_, s, avail); + UnalignBufSz_ = avail; + } +} + +uint64 TStreamingCityHash64::operator() () { + return HashLen16(HashLen16(v.first, w.first) + ShiftMix(y) * k1 + z, + HashLen16(v.second, w.second) + x); +} diff --git a/util/digest/city_streaming.h b/util/digest/city_streaming.h new file mode 100644 index 0000000000..f9c5209e0a --- /dev/null +++ b/util/digest/city_streaming.h @@ -0,0 +1,21 @@ +#pragma once +#include <util/digest/city.h> + +/** + * (partially) streaming version of CityHash64 for large data. + * You need to know length and first/last 64 bytes. + * Those bytes should be passed twice: in constructor and thru process(). + * Length must be STRICTLY larger than 64 bytes. + * XXX: Dont use CityHash64 if you can use something else and need streaming + */ +class TStreamingCityHash64 { + ui64 x, y, z; + std::pair<ui64, ui64> v, w; + char UnalignBuf_[64]; + size_t UnalignBufSz_, Rest64_; + +public: + TStreamingCityHash64(size_t len, const char* head64, const char* tail64); + void Process(const char* s, size_t avail); + ui64 operator()(); +}; |