diff options
author | somov <somov@yandex-team.ru> | 2022-02-10 16:45:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:49 +0300 |
commit | 7489e4682331202b9c7d863c0898eb83d7b12c2b (patch) | |
tree | 9142afc54d335ea52910662635b898e79e192e49 /contrib/libs/farmhash/farmhashna.cc | |
parent | a5950576e397b1909261050b8c7da16db58f10b1 (diff) | |
download | ydb-7489e4682331202b9c7d863c0898eb83d7b12c2b.tar.gz |
Restoring authorship annotation for <somov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/farmhash/farmhashna.cc')
-rw-r--r-- | contrib/libs/farmhash/farmhashna.cc | 346 |
1 files changed, 173 insertions, 173 deletions
diff --git a/contrib/libs/farmhash/farmhashna.cc b/contrib/libs/farmhash/farmhashna.cc index a2941aa535..76ef2a3168 100644 --- a/contrib/libs/farmhash/farmhashna.cc +++ b/contrib/libs/farmhash/farmhashna.cc @@ -1,173 +1,173 @@ -#include "common.h" - -namespace farmhashna { -#undef Fetch -#define Fetch Fetch64 - -#undef Rotate -#define Rotate Rotate64 - -#undef Bswap -#define Bswap Bswap64 - -STATIC_INLINE uint64_t ShiftMix(uint64_t val) { - return val ^ (val >> 47); -} - -STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) { - return Hash128to64(Uint128(u, v)); -} - -STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) { - // Murmur-inspired hashing. - uint64_t a = (u ^ v) * mul; - a ^= (a >> 47); - uint64_t b = (v ^ a) * mul; - b ^= (b >> 47); - b *= mul; - return b; -} - -STATIC_INLINE uint64_t HashLen0to16(const char *s, size_t len) { - if (len >= 8) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch(s) + k2; - uint64_t b = Fetch(s + len - 8); - uint64_t c = Rotate(b, 37) * mul + a; - uint64_t d = (Rotate(a, 25) + b) * mul; - return HashLen16(c, d, mul); - } - if (len >= 4) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch32(s); - return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); - } - if (len > 0) { - uint8_t a = s[0]; - uint8_t b = s[len >> 1]; - uint8_t c = s[len - 1]; - uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8); - uint32_t z = len + (static_cast<uint32_t>(c) << 2); - return ShiftMix(y * k2 ^ z * k0) * k2; - } - return k2; -} - -// This probably works well for 16-byte strings as well, but it may be overkill -// in that case. -STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch(s) * k1; - uint64_t b = Fetch(s + 8); - uint64_t c = Fetch(s + len - 8) * mul; - uint64_t d = Fetch(s + len - 16) * k2; - return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, - a + Rotate(b + k2, 18) + c, mul); -} - -// Return a 16-byte hash for 48 bytes. Quick and dirty. -// Callers do best to use "random-looking" values for a and b. -STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( - uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) { - a += w; - b = Rotate(b + a + z, 21); - uint64_t c = a; - a += x; - a += y; - b += Rotate(a, 44); - return make_pair(a + z, b + c); -} - -// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. -STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( - const char* s, uint64_t a, uint64_t b) { - return WeakHashLen32WithSeeds(Fetch(s), - Fetch(s + 8), - Fetch(s + 16), - Fetch(s + 24), - a, - b); -} - -// Return an 8-byte hash for 33 to 64 bytes. -STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) { - uint64_t mul = k2 + len * 2; - uint64_t a = Fetch(s) * k2; - uint64_t b = Fetch(s + 8); - uint64_t c = Fetch(s + len - 8) * mul; - uint64_t d = Fetch(s + len - 16) * k2; - uint64_t y = Rotate(a + b, 43) + Rotate(c, 30) + d; - uint64_t z = HashLen16(y, a + Rotate(b + k2, 18) + c, mul); - uint64_t e = Fetch(s + 16) * mul; - uint64_t f = Fetch(s + 24); - uint64_t g = (y + Fetch(s + len - 32)) * mul; - uint64_t h = (z + Fetch(s + len - 24)) * mul; - return HashLen16(Rotate(e + f, 43) + Rotate(g, 30) + h, - e + Rotate(f + a, 18) + g, mul); -} - -uint64_t Hash64(const char *s, size_t len) { - const uint64_t seed = 81; - if (len <= 32) { - if (len <= 16) { - return HashLen0to16(s, len); - } else { - return HashLen17to32(s, len); - } - } else if (len <= 64) { - return HashLen33to64(s, len); - } - - // For strings over 64 bytes we loop. Internal state consists of - // 56 bytes: v, w, x, y, and z. - uint64_t x = seed; - uint64_t y = seed * k1 + 113; - uint64_t z = ShiftMix(y * k2 + 113) * k2; - pair<uint64_t, uint64_t> v = make_pair(0, 0); - pair<uint64_t, uint64_t> w = make_pair(0, 0); - x = x * k2 + Fetch(s); - - // Set end so that after the loop we have 1 to 64 bytes left to process. - const char* end = s + ((len - 1) / 64) * 64; - const char* last64 = end + ((len - 1) & 63) - 63; - assert(s + len - 64 == last64); - do { - x = Rotate(x + y + v.first + Fetch(s + 8), 37) * k1; - y = Rotate(y + v.second + Fetch(s + 48), 42) * k1; - x ^= w.second; - y += v.first + Fetch(s + 40); - z = Rotate(z + w.first, 33) * k1; - v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); - std::swap(z, x); - s += 64; - } while (s != end); - uint64_t mul = k1 + ((z & 0xff) << 1); - // Make s point to the last 64 bytes of input. - s = last64; - w.first += ((len - 1) & 63); - v.first += w.first; - w.first += v.first; - x = Rotate(x + y + v.first + Fetch(s + 8), 37) * mul; - y = Rotate(y + v.second + Fetch(s + 48), 42) * mul; - x ^= w.second * 9; - y += v.first * 9 + Fetch(s + 40); - z = Rotate(z + w.first, 33) * mul; - v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first); - w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); - std::swap(z, x); - return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, - HashLen16(v.second, w.second, mul) + x, - mul); -} - -uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1); - -uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) { - return Hash64WithSeeds(s, len, k2, seed); -} - -uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) { - return HashLen16(Hash64(s, len) - seed0, seed1); -} -} // namespace farmhashna +#include "common.h" + +namespace farmhashna { +#undef Fetch +#define Fetch Fetch64 + +#undef Rotate +#define Rotate Rotate64 + +#undef Bswap +#define Bswap Bswap64 + +STATIC_INLINE uint64_t ShiftMix(uint64_t val) { + return val ^ (val >> 47); +} + +STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v) { + return Hash128to64(Uint128(u, v)); +} + +STATIC_INLINE uint64_t HashLen16(uint64_t u, uint64_t v, uint64_t mul) { + // Murmur-inspired hashing. + uint64_t a = (u ^ v) * mul; + a ^= (a >> 47); + uint64_t b = (v ^ a) * mul; + b ^= (b >> 47); + b *= mul; + return b; +} + +STATIC_INLINE uint64_t HashLen0to16(const char *s, size_t len) { + if (len >= 8) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch(s) + k2; + uint64_t b = Fetch(s + len - 8); + uint64_t c = Rotate(b, 37) * mul + a; + uint64_t d = (Rotate(a, 25) + b) * mul; + return HashLen16(c, d, mul); + } + if (len >= 4) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch32(s); + return HashLen16(len + (a << 3), Fetch32(s + len - 4), mul); + } + if (len > 0) { + uint8_t a = s[0]; + uint8_t b = s[len >> 1]; + uint8_t c = s[len - 1]; + uint32_t y = static_cast<uint32_t>(a) + (static_cast<uint32_t>(b) << 8); + uint32_t z = len + (static_cast<uint32_t>(c) << 2); + return ShiftMix(y * k2 ^ z * k0) * k2; + } + return k2; +} + +// This probably works well for 16-byte strings as well, but it may be overkill +// in that case. +STATIC_INLINE uint64_t HashLen17to32(const char *s, size_t len) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch(s) * k1; + uint64_t b = Fetch(s + 8); + uint64_t c = Fetch(s + len - 8) * mul; + uint64_t d = Fetch(s + len - 16) * k2; + return HashLen16(Rotate(a + b, 43) + Rotate(c, 30) + d, + a + Rotate(b + k2, 18) + c, mul); +} + +// Return a 16-byte hash for 48 bytes. Quick and dirty. +// Callers do best to use "random-looking" values for a and b. +STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( + uint64_t w, uint64_t x, uint64_t y, uint64_t z, uint64_t a, uint64_t b) { + a += w; + b = Rotate(b + a + z, 21); + uint64_t c = a; + a += x; + a += y; + b += Rotate(a, 44); + return make_pair(a + z, b + c); +} + +// Return a 16-byte hash for s[0] ... s[31], a, and b. Quick and dirty. +STATIC_INLINE pair<uint64_t, uint64_t> WeakHashLen32WithSeeds( + const char* s, uint64_t a, uint64_t b) { + return WeakHashLen32WithSeeds(Fetch(s), + Fetch(s + 8), + Fetch(s + 16), + Fetch(s + 24), + a, + b); +} + +// Return an 8-byte hash for 33 to 64 bytes. +STATIC_INLINE uint64_t HashLen33to64(const char *s, size_t len) { + uint64_t mul = k2 + len * 2; + uint64_t a = Fetch(s) * k2; + uint64_t b = Fetch(s + 8); + uint64_t c = Fetch(s + len - 8) * mul; + uint64_t d = Fetch(s + len - 16) * k2; + uint64_t y = Rotate(a + b, 43) + Rotate(c, 30) + d; + uint64_t z = HashLen16(y, a + Rotate(b + k2, 18) + c, mul); + uint64_t e = Fetch(s + 16) * mul; + uint64_t f = Fetch(s + 24); + uint64_t g = (y + Fetch(s + len - 32)) * mul; + uint64_t h = (z + Fetch(s + len - 24)) * mul; + return HashLen16(Rotate(e + f, 43) + Rotate(g, 30) + h, + e + Rotate(f + a, 18) + g, mul); +} + +uint64_t Hash64(const char *s, size_t len) { + const uint64_t seed = 81; + if (len <= 32) { + if (len <= 16) { + return HashLen0to16(s, len); + } else { + return HashLen17to32(s, len); + } + } else if (len <= 64) { + return HashLen33to64(s, len); + } + + // For strings over 64 bytes we loop. Internal state consists of + // 56 bytes: v, w, x, y, and z. + uint64_t x = seed; + uint64_t y = seed * k1 + 113; + uint64_t z = ShiftMix(y * k2 + 113) * k2; + pair<uint64_t, uint64_t> v = make_pair(0, 0); + pair<uint64_t, uint64_t> w = make_pair(0, 0); + x = x * k2 + Fetch(s); + + // Set end so that after the loop we have 1 to 64 bytes left to process. + const char* end = s + ((len - 1) / 64) * 64; + const char* last64 = end + ((len - 1) & 63) - 63; + assert(s + len - 64 == last64); + do { + x = Rotate(x + y + v.first + Fetch(s + 8), 37) * k1; + y = Rotate(y + v.second + Fetch(s + 48), 42) * k1; + x ^= w.second; + y += v.first + Fetch(s + 40); + z = Rotate(z + w.first, 33) * k1; + v = WeakHashLen32WithSeeds(s, v.second * k1, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); + std::swap(z, x); + s += 64; + } while (s != end); + uint64_t mul = k1 + ((z & 0xff) << 1); + // Make s point to the last 64 bytes of input. + s = last64; + w.first += ((len - 1) & 63); + v.first += w.first; + w.first += v.first; + x = Rotate(x + y + v.first + Fetch(s + 8), 37) * mul; + y = Rotate(y + v.second + Fetch(s + 48), 42) * mul; + x ^= w.second * 9; + y += v.first * 9 + Fetch(s + 40); + z = Rotate(z + w.first, 33) * mul; + v = WeakHashLen32WithSeeds(s, v.second * mul, x + w.first); + w = WeakHashLen32WithSeeds(s + 32, z + w.second, y + Fetch(s + 16)); + std::swap(z, x); + return HashLen16(HashLen16(v.first, w.first, mul) + ShiftMix(y) * k0 + z, + HashLen16(v.second, w.second, mul) + x, + mul); +} + +uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1); + +uint64_t Hash64WithSeed(const char *s, size_t len, uint64_t seed) { + return Hash64WithSeeds(s, len, k2, seed); +} + +uint64_t Hash64WithSeeds(const char *s, size_t len, uint64_t seed0, uint64_t seed1) { + return HashLen16(Hash64(s, len) - seed0, seed1); +} +} // namespace farmhashna |