diff options
author | somov <somov@yandex-team.ru> | 2022-02-10 16:45:49 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:49 +0300 |
commit | 7489e4682331202b9c7d863c0898eb83d7b12c2b (patch) | |
tree | 9142afc54d335ea52910662635b898e79e192e49 /contrib/libs/farmhash/farmhashsu.cc | |
parent | a5950576e397b1909261050b8c7da16db58f10b1 (diff) | |
download | ydb-7489e4682331202b9c7d863c0898eb83d7b12c2b.tar.gz |
Restoring authorship annotation for <somov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/farmhash/farmhashsu.cc')
-rw-r--r-- | contrib/libs/farmhash/farmhashsu.cc | 452 |
1 files changed, 226 insertions, 226 deletions
diff --git a/contrib/libs/farmhash/farmhashsu.cc b/contrib/libs/farmhash/farmhashsu.cc index 49b48d5f3c..319ac35912 100644 --- a/contrib/libs/farmhash/farmhashsu.cc +++ b/contrib/libs/farmhash/farmhashsu.cc @@ -1,226 +1,226 @@ -#include "common.h" - -namespace { - #include "farmhashmk.cc" -} - -namespace farmhashsu { -#if !can_use_sse42 || !can_use_aesni - -uint32_t Hash32(const char *s, size_t len) { - FARMHASH_DIE_IF_MISCONFIGURED; - return s == NULL ? 0 : len; -} - -uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { - FARMHASH_DIE_IF_MISCONFIGURED; - return seed + Hash32(s, len); -} - -#else - -#undef Fetch -#define Fetch Fetch32 - -#undef Rotate -#define Rotate Rotate32 - -#undef Bswap -#define Bswap Bswap32 - -// Helpers for data-parallel operations (4x 32-bits). -STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); } -STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); } -STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); } -STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); } -STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); } -STATIC_INLINE __m128i RotateLeft(__m128i x, int c) { - return Or(_mm_slli_epi32(x, c), - _mm_srli_epi32(x, 32 - c)); -} -STATIC_INLINE __m128i Rol17(__m128i x) { return RotateLeft(x, 17); } -STATIC_INLINE __m128i Rol19(__m128i x) { return RotateLeft(x, 19); } -STATIC_INLINE __m128i Shuffle0321(__m128i x) { - return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)); -} - -uint32_t Hash32(const char *s, size_t len) { - const uint32_t seed = 81; - if (len <= 24) { - return len <= 12 ? - (len <= 4 ? - farmhashmk::Hash32Len0to4(s, len) : - farmhashmk::Hash32Len5to12(s, len)) : - farmhashmk::Hash32Len13to24(s, len); - } - - if (len < 40) { - uint32_t a = len, b = seed * c2, c = a + b; - a += Fetch(s + len - 4); - b += Fetch(s + len - 20); - c += Fetch(s + len - 16); - uint32_t d = a; - a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21); - a = Mur(a, Mur(b, _mm_crc32_u32(c, d))); - a += Fetch(s + len - 12); - b += Fetch(s + len - 8); - d += a; - a += d; - b = Mur(b, d) * c2; - a = _mm_crc32_u32(a, b + c); - return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; - } - -#undef Mulc1 -#define Mulc1(x) Mul((x), cc1) - -#undef Mulc2 -#define Mulc2(x) Mul((x), cc2) - -#undef Murk -#define Murk(a, h) \ - Add(k, \ - Mul5( \ - Rol19( \ - Xor( \ - Mulc2( \ - Rol17( \ - Mulc1(a))), \ - (h))))) - - const __m128i cc1 = _mm_set1_epi32(c1); - const __m128i cc2 = _mm_set1_epi32(c2); - __m128i h = _mm_set1_epi32(seed); - __m128i g = _mm_set1_epi32(c1 * seed); - __m128i f = g; - __m128i k = _mm_set1_epi32(0xe6546b64); - __m128i q; - if (len < 80) { - __m128i a = Fetch128(s); - __m128i b = Fetch128(s + 16); - __m128i c = Fetch128(s + (len - 15) / 2); - __m128i d = Fetch128(s + len - 32); - __m128i e = Fetch128(s + len - 16); - h = Add(h, a); - g = Add(g, b); - q = g; - g = Shuffle0321(g); - f = Add(f, c); - __m128i be = Add(b, Mulc1(e)); - h = Add(h, f); - f = Add(f, h); - h = Add(Murk(d, h), e); - k = Xor(k, _mm_shuffle_epi8(g, f)); - g = Add(Xor(c, g), a); - f = Add(Xor(be, f), d); - k = Add(k, be); - k = Add(k, _mm_shuffle_epi8(f, h)); - f = Add(f, g); - g = Add(g, f); - g = Add(_mm_set1_epi32(len), Mulc1(g)); - } else { - // len >= 80 - // The following is loosely modelled after farmhashmk::Hash32. - size_t iters = (len - 1) / 80; - len -= iters * 80; - -#undef Chunk -#define Chunk() do { \ - __m128i a = Fetch128(s); \ - __m128i b = Fetch128(s + 16); \ - __m128i c = Fetch128(s + 32); \ - __m128i d = Fetch128(s + 48); \ - __m128i e = Fetch128(s + 64); \ - h = Add(h, a); \ - g = Add(g, b); \ - g = Shuffle0321(g); \ - f = Add(f, c); \ - __m128i be = Add(b, Mulc1(e)); \ - h = Add(h, f); \ - f = Add(f, h); \ - h = Add(h, d); \ - q = Add(q, e); \ - h = Rol17(h); \ - h = Mulc1(h); \ - k = Xor(k, _mm_shuffle_epi8(g, f)); \ - g = Add(Xor(c, g), a); \ - f = Add(Xor(be, f), d); \ - std::swap(f, q); \ - q = _mm_aesimc_si128(q); \ - k = Add(k, be); \ - k = Add(k, _mm_shuffle_epi8(f, h)); \ - f = Add(f, g); \ - g = Add(g, f); \ - f = Mulc1(f); \ -} while (0) - - q = g; - while (iters-- != 0) { - Chunk(); - s += 80; - } - - if (len != 0) { - h = Add(h, _mm_set1_epi32(len)); - s = s + len - 80; - Chunk(); - } - } - - g = Shuffle0321(g); - k = Xor(k, g); - k = Xor(k, q); - h = Xor(h, q); - f = Mulc1(f); - k = Mulc2(k); - g = Mulc1(g); - h = Mulc2(h); - k = Add(k, _mm_shuffle_epi8(g, f)); - h = Add(h, f); - f = Add(f, h); - g = Add(g, k); - k = Add(k, g); - k = Xor(k, _mm_shuffle_epi8(f, h)); - __m128i buf[4]; - buf[0] = f; - buf[1] = g; - buf[2] = k; - buf[3] = h; - s = reinterpret_cast<char*>(buf); - uint32_t x = Fetch(s); - uint32_t y = Fetch(s+4); - uint32_t z = Fetch(s+8); - x = _mm_crc32_u32(x, Fetch(s+12)); - y = _mm_crc32_u32(y, Fetch(s+16)); - z = _mm_crc32_u32(z * c1, Fetch(s+20)); - x = _mm_crc32_u32(x, Fetch(s+24)); - y = _mm_crc32_u32(y * c1, Fetch(s+28)); - uint32_t o = y; - z = _mm_crc32_u32(z, Fetch(s+32)); - x = _mm_crc32_u32(x * c1, Fetch(s+36)); - y = _mm_crc32_u32(y, Fetch(s+40)); - z = _mm_crc32_u32(z * c1, Fetch(s+44)); - x = _mm_crc32_u32(x, Fetch(s+48)); - y = _mm_crc32_u32(y * c1, Fetch(s+52)); - z = _mm_crc32_u32(z, Fetch(s+56)); - x = _mm_crc32_u32(x, Fetch(s+60)); - return (o - x + y - z) * c1; -} - -#undef Chunk -#undef Murk -#undef Mulc2 -#undef Mulc1 - -uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { - if (len <= 24) { - if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); - else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); - else return farmhashmk::Hash32Len0to4(s, len, seed); - } - uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); - return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h); -} - -#endif -} // namespace farmhashsu +#include "common.h" + +namespace { + #include "farmhashmk.cc" +} + +namespace farmhashsu { +#if !can_use_sse42 || !can_use_aesni + +uint32_t Hash32(const char *s, size_t len) { + FARMHASH_DIE_IF_MISCONFIGURED; + return s == NULL ? 0 : len; +} + +uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { + FARMHASH_DIE_IF_MISCONFIGURED; + return seed + Hash32(s, len); +} + +#else + +#undef Fetch +#define Fetch Fetch32 + +#undef Rotate +#define Rotate Rotate32 + +#undef Bswap +#define Bswap Bswap32 + +// Helpers for data-parallel operations (4x 32-bits). +STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); } +STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); } +STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); } +STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); } +STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); } +STATIC_INLINE __m128i RotateLeft(__m128i x, int c) { + return Or(_mm_slli_epi32(x, c), + _mm_srli_epi32(x, 32 - c)); +} +STATIC_INLINE __m128i Rol17(__m128i x) { return RotateLeft(x, 17); } +STATIC_INLINE __m128i Rol19(__m128i x) { return RotateLeft(x, 19); } +STATIC_INLINE __m128i Shuffle0321(__m128i x) { + return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0)); +} + +uint32_t Hash32(const char *s, size_t len) { + const uint32_t seed = 81; + if (len <= 24) { + return len <= 12 ? + (len <= 4 ? + farmhashmk::Hash32Len0to4(s, len) : + farmhashmk::Hash32Len5to12(s, len)) : + farmhashmk::Hash32Len13to24(s, len); + } + + if (len < 40) { + uint32_t a = len, b = seed * c2, c = a + b; + a += Fetch(s + len - 4); + b += Fetch(s + len - 20); + c += Fetch(s + len - 16); + uint32_t d = a; + a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21); + a = Mur(a, Mur(b, _mm_crc32_u32(c, d))); + a += Fetch(s + len - 12); + b += Fetch(s + len - 8); + d += a; + a += d; + b = Mur(b, d) * c2; + a = _mm_crc32_u32(a, b + c); + return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b; + } + +#undef Mulc1 +#define Mulc1(x) Mul((x), cc1) + +#undef Mulc2 +#define Mulc2(x) Mul((x), cc2) + +#undef Murk +#define Murk(a, h) \ + Add(k, \ + Mul5( \ + Rol19( \ + Xor( \ + Mulc2( \ + Rol17( \ + Mulc1(a))), \ + (h))))) + + const __m128i cc1 = _mm_set1_epi32(c1); + const __m128i cc2 = _mm_set1_epi32(c2); + __m128i h = _mm_set1_epi32(seed); + __m128i g = _mm_set1_epi32(c1 * seed); + __m128i f = g; + __m128i k = _mm_set1_epi32(0xe6546b64); + __m128i q; + if (len < 80) { + __m128i a = Fetch128(s); + __m128i b = Fetch128(s + 16); + __m128i c = Fetch128(s + (len - 15) / 2); + __m128i d = Fetch128(s + len - 32); + __m128i e = Fetch128(s + len - 16); + h = Add(h, a); + g = Add(g, b); + q = g; + g = Shuffle0321(g); + f = Add(f, c); + __m128i be = Add(b, Mulc1(e)); + h = Add(h, f); + f = Add(f, h); + h = Add(Murk(d, h), e); + k = Xor(k, _mm_shuffle_epi8(g, f)); + g = Add(Xor(c, g), a); + f = Add(Xor(be, f), d); + k = Add(k, be); + k = Add(k, _mm_shuffle_epi8(f, h)); + f = Add(f, g); + g = Add(g, f); + g = Add(_mm_set1_epi32(len), Mulc1(g)); + } else { + // len >= 80 + // The following is loosely modelled after farmhashmk::Hash32. + size_t iters = (len - 1) / 80; + len -= iters * 80; + +#undef Chunk +#define Chunk() do { \ + __m128i a = Fetch128(s); \ + __m128i b = Fetch128(s + 16); \ + __m128i c = Fetch128(s + 32); \ + __m128i d = Fetch128(s + 48); \ + __m128i e = Fetch128(s + 64); \ + h = Add(h, a); \ + g = Add(g, b); \ + g = Shuffle0321(g); \ + f = Add(f, c); \ + __m128i be = Add(b, Mulc1(e)); \ + h = Add(h, f); \ + f = Add(f, h); \ + h = Add(h, d); \ + q = Add(q, e); \ + h = Rol17(h); \ + h = Mulc1(h); \ + k = Xor(k, _mm_shuffle_epi8(g, f)); \ + g = Add(Xor(c, g), a); \ + f = Add(Xor(be, f), d); \ + std::swap(f, q); \ + q = _mm_aesimc_si128(q); \ + k = Add(k, be); \ + k = Add(k, _mm_shuffle_epi8(f, h)); \ + f = Add(f, g); \ + g = Add(g, f); \ + f = Mulc1(f); \ +} while (0) + + q = g; + while (iters-- != 0) { + Chunk(); + s += 80; + } + + if (len != 0) { + h = Add(h, _mm_set1_epi32(len)); + s = s + len - 80; + Chunk(); + } + } + + g = Shuffle0321(g); + k = Xor(k, g); + k = Xor(k, q); + h = Xor(h, q); + f = Mulc1(f); + k = Mulc2(k); + g = Mulc1(g); + h = Mulc2(h); + k = Add(k, _mm_shuffle_epi8(g, f)); + h = Add(h, f); + f = Add(f, h); + g = Add(g, k); + k = Add(k, g); + k = Xor(k, _mm_shuffle_epi8(f, h)); + __m128i buf[4]; + buf[0] = f; + buf[1] = g; + buf[2] = k; + buf[3] = h; + s = reinterpret_cast<char*>(buf); + uint32_t x = Fetch(s); + uint32_t y = Fetch(s+4); + uint32_t z = Fetch(s+8); + x = _mm_crc32_u32(x, Fetch(s+12)); + y = _mm_crc32_u32(y, Fetch(s+16)); + z = _mm_crc32_u32(z * c1, Fetch(s+20)); + x = _mm_crc32_u32(x, Fetch(s+24)); + y = _mm_crc32_u32(y * c1, Fetch(s+28)); + uint32_t o = y; + z = _mm_crc32_u32(z, Fetch(s+32)); + x = _mm_crc32_u32(x * c1, Fetch(s+36)); + y = _mm_crc32_u32(y, Fetch(s+40)); + z = _mm_crc32_u32(z * c1, Fetch(s+44)); + x = _mm_crc32_u32(x, Fetch(s+48)); + y = _mm_crc32_u32(y * c1, Fetch(s+52)); + z = _mm_crc32_u32(z, Fetch(s+56)); + x = _mm_crc32_u32(x, Fetch(s+60)); + return (o - x + y - z) * c1; +} + +#undef Chunk +#undef Murk +#undef Mulc2 +#undef Mulc1 + +uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) { + if (len <= 24) { + if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1); + else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed); + else return farmhashmk::Hash32Len0to4(s, len, seed); + } + uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len); + return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h); +} + +#endif +} // namespace farmhashsu |