aboutsummaryrefslogblamecommitdiffstats
path: root/contrib/libs/farmhash/farmhashsu.cc
blob: 319ac3591208a44a8022f3e5c58547456104f0c9 (plain) (tree)
































































































































































































































                                                                                 
#include "common.h"

namespace {
    #include "farmhashmk.cc"
}

namespace farmhashsu {
#if !can_use_sse42 || !can_use_aesni

uint32_t Hash32(const char *s, size_t len) {
  FARMHASH_DIE_IF_MISCONFIGURED;
  return s == NULL ? 0 : len;
}

uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  FARMHASH_DIE_IF_MISCONFIGURED;
  return seed + Hash32(s, len);
}

#else

#undef Fetch
#define Fetch Fetch32

#undef Rotate
#define Rotate Rotate32

#undef Bswap
#define Bswap Bswap32

// Helpers for data-parallel operations (4x 32-bits).
STATIC_INLINE __m128i Add(__m128i x, __m128i y) { return _mm_add_epi32(x, y); }
STATIC_INLINE __m128i Xor(__m128i x, __m128i y) { return _mm_xor_si128(x, y); }
STATIC_INLINE __m128i Or(__m128i x, __m128i y) { return _mm_or_si128(x, y); }
STATIC_INLINE __m128i Mul(__m128i x, __m128i y) { return _mm_mullo_epi32(x, y); }
STATIC_INLINE __m128i Mul5(__m128i x) { return Add(x, _mm_slli_epi32(x, 2)); }
STATIC_INLINE __m128i RotateLeft(__m128i x, int c) {
  return Or(_mm_slli_epi32(x, c),
            _mm_srli_epi32(x, 32 - c));
}
STATIC_INLINE __m128i Rol17(__m128i x) { return RotateLeft(x, 17); }
STATIC_INLINE __m128i Rol19(__m128i x) { return RotateLeft(x, 19); }
STATIC_INLINE __m128i Shuffle0321(__m128i x) {
  return _mm_shuffle_epi32(x, (0 << 6) + (3 << 4) + (2 << 2) + (1 << 0));
}

uint32_t Hash32(const char *s, size_t len) {
  const uint32_t seed = 81;
  if (len <= 24) {
    return len <= 12 ?
        (len <= 4 ?
         farmhashmk::Hash32Len0to4(s, len) :
         farmhashmk::Hash32Len5to12(s, len)) :
        farmhashmk::Hash32Len13to24(s, len);
  }

  if (len < 40) {
    uint32_t a = len, b = seed * c2, c = a + b;
    a += Fetch(s + len - 4);
    b += Fetch(s + len - 20);
    c += Fetch(s + len - 16);
    uint32_t d = a;
    a = NAMESPACE_FOR_HASH_FUNCTIONS::Rotate32(a, 21);
    a = Mur(a, Mur(b, _mm_crc32_u32(c, d)));
    a += Fetch(s + len - 12);
    b += Fetch(s + len - 8);
    d += a;
    a += d;
    b = Mur(b, d) * c2;
    a = _mm_crc32_u32(a, b + c);
    return farmhashmk::Hash32Len13to24(s, (len + 1) / 2, a) + b;
  }

#undef Mulc1
#define Mulc1(x) Mul((x), cc1)

#undef Mulc2
#define Mulc2(x) Mul((x), cc2)

#undef Murk
#define Murk(a, h)                              \
  Add(k,                                        \
      Mul5(                                     \
          Rol19(                                \
              Xor(                              \
                  Mulc2(                        \
                      Rol17(                    \
                          Mulc1(a))),           \
                  (h)))))

  const __m128i cc1 = _mm_set1_epi32(c1);
  const __m128i cc2 = _mm_set1_epi32(c2);
  __m128i h = _mm_set1_epi32(seed);
  __m128i g = _mm_set1_epi32(c1 * seed);
  __m128i f = g;
  __m128i k = _mm_set1_epi32(0xe6546b64);
  __m128i q;
  if (len < 80) {
    __m128i a = Fetch128(s);
    __m128i b = Fetch128(s + 16);
    __m128i c = Fetch128(s + (len - 15) / 2);
    __m128i d = Fetch128(s + len - 32);
    __m128i e = Fetch128(s + len - 16);
    h = Add(h, a);
    g = Add(g, b);
    q = g;
    g = Shuffle0321(g);
    f = Add(f, c);
    __m128i be = Add(b, Mulc1(e));
    h = Add(h, f);
    f = Add(f, h);
    h = Add(Murk(d, h), e);
    k = Xor(k, _mm_shuffle_epi8(g, f));
    g = Add(Xor(c, g), a);
    f = Add(Xor(be, f), d);
    k = Add(k, be);
    k = Add(k, _mm_shuffle_epi8(f, h));
    f = Add(f, g);
    g = Add(g, f);
    g = Add(_mm_set1_epi32(len), Mulc1(g));
  } else {
    // len >= 80
    // The following is loosely modelled after farmhashmk::Hash32.
    size_t iters = (len - 1) / 80;
    len -= iters * 80;

#undef Chunk
#define Chunk() do {                            \
  __m128i a = Fetch128(s);                      \
  __m128i b = Fetch128(s + 16);                 \
  __m128i c = Fetch128(s + 32);                 \
  __m128i d = Fetch128(s + 48);                 \
  __m128i e = Fetch128(s + 64);                 \
  h = Add(h, a);                                \
  g = Add(g, b);                                \
  g = Shuffle0321(g);                           \
  f = Add(f, c);                                \
  __m128i be = Add(b, Mulc1(e));                \
  h = Add(h, f);                                \
  f = Add(f, h);                                \
  h = Add(h, d);                                \
  q = Add(q, e);                                \
  h = Rol17(h);                                 \
  h = Mulc1(h);                                 \
  k = Xor(k, _mm_shuffle_epi8(g, f));           \
  g = Add(Xor(c, g), a);                        \
  f = Add(Xor(be, f), d);                       \
  std::swap(f, q);                              \
  q = _mm_aesimc_si128(q);                      \
  k = Add(k, be);                               \
  k = Add(k, _mm_shuffle_epi8(f, h));           \
  f = Add(f, g);                                \
  g = Add(g, f);                                \
  f = Mulc1(f);                                 \
} while (0)

    q = g;
    while (iters-- != 0) {
      Chunk();
      s += 80;
    }

    if (len != 0) {
      h = Add(h, _mm_set1_epi32(len));
      s = s + len - 80;
      Chunk();
    }
  }

  g = Shuffle0321(g);
  k = Xor(k, g);
  k = Xor(k, q);
  h = Xor(h, q);
  f = Mulc1(f);
  k = Mulc2(k);
  g = Mulc1(g);
  h = Mulc2(h);
  k = Add(k, _mm_shuffle_epi8(g, f));
  h = Add(h, f);
  f = Add(f, h);
  g = Add(g, k);
  k = Add(k, g);
  k = Xor(k, _mm_shuffle_epi8(f, h));
  __m128i buf[4];
  buf[0] = f;
  buf[1] = g;
  buf[2] = k;
  buf[3] = h;
  s = reinterpret_cast<char*>(buf);
  uint32_t x = Fetch(s);
  uint32_t y = Fetch(s+4);
  uint32_t z = Fetch(s+8);
  x = _mm_crc32_u32(x, Fetch(s+12));
  y = _mm_crc32_u32(y, Fetch(s+16));
  z = _mm_crc32_u32(z * c1, Fetch(s+20));
  x = _mm_crc32_u32(x, Fetch(s+24));
  y = _mm_crc32_u32(y * c1, Fetch(s+28));
  uint32_t o = y;
  z = _mm_crc32_u32(z, Fetch(s+32));
  x = _mm_crc32_u32(x * c1, Fetch(s+36));
  y = _mm_crc32_u32(y, Fetch(s+40));
  z = _mm_crc32_u32(z * c1, Fetch(s+44));
  x = _mm_crc32_u32(x, Fetch(s+48));
  y = _mm_crc32_u32(y * c1, Fetch(s+52));
  z = _mm_crc32_u32(z, Fetch(s+56));
  x = _mm_crc32_u32(x, Fetch(s+60));
  return (o - x + y - z) * c1;
}

#undef Chunk
#undef Murk
#undef Mulc2
#undef Mulc1

uint32_t Hash32WithSeed(const char *s, size_t len, uint32_t seed) {
  if (len <= 24) {
    if (len >= 13) return farmhashmk::Hash32Len13to24(s, len, seed * c1);
    else if (len >= 5) return farmhashmk::Hash32Len5to12(s, len, seed);
    else return farmhashmk::Hash32Len0to4(s, len, seed);
  }
  uint32_t h = farmhashmk::Hash32Len13to24(s, 24, seed ^ len);
  return _mm_crc32_u32(Hash32(s + 24, len - 24) + seed, h);
}

#endif
}  // namespace farmhashsu