author | Vladislav Kuznetsov <va.kuznecov@physics.msu.ru> | 2022-02-10 16:46:54 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:54 +0300 |
commit | 3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/t1ha/src | |
parent | de20f5598f0832a6e646f61b4feca942c00da928 (diff) | |
download | ydb-3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5.tar.gz | |
Restoring authorship annotation for Vladislav Kuznetsov <va.kuznecov@physics.msu.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/t1ha/src')
mode | path | lines changed
---|---|---
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0.c | 904
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_ia32aes_a.h | 358
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c | 8
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c | 8
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_ia32aes_b.h | 328
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c | 8
-rw-r--r-- | contrib/libs/t1ha/src/t1ha0_selfcheck.c | 402
-rw-r--r-- | contrib/libs/t1ha/src/t1ha1.c | 316
-rw-r--r-- | contrib/libs/t1ha/src/t1ha1_selfcheck.c | 218
-rw-r--r-- | contrib/libs/t1ha/src/t1ha2.c | 652
-rw-r--r-- | contrib/libs/t1ha/src/t1ha2_selfcheck.c | 368
-rw-r--r-- | contrib/libs/t1ha/src/t1ha_bits.h | 2336
-rw-r--r-- | contrib/libs/t1ha/src/t1ha_selfcheck.c | 190
-rw-r--r-- | contrib/libs/t1ha/src/t1ha_selfcheck.h | 146
-rw-r--r-- | contrib/libs/t1ha/src/t1ha_selfcheck_all.c | 120
15 files changed, 3181 insertions, 3181 deletions
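The hunks below re-add the same lines they remove (3181 insertions against 3181 deletions) to restore blame attribution, so the t1ha0 entry points keep the signatures visible in the t1ha0.c hunk, e.g. `uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed)` and the runtime-dispatched `t1ha0`. A minimal caller sketch follows; it assumes the public `t1ha.h` header shipped by upstream t1ha (not part of this diff) is on the include path.

```c
/* Hypothetical caller sketch -- not part of this commit. Assumes the
 * public t1ha.h header from upstream t1ha; the signatures match the
 * definitions in the t1ha0.c hunk below. */
#include <inttypes.h>
#include <stdio.h>
#include <string.h>
#include "t1ha.h"

int main(void) {
  const char msg[] = "fast positive hash";
  /* t1ha0() dispatches at runtime to the AES-NI, 32le or 32be variant. */
  uint64_t h = t1ha0(msg, strlen(msg), UINT64_C(42));
  printf("t1ha0 = 0x%016" PRIX64 "\n", h);
  return 0;
}
```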
diff --git a/contrib/libs/t1ha/src/t1ha0.c b/contrib/libs/t1ha/src/t1ha0.c index c51d25957d..bde71299cb 100644 --- a/contrib/libs/t1ha/src/t1ha0.c +++ b/contrib/libs/t1ha/src/t1ha0.c @@ -1,462 +1,462 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA0_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -static __maybe_unused __always_inline uint32_t tail32_le_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((4 - tail) & 3) << 3; - return fetch32_le_aligned(p) & ((~UINT32_C(0)) >> shift); -#else - uint32_t r = 0; - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch32_le_aligned(p); - case 3: - r = (uint32_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_aligned(p); - case 1: - return p[0]; -#else - case 0: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint32_t -tail32_le_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#ifdef can_read_underside - /* On some systems (e.g. x86) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. */ - const unsigned offset = (4 - tail) & 3; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 4))) { - p -= offset; - return fetch32_le_unaligned(p) >> shift; - } - return fetch32_le_unaligned(p) & ((~UINT32_C(0)) >> shift); -#else - uint32_t r = 0; - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch32_le_unaligned(p); - case 3: - r = (uint32_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_unaligned(p); - case 1: - return p[0]; -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. */ - case 0: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* can_read_underside */ -} - -static __maybe_unused __always_inline uint32_t tail32_be_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((4 - tail) & 3) << 3; - return fetch32_be_aligned(p) >> shift; -#else - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return fetch16_be_aligned(p); - case 3: - return fetch16_be_aligned(p) << 8 | p[2]; - case 0: - return fetch32_be_aligned(p); -#else - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 0: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint32_t -tail32_be_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#ifdef can_read_underside - /* On some systems we can perform a 'oneshot' read, which is little bit - * faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> for the - * reminder. */ - const unsigned offset = (4 - tail) & 3; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 4))) { - p -= offset; - return fetch32_be_unaligned(p) & ((~UINT32_C(0)) >> shift); - } - return fetch32_be_unaligned(p) >> shift; -#else - switch (tail & 3) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 1: - return p[0]; - case 2: - return fetch16_be_unaligned(p); - case 3: - return fetch16_be_unaligned(p) << 8 | p[2]; - case 0: - return fetch32_be_unaligned(p); -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. */ - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 0: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; -#endif - } -#endif /* can_read_underside */ -} - -/***************************************************************************/ - -#ifndef rot32 -static __maybe_unused __always_inline uint32_t rot32(uint32_t v, unsigned s) { - return (v >> s) | (v << (32 - s)); -} -#endif /* rot32 */ - -static __always_inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, - uint32_t prime) { - uint64_t l = mul_32x32_64(*b + v, prime); - *a ^= (uint32_t)l; - *b += (uint32_t)(l >> 32); -} - -static __always_inline uint64_t final32(uint32_t a, uint32_t b) { - uint64_t l = (b ^ rot32(a, 13)) | (uint64_t)a << 32; - l *= prime_0; - l ^= l >> 41; - l *= prime_4; - l ^= l >> 47; - l *= prime_6; - return l; -} - -/* 32-bit 'magic' primes */ -static const uint32_t prime32_0 = UINT32_C(0x92D78269); -static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); -static const uint32_t prime32_2 = UINT32_C(0xA4ABA1C3); -static const uint32_t prime32_3 = UINT32_C(0xF6499843); -static const uint32_t prime32_4 = UINT32_C(0x86F0FD61); -static const uint32_t prime32_5 = UINT32_C(0xCA2DA6FB); -static const uint32_t prime32_6 = UINT32_C(0xC4BB3575); - -/* TODO: C++ template in the next version */ -#define T1HA0_BODY(ENDIANNES, ALIGNESS) \ - const uint32_t *v = (const uint32_t *)data; \ - if (unlikely(len > 16)) { \ - uint32_t c = ~a; \ - uint32_t d = rot32(b, 5); \ - const uint32_t *detent = \ - (const uint32_t *)((const uint8_t *)data + len - 15); \ - do { \ - const uint32_t w0 = fetch32_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint32_t w1 = fetch32_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint32_t w2 = fetch32_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint32_t w3 = fetch32_##ENDIANNES##_##ALIGNESS(v + 3); \ 
- v += 4; \ - prefetch(v); \ - \ - const uint32_t d13 = w1 + rot32(w3 + d, 17); \ - const uint32_t c02 = w0 ^ rot32(w2 + c, 11); \ - d ^= rot32(a + w0, 3); \ - c ^= rot32(b + w1, 7); \ - b = prime32_1 * (c02 + w3); \ - a = prime32_0 * (d13 ^ w2); \ - } while (likely(v < detent)); \ - \ - c += a; \ - d += b; \ - a ^= prime32_6 * (rot32(c, 16) + d); \ - b ^= prime32_5 * (c + rot32(d, 16)); \ - \ - len &= 15; \ - } \ - \ - switch (len) { \ - default: \ - mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_4); \ - /* fall through */ \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup32(&b, &a, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_3); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_2); \ - /* fall through */ \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup32(&b, &a, tail32_##ENDIANNES##_##ALIGNESS(v, len), prime32_1); \ - /* fall through */ \ - case 0: \ - return final32(a, b); \ - } - -uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed) { - uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; - uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA0_BODY(le, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; - if (misaligned) { - T1HA0_BODY(le, unaligned); - } else { - T1HA0_BODY(le, aligned); - } -#endif -} - -uint64_t t1ha0_32be(const void *data, size_t len, uint64_t seed) { - uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; - uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA0_BODY(be, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; - if (misaligned) { - T1HA0_BODY(be, unaligned); - } else { - T1HA0_BODY(be, aligned); - } -#endif -} - -/***************************************************************************/ - -#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) -__cold uint64_t t1ha_ia32cpu_features(void) { - uint32_t features = 0; - uint32_t extended = 0; + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA0_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +static __maybe_unused __always_inline uint32_t tail32_le_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((4 - tail) & 3) << 3; + return fetch32_le_aligned(p) & ((~UINT32_C(0)) >> shift); +#else + uint32_t r = 0; + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. 
*/ + case 0: + return fetch32_le_aligned(p); + case 3: + r = (uint32_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_aligned(p); + case 1: + return p[0]; +#else + case 0: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint32_t +tail32_le_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#ifdef can_read_underside + /* On some systems (e.g. x86) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. */ + const unsigned offset = (4 - tail) & 3; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 4))) { + p -= offset; + return fetch32_le_unaligned(p) >> shift; + } + return fetch32_le_unaligned(p) & ((~UINT32_C(0)) >> shift); +#else + uint32_t r = 0; + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 0: + return fetch32_le_unaligned(p); + case 3: + r = (uint32_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_unaligned(p); + case 1: + return p[0]; +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 0: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* can_read_underside */ +} + +static __maybe_unused __always_inline uint32_t tail32_be_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((4 - tail) & 3) << 3; + return fetch32_be_aligned(p) >> shift; +#else + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_aligned(p); + case 3: + return fetch16_be_aligned(p) << 8 | p[2]; + case 0: + return fetch32_be_aligned(p); +#else + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 0: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint32_t +tail32_be_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#ifdef can_read_underside + /* On some systems we can perform a 'oneshot' read, which is little bit + * faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> for the + * reminder. 
*/ + const unsigned offset = (4 - tail) & 3; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 4))) { + p -= offset; + return fetch32_be_unaligned(p) & ((~UINT32_C(0)) >> shift); + } + return fetch32_be_unaligned(p) >> shift; +#else + switch (tail & 3) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_unaligned(p); + case 3: + return fetch16_be_unaligned(p) << 8 | p[2]; + case 0: + return fetch32_be_unaligned(p); +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. */ + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 0: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; +#endif + } +#endif /* can_read_underside */ +} + +/***************************************************************************/ + +#ifndef rot32 +static __maybe_unused __always_inline uint32_t rot32(uint32_t v, unsigned s) { + return (v >> s) | (v << (32 - s)); +} +#endif /* rot32 */ + +static __always_inline void mixup32(uint32_t *a, uint32_t *b, uint32_t v, + uint32_t prime) { + uint64_t l = mul_32x32_64(*b + v, prime); + *a ^= (uint32_t)l; + *b += (uint32_t)(l >> 32); +} + +static __always_inline uint64_t final32(uint32_t a, uint32_t b) { + uint64_t l = (b ^ rot32(a, 13)) | (uint64_t)a << 32; + l *= prime_0; + l ^= l >> 41; + l *= prime_4; + l ^= l >> 47; + l *= prime_6; + return l; +} + +/* 32-bit 'magic' primes */ +static const uint32_t prime32_0 = UINT32_C(0x92D78269); +static const uint32_t prime32_1 = UINT32_C(0xCA9B4735); +static const uint32_t prime32_2 = UINT32_C(0xA4ABA1C3); +static const uint32_t prime32_3 = UINT32_C(0xF6499843); +static const uint32_t prime32_4 = UINT32_C(0x86F0FD61); +static const uint32_t prime32_5 = UINT32_C(0xCA2DA6FB); +static const uint32_t prime32_6 = UINT32_C(0xC4BB3575); + +/* TODO: C++ template in the next version */ +#define T1HA0_BODY(ENDIANNES, ALIGNESS) \ + const uint32_t *v = (const uint32_t *)data; \ + if (unlikely(len > 16)) { \ + uint32_t c = ~a; \ + uint32_t d = rot32(b, 5); \ + const uint32_t *detent = \ + (const uint32_t *)((const uint8_t *)data + len - 15); \ + do { \ + const uint32_t w0 = fetch32_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint32_t w1 = fetch32_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint32_t w2 = fetch32_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint32_t w3 = fetch32_##ENDIANNES##_##ALIGNESS(v + 3); \ + v += 4; \ + prefetch(v); \ + \ + const uint32_t d13 = w1 + rot32(w3 + d, 17); \ + const uint32_t c02 = w0 ^ rot32(w2 + c, 11); \ + d ^= rot32(a + w0, 3); \ + c ^= rot32(b + w1, 7); \ + b = prime32_1 * (c02 + w3); \ + a = prime32_0 * (d13 ^ w2); \ + } while (likely(v < detent)); \ + \ + c += a; \ + d += b; \ + a ^= prime32_6 * (rot32(c, 16) + d); \ + b ^= prime32_5 * (c + rot32(d, 16)); \ + \ + len &= 15; \ + } \ + \ + switch (len) { \ + default: \ + mixup32(&a, &b, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_4); \ + /* fall through */ \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup32(&b, &a, fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_3); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + mixup32(&a, &b, 
fetch32_##ENDIANNES##_##ALIGNESS(v++), prime32_2); \ + /* fall through */ \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup32(&b, &a, tail32_##ENDIANNES##_##ALIGNESS(v, len), prime32_1); \ + /* fall through */ \ + case 0: \ + return final32(a, b); \ + } + +uint64_t t1ha0_32le(const void *data, size_t len, uint64_t seed) { + uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; + uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA0_BODY(le, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; + if (misaligned) { + T1HA0_BODY(le, unaligned); + } else { + T1HA0_BODY(le, aligned); + } +#endif +} + +uint64_t t1ha0_32be(const void *data, size_t len, uint64_t seed) { + uint32_t a = rot32((uint32_t)len, 17) + (uint32_t)seed; + uint32_t b = (uint32_t)len ^ (uint32_t)(seed >> 32); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA0_BODY(be, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_32 - 1)) != 0; + if (misaligned) { + T1HA0_BODY(be, unaligned); + } else { + T1HA0_BODY(be, aligned); + } +#endif +} + +/***************************************************************************/ + +#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) +__cold uint64_t t1ha_ia32cpu_features(void) { + uint32_t features = 0; + uint32_t extended = 0; #if defined(__GNUC__) || defined(__clang__) - uint32_t eax, ebx, ecx, edx; - const unsigned cpuid_max = __get_cpuid_max(0, NULL); - if (cpuid_max >= 1) { - __cpuid_count(1, 0, eax, ebx, features, edx); - if (cpuid_max >= 7) - __cpuid_count(7, 0, eax, extended, ecx, edx); - } -#elif defined(_MSC_VER) - int info[4]; - __cpuid(info, 0); - const unsigned cpuid_max = info[0]; - if (cpuid_max >= 1) { - __cpuidex(info, 1, 0); - features = info[2]; - if (cpuid_max >= 7) { - __cpuidex(info, 7, 0); - extended = info[1]; - } - } -#endif - return features | (uint64_t)extended << 32; -} -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ - -#if T1HA0_RUNTIME_SELECT - -__cold t1ha0_function_t t1ha0_resolve(void) { - -#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) - uint64_t features = t1ha_ia32cpu_features(); - if (t1ha_ia32_AESNI_avail(features)) { - if (t1ha_ia32_AVX_avail(features)) - return t1ha_ia32_AVX2_avail(features) ? t1ha0_ia32aes_avx2 - : t1ha0_ia32aes_avx; - return t1ha0_ia32aes_noavx; - } -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ - -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#ifndef T1HA1_DISABLED - return t1ha1_be; -#else - return t1ha2_atonce; -#endif /* T1HA1_DISABLED */ -#else - return t1ha0_32be; -#endif -#else /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#ifndef T1HA1_DISABLED - return t1ha1_le; -#else - return t1ha2_atonce; -#endif /* T1HA1_DISABLED */ -#else - return t1ha0_32le; -#endif -#endif /* __BYTE_ORDER__ */ -} - -#if T1HA_USE_INDIRECT_FUNCTIONS -/* Use IFUNC (GNU ELF indirect functions) to choice implementation at runtime. 
- * For more info please see - * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format - * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ + uint32_t eax, ebx, ecx, edx; + const unsigned cpuid_max = __get_cpuid_max(0, NULL); + if (cpuid_max >= 1) { + __cpuid_count(1, 0, eax, ebx, features, edx); + if (cpuid_max >= 7) + __cpuid_count(7, 0, eax, extended, ecx, edx); + } +#elif defined(_MSC_VER) + int info[4]; + __cpuid(info, 0); + const unsigned cpuid_max = info[0]; + if (cpuid_max >= 1) { + __cpuidex(info, 1, 0); + features = info[2]; + if (cpuid_max >= 7) { + __cpuidex(info, 7, 0); + extended = info[1]; + } + } +#endif + return features | (uint64_t)extended << 32; +} +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + +#if T1HA0_RUNTIME_SELECT + +__cold t1ha0_function_t t1ha0_resolve(void) { + +#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) + uint64_t features = t1ha_ia32cpu_features(); + if (t1ha_ia32_AESNI_avail(features)) { + if (t1ha_ia32_AVX_avail(features)) + return t1ha_ia32_AVX2_avail(features) ? t1ha0_ia32aes_avx2 + : t1ha0_ia32aes_avx; + return t1ha0_ia32aes_noavx; + } +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#ifndef T1HA1_DISABLED + return t1ha1_be; +#else + return t1ha2_atonce; +#endif /* T1HA1_DISABLED */ +#else + return t1ha0_32be; +#endif +#else /* __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ */ +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#ifndef T1HA1_DISABLED + return t1ha1_le; +#else + return t1ha2_atonce; +#endif /* T1HA1_DISABLED */ +#else + return t1ha0_32le; +#endif +#endif /* __BYTE_ORDER__ */ +} + +#if T1HA_USE_INDIRECT_FUNCTIONS +/* Use IFUNC (GNU ELF indirect functions) to choice implementation at runtime. 
+ * For more info please see + * https://en.wikipedia.org/wiki/Executable_and_Linkable_Format + * and https://sourceware.org/glibc/wiki/GNU_IFUNC */ #if __has_attribute(__ifunc__) -uint64_t t1ha0(const void *data, size_t len, uint64_t seed) +uint64_t t1ha0(const void *data, size_t len, uint64_t seed) __attribute__((__ifunc__("t1ha0_resolve"))); -#else -__asm("\t.globl\tt1ha0\n\t.type\tt1ha0, " - "%gnu_indirect_function\n\t.set\tt1ha0,t1ha0_resolve"); +#else +__asm("\t.globl\tt1ha0\n\t.type\tt1ha0, " + "%gnu_indirect_function\n\t.set\tt1ha0,t1ha0_resolve"); #endif /* __has_attribute(__ifunc__) */ - + #elif __GNUC_PREREQ(4, 0) || __has_attribute(__constructor__) - + uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t); - + static __cold void __attribute__((__constructor__)) t1ha0_init(void) { - t1ha0_funcptr = t1ha0_resolve(); -} - -#else /* T1HA_USE_INDIRECT_FUNCTIONS */ - -static __cold uint64_t t1ha0_proxy(const void *data, size_t len, - uint64_t seed) { - t1ha0_funcptr = t1ha0_resolve(); - return t1ha0_funcptr(data, len, seed); -} - -uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t) = t1ha0_proxy; - -#endif /* !T1HA_USE_INDIRECT_FUNCTIONS */ -#endif /* T1HA0_RUNTIME_SELECT */ - -#endif /* T1HA0_DISABLED */ + t1ha0_funcptr = t1ha0_resolve(); +} + +#else /* T1HA_USE_INDIRECT_FUNCTIONS */ + +static __cold uint64_t t1ha0_proxy(const void *data, size_t len, + uint64_t seed) { + t1ha0_funcptr = t1ha0_resolve(); + return t1ha0_funcptr(data, len, seed); +} + +uint64_t (*t1ha0_funcptr)(const void *, size_t, uint64_t) = t1ha0_proxy; + +#endif /* !T1HA_USE_INDIRECT_FUNCTIONS */ +#endif /* T1HA0_RUNTIME_SELECT */ + +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h b/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h index fa1a753f34..a2372d5201 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_a.h @@ -1,182 +1,182 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. 
Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -#if T1HA0_AESNI_AVAILABLE - -uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - - if (unlikely(len > 32)) { - __m128i x = _mm_set_epi64x(a, b); - __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); - - const __m128i *__restrict v = (const __m128i *)data; - const __m128i *__restrict const detent = - (const __m128i *)((const uint8_t *)data + len - 127); - - while (v < detent) { - __m128i v0 = _mm_loadu_si128(v + 0); - __m128i v1 = _mm_loadu_si128(v + 1); - __m128i v2 = _mm_loadu_si128(v + 2); - __m128i v3 = _mm_loadu_si128(v + 3); - __m128i v4 = _mm_loadu_si128(v + 4); - __m128i v5 = _mm_loadu_si128(v + 5); - __m128i v6 = _mm_loadu_si128(v + 6); - __m128i v7 = _mm_loadu_si128(v + 7); - - __m128i v0y = _mm_aesenc_si128(v0, y); - __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); - __m128i v45_67 = - _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); - - __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); - __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); - - x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); - y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); - v += 8; - } - - if (len & 64) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - - __m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v2y); - y = _mm_aesdec_si128(y, v3x); - } - - if (len & 32) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } - - if (len & 16) 
{ - y = _mm_add_epi64(x, y); - x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); - } - - x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); -#if defined(__x86_64__) || defined(_M_X64) -#if defined(__SSE4_1__) || defined(__AVX__) - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - a = _mm_cvtsi128_si64(x); - b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); -#endif -#else -#if defined(__SSE4_1__) || defined(__AVX__) - a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) - << 32; - b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) - << 32; -#else - a = (uint32_t)_mm_cvtsi128_si32(x); - a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; - x = _mm_unpackhi_epi64(x, x); - b = (uint32_t)_mm_cvtsi128_si32(x); - b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; -#endif -#endif -#ifdef __AVX__ - _mm256_zeroupper(); -#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ - defined(__e2k__)) - _mm_empty(); -#endif - data = v; - len &= 15; - } - - const uint64_t *v = (const uint64_t *)data; - switch (len) { - default: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); - /* fall through */ - case 24: - case 23: - case 22: - case 21: - case 20: - case 19: - case 18: - case 17: - mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); - /* fall through */ - case 16: - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); - /* fall through */ - case 0: - return final64(a, b); - } -} - -#endif /* T1HA0_AESNI_AVAILABLE */ -#undef T1HA_IA32AES_NAME + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +#if T1HA0_AESNI_AVAILABLE + +uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + + if (unlikely(len > 32)) { + __m128i x = _mm_set_epi64x(a, b); + __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_5, prime_6)); + + const __m128i *__restrict v = (const __m128i *)data; + const __m128i *__restrict const detent = + (const __m128i *)((const uint8_t *)data + len - 127); + + while (v < detent) { + __m128i v0 = _mm_loadu_si128(v + 0); + __m128i v1 = _mm_loadu_si128(v + 1); + __m128i v2 = _mm_loadu_si128(v + 2); + __m128i v3 = _mm_loadu_si128(v + 3); + __m128i v4 = _mm_loadu_si128(v + 4); + __m128i v5 = _mm_loadu_si128(v + 5); + __m128i v6 = _mm_loadu_si128(v + 6); + __m128i v7 = _mm_loadu_si128(v + 7); + + __m128i v0y = _mm_aesenc_si128(v0, y); + __m128i v2x6 = _mm_aesenc_si128(v2, _mm_xor_si128(x, v6)); + __m128i v45_67 = + _mm_xor_si128(_mm_aesenc_si128(v4, v5), _mm_add_epi64(v6, v7)); + + __m128i v0y7_1 = _mm_aesdec_si128(_mm_sub_epi64(v7, v0y), v1); + __m128i v2x6_3 = _mm_aesenc_si128(v2x6, v3); + + x = _mm_aesenc_si128(v45_67, _mm_add_epi64(x, y)); + y = _mm_aesenc_si128(v2x6_3, _mm_xor_si128(v0y7_1, v5)); + v += 8; + } + + if (len & 64) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + + __m128i v2y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v3x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v2y); + y = _mm_aesdec_si128(y, v3x); + } + + if (len & 32) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } + + if (len & 16) { + y = _mm_add_epi64(x, y); + x = _mm_aesdec_si128(x, _mm_loadu_si128(v++)); + } + + x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); +#if defined(__x86_64__) || defined(_M_X64) +#if defined(__SSE4_1__) || defined(__AVX__) + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); +#else + a = _mm_cvtsi128_si64(x); + b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); +#endif +#else +#if defined(__SSE4_1__) || defined(__AVX__) + a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) + << 32; + b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) + << 32; +#else + a = (uint32_t)_mm_cvtsi128_si32(x); + a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + x = _mm_unpackhi_epi64(x, x); + b = (uint32_t)_mm_cvtsi128_si32(x); + b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; +#endif +#endif +#ifdef __AVX__ + _mm256_zeroupper(); +#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__e2k__)) + _mm_empty(); +#endif + data = v; + len &= 15; + } + + const uint64_t *v = (const uint64_t *)data; + switch (len) { + default: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); + /* fall through */ + case 24: + case 23: + case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); + /* fall through */ + case 16: + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); + /* 
fall through */ + case 0: + return final64(a, b); + } +} + +#endif /* T1HA0_AESNI_AVAILABLE */ +#undef T1HA_IA32AES_NAME diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c index a19e7d9b4a..a344bfd98c 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx -#include "t1ha0_ia32aes_a.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx +#include "t1ha0_ia32aes_a.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c index cd00f29290..a0b3a2d142 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_avx2.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx2 -#include "t1ha0_ia32aes_b.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_avx2 +#include "t1ha0_ia32aes_b.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h b/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h index 9f63476c77..f8759dde82 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_b.h @@ -1,167 +1,167 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. 
The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -#if T1HA0_AESNI_AVAILABLE - -uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - - if (unlikely(len > 32)) { - __m128i x = _mm_set_epi64x(a, b); - __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); - - const __m128i *v = (const __m128i *)data; - const __m128i *const detent = - (const __m128i *)((const uint8_t *)data + (len & ~15ul)); - data = detent; - - if (len & 16) { - x = _mm_add_epi64(x, _mm_loadu_si128(v++)); - y = _mm_aesenc_si128(x, y); - } - len &= 15; - - if (v + 7 < detent) { - __m128i salt = y; - do { - __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); - - salt = _mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); - t = _mm_aesenc_si128(x, t); - x = _mm_add_epi64(y, x); - y = t; - } while (v + 7 < detent); - } - - while (v < detent) { - __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); - __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); - x = _mm_aesdec_si128(x, v0y); - y = _mm_aesdec_si128(y, v1x); - } - - x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); -#if defined(__x86_64__) || defined(_M_X64) -#if defined(__SSE4_1__) || defined(__AVX__) - a = _mm_extract_epi64(x, 0); - b = _mm_extract_epi64(x, 1); -#else - a = _mm_cvtsi128_si64(x); - b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); -#endif -#else -#if defined(__SSE4_1__) || defined(__AVX__) - a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) - << 32; - b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) - << 32; -#else - a = (uint32_t)_mm_cvtsi128_si32(x); - a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; - x = _mm_unpackhi_epi64(x, x); - b = (uint32_t)_mm_cvtsi128_si32(x); - b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; -#endif -#endif -#ifdef __AVX__ - _mm256_zeroupper(); -#elif !(defined(_X86_64_) || defined(__x86_64__) || 
defined(_M_X64) || \ - defined(__e2k__)) - _mm_empty(); -#endif - } - - const uint64_t *v = (const uint64_t *)data; - switch (len) { - default: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); - /* fall through */ - case 24: - case 23: - case 22: - case 21: - case 20: - case 19: - case 18: - case 17: - mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); - /* fall through */ - case 16: - case 15: - case 14: - case 13: - case 12: - case 11: - case 10: - case 9: - mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); - /* fall through */ - case 8: - case 7: - case 6: - case 5: - case 4: - case 3: - case 2: - case 1: - mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); - /* fall through */ - case 0: - return final64(a, b); - } -} - -#endif /* T1HA0_AESNI_AVAILABLE */ -#undef T1HA_IA32AES_NAME + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +#if T1HA0_AESNI_AVAILABLE + +uint64_t T1HA_IA32AES_NAME(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + + if (unlikely(len > 32)) { + __m128i x = _mm_set_epi64x(a, b); + __m128i y = _mm_aesenc_si128(x, _mm_set_epi64x(prime_0, prime_1)); + + const __m128i *v = (const __m128i *)data; + const __m128i *const detent = + (const __m128i *)((const uint8_t *)data + (len & ~15ul)); + data = detent; + + if (len & 16) { + x = _mm_add_epi64(x, _mm_loadu_si128(v++)); + y = _mm_aesenc_si128(x, y); + } + len &= 15; + + if (v + 7 < detent) { + __m128i salt = y; + do { + __m128i t = _mm_aesenc_si128(_mm_loadu_si128(v++), salt); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + t = _mm_aesdec_si128(t, _mm_loadu_si128(v++)); + + salt = _mm_add_epi64(salt, _mm_set_epi64x(prime_5, prime_6)); + t = _mm_aesenc_si128(x, t); + x = _mm_add_epi64(y, x); + y = t; + } while (v + 7 < detent); + } + + while (v < detent) { + __m128i v0y = _mm_add_epi64(y, _mm_loadu_si128(v++)); + __m128i v1x = _mm_sub_epi64(x, _mm_loadu_si128(v++)); + x = _mm_aesdec_si128(x, v0y); + y = _mm_aesdec_si128(y, v1x); + } + + x = _mm_add_epi64(_mm_aesdec_si128(x, _mm_aesenc_si128(y, x)), y); +#if defined(__x86_64__) || defined(_M_X64) +#if defined(__SSE4_1__) || defined(__AVX__) + a = _mm_extract_epi64(x, 0); + b = _mm_extract_epi64(x, 1); +#else + a = _mm_cvtsi128_si64(x); + b = _mm_cvtsi128_si64(_mm_unpackhi_epi64(x, x)); +#endif +#else +#if defined(__SSE4_1__) || defined(__AVX__) + a = (uint32_t)_mm_extract_epi32(x, 0) | (uint64_t)_mm_extract_epi32(x, 1) + << 32; + b = (uint32_t)_mm_extract_epi32(x, 2) | (uint64_t)_mm_extract_epi32(x, 3) + << 32; +#else + a = (uint32_t)_mm_cvtsi128_si32(x); + a |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; + x = _mm_unpackhi_epi64(x, x); + b = (uint32_t)_mm_cvtsi128_si32(x); + b |= (uint64_t)_mm_cvtsi128_si32(_mm_shuffle_epi32(x, 1)) << 32; +#endif +#endif +#ifdef __AVX__ + _mm256_zeroupper(); +#elif !(defined(_X86_64_) || defined(__x86_64__) || defined(_M_X64) || \ + defined(__e2k__)) + _mm_empty(); +#endif + } + + const uint64_t *v = (const uint64_t *)data; + switch (len) { + default: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_4); + /* fall through */ + case 24: + case 23: + 
case 22: + case 21: + case 20: + case 19: + case 18: + case 17: + mixup64(&b, &a, fetch64_le_unaligned(v++), prime_3); + /* fall through */ + case 16: + case 15: + case 14: + case 13: + case 12: + case 11: + case 10: + case 9: + mixup64(&a, &b, fetch64_le_unaligned(v++), prime_2); + /* fall through */ + case 8: + case 7: + case 6: + case 5: + case 4: + case 3: + case 2: + case 1: + mixup64(&b, &a, tail64_le_unaligned(v, len), prime_1); + /* fall through */ + case 0: + return final64(a, b); + } +} + +#endif /* T1HA0_AESNI_AVAILABLE */ +#undef T1HA_IA32AES_NAME diff --git a/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c b/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c index 250157b37d..fb6489fbff 100644 --- a/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c +++ b/contrib/libs/t1ha/src/t1ha0_ia32aes_noavx.c @@ -1,4 +1,4 @@ -#ifndef T1HA0_DISABLED -#define T1HA_IA32AES_NAME t1ha0_ia32aes_noavx -#include "t1ha0_ia32aes_a.h" -#endif /* T1HA0_DISABLED */ +#ifndef T1HA0_DISABLED +#define T1HA_IA32AES_NAME t1ha0_ia32aes_noavx +#include "t1ha0_ia32aes_a.h" +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha0_selfcheck.c b/contrib/libs/t1ha/src/t1ha0_selfcheck.c index 0230300b1f..d3c8e9a3fd 100644 --- a/contrib/libs/t1ha/src/t1ha0_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha0_selfcheck.c @@ -1,204 +1,204 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA0_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_32le[81] = { 0, - 0xC92229C10FAEA50E, 0x3DF1354B0DFDC443, 0x968F016D60417BB3, 0x85AAFB50C6DA770F, - 0x66CCE3BB6842C7D6, 0xDDAA39C11537C226, 0x35958D281F0C9C8C, 0x8C5D64B091DE608E, - 0x4094DF680D39786B, 0x1014F4AA2A2EDF4D, 0x39D21891615AA310, 0x7EF51F67C398C7C4, - 0x06163990DDBF319D, 0xE229CAA00C8D6F3F, 0xD2240B4B0D54E0F5, 0xEA2E7E905DDEAF94, - 0x8D4F8A887183A5CE, 0x44337F9A63C5820C, 0x94938D1E86A9B797, 0x96E9CABA5CA210CC, - 0x6EFBB9CC9E8F7708, 0x3D12EA0282FB8BBC, 0x5DA781EE205A2C48, 0xFA4A51A12677FE12, - 0x81D5F04E20660B28, 0x57258D043BCD3841, 0x5C9BEB62059C1ED2, 0x57A02162F9034B33, - 0xBA2A13E457CE19B8, 0xE593263BF9451F3A, 0x0BC1175539606BC5, 0xA3E2929E9C5F289F, - 0x86BDBD06835E35F7, 0xA180950AB48BAADC, 0x7812C994D9924028, 0x308366011415F46B, - 0x77FE9A9991C5F959, 0x925C340B70B0B1E3, 0xCD9C5BA4C41E2E10, 0x7CC4E7758B94CD93, - 0x898B235962EA4625, 0xD7E3E5BF22893286, 0x396F4CDD33056C64, 0x740AB2E32F17CD9F, - 0x60D12FF9CD15B321, 0xBEE3A6C9903A81D8, 0xB47040913B33C35E, 0x19EE8C2ACC013CFF, - 0x5DEC94C5783B55C4, 0x78DC122D562C5F1D, 0x6520F008DA1C181E, 0x77CAF155A36EBF7C, - 0x0A09E02BDB883CA6, 0xFD5D9ADA7E3FB895, 0xC6F5FDD9EEAB83B5, 0x84589BB29F52A92A, - 0x9B2517F13F8E9814, 0x6F752AF6A52E31EC, 0x8E717799E324CE8A, 0x84D90AEF39262D58, - 0x79C27B13FC28944D, 0xE6D6DF6438E0044A, 0x51B603E400D79CA4, 0x6A902B28C588B390, - 0x8D7F8DE9E6CB1D83, 0xCF1A4DC11CA7F044, 0xEF02E43C366786F1, 0x89915BCDBCFBE30F, - 0x5928B306F1A9CC7F, 0xA8B59092996851C5, 0x22050A20427E8B25, 0x6E6D64018941E7EE, - 0x9798C898B81AE846, 0x80EF218CDC30124A, 0xFCE45E60D55B0284, 0x4010E735D3147C35, - 0xEB647D999FD8DC7E, 0xD3544DCAB14FE907, 0xB588B27D8438700C, 0xA49EBFC43E057A4C -}; - -const uint64_t t1ha_refval_32be[81] = { 0, - 0xC92229C10FAEA50E, 0x0FE212630DD87E0F, 0x968F016D60417BB3, 0xE6B12B2C889913AB, - 0xAA3787887A9DA368, 0x06EE7202D53CEF39, 0x6149AFB2C296664B, 0x86C893210F9A5805, - 0x8379E5DA988AA04C, 0x24763AA7CE411A60, 0x9CF9C64B395A4CF8, 0xFFC192C338DDE904, - 0x094575BAB319E5F5, 0xBBBACFE7728C6511, 0x36B8C3CEBE4EF409, 0xAA0BA8A3397BA4D0, - 0xF9F85CF7124EE653, 0x3ADF4F7DF2A887AE, 0xAA2A0F5964AA9A7A, 0xF18B563F42D36EB8, - 0x034366CEF8334F5C, 0xAE2E85180E330E5F, 0xA5CE9FBFDF5C65B8, 0x5E509F25A9CA9B0B, - 0xE30D1358C2013BD2, 0xBB3A04D5EB8111FE, 0xB04234E82A15A28D, 0x87426A56D0EA0E2F, - 
0x095086668E07F9F8, 0xF4CD3A43B6A6AEA5, 0x73F9B9B674D472A6, 0x558344229A1E4DCF, - 0x0AD4C95B2279181A, 0x5E3D19D80821CA6B, 0x652492D25BEBA258, 0xEFA84B02EAB849B1, - 0x81AD2D253059AC2C, 0x1400CCB0DFB2F457, 0x5688DC72A839860E, 0x67CC130E0FD1B0A7, - 0x0A851E3A94E21E69, 0x2EA0000B6A073907, 0xAE9776FF9BF1D02E, 0xC0A96B66B160631C, - 0xA93341DE4ED7C8F0, 0x6FBADD8F5B85E141, 0xB7D295F1C21E0CBA, 0x6D6114591B8E434F, - 0xF5B6939B63D97BE7, 0x3C80D5053F0E5DB4, 0xAC520ACC6B73F62D, 0xD1051F5841CF3966, - 0x62245AEA644AE760, 0x0CD56BE15497C62D, 0x5BB93435C4988FB6, 0x5FADB88EB18DB512, - 0xC897CAE2242475CC, 0xF1A094EF846DC9BB, 0x2B1D8B24924F79B6, 0xC6DF0C0E8456EB53, - 0xE6A40128303A9B9C, 0x64D37AF5EFFA7BD9, 0x90FEB70A5AE2A598, 0xEC3BA5F126D9FF4B, - 0x3121C8EC3AC51B29, 0x3B41C4D422166EC1, 0xB4878DDCBF48ED76, 0x5CB850D77CB762E4, - 0x9A27A43CC1DD171F, 0x2FDFFC6F99CB424A, 0xF54A57E09FDEA7BB, 0x5F78E5EE2CAB7039, - 0xB8BA95883DB31CBA, 0x131C61EB84AF86C3, 0x84B1F64E9C613DA7, 0xE94C1888C0C37C02, - 0xEA08F8BFB2039CDE, 0xCCC6D04D243EC753, 0x8977D105298B0629, 0x7AAA976494A5905E -}; - -#if T1HA0_AESNI_AVAILABLE -const uint64_t t1ha_refval_ia32aes_a[81] = { 0, - 0x772C7311BE32FF42, 0xB231AC660E5B23B5, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0xBB34C6A4396695D2, 0x7F46E1981C3256AD, - 0x4B25A9B217A6C5B4, 0x7A0A6BCDD2321DA9, 0x0A1F55E690A7B44E, 0x8F451A91D7F05244, - 0x624D5D3C9B9800A7, 0x09DDC2B6409DDC25, 0x3E155765865622B6, 0x96519FAC9511B381, - 0x512E58482FE4FBF0, 0x1AB260EA7D54AE1C, 0x67976F12CC28BBBD, 0x0607B5B2E6250156, - 0x7E700BEA717AD36E, 0x06A058D9D61CABB3, 0x57DA5324A824972F, 0x1193BA74DBEBF7E7, - 0xC18DC3140E7002D4, 0x9F7CCC11DFA0EF17, 0xC487D6C20666A13A, 0xB67190E4B50EF0C8, - 0xA53DAA608DF0B9A5, 0x7E13101DE87F9ED3, 0x7F8955AE2F05088B, 0x2DF7E5A097AD383F, - 0xF027683A21EA14B5, 0x9BB8AEC3E3360942, 0x92BE39B54967E7FE, 0x978C6D332E7AFD27, - 0xED512FE96A4FAE81, 0x9E1099B8140D7BA3, 0xDFD5A5BE1E6FE9A6, 0x1D82600E23B66DD4, - 0x3FA3C3B7EE7B52CE, 0xEE84F7D2A655EF4C, 0x2A4361EC769E3BEB, 0x22E4B38916636702, - 0x0063096F5D39A115, 0x6C51B24DAAFA5434, 0xBAFB1DB1B411E344, 0xFF529F161AE0C4B0, - 0x1290EAE3AC0A686F, 0xA7B0D4585447D1BE, 0xAED3D18CB6CCAD53, 0xFC73D46F8B41BEC6 -}; - -const uint64_t t1ha_refval_ia32aes_b[81] = { 0, - 0x772C7311BE32FF42, 0x4398F62A8CB6F72A, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 
0xE810F88E85CEA11A, 0x4814F8F3B83E4394, - 0x9CABA22D10A2F690, 0x0D10032511F58111, 0xE9A36EF5EEA3CD58, 0xC79242DE194D9D7C, - 0xC3871AA0435EE5C8, 0x52890BED43CCF4CD, 0x07A1D0861ACCD373, 0x227B816FF0FEE9ED, - 0x59FFBF73AACFC0C4, 0x09AB564F2BEDAD0C, 0xC05F744F2EE38318, 0x7B50B621D547C661, - 0x0C1F71CB4E68E5D1, 0x0E33A47881D4DBAA, 0xF5C3BF198E9A7C2E, 0x16328FD8C0F68A91, - 0xA3E399C9AB3E9A59, 0x163AE71CBCBB18B8, 0x18F17E4A8C79F7AB, 0x9250E2EA37014B45, - 0x7BBBB111D60B03E4, 0x3DAA4A3071A0BD88, 0xA28828D790A2D6DC, 0xBC70FC88F64BE3F1, - 0xA3E48008BA4333C7, 0x739E435ACAFC79F7, 0x42BBB360BE007CC6, 0x4FFB6FD2AF74EC92, - 0x2A799A2994673146, 0xBE0A045B69D48E9F, 0x549432F54FC6A278, 0x371D3C60369FC702, - 0xDB4557D415B08CA7, 0xE8692F0A83850B37, 0x022E46AEB36E9AAB, 0x117AC9B814E4652D, - 0xA361041267AE9048, 0x277CB51C961C3DDA, 0xAFFC96F377CB8A8D, 0x83CC79FA01DD1BA7, - 0xA494842ACF4B802C, 0xFC6D9CDDE2C34A3F, 0x4ED6863CE455F7A7, 0x630914D0DB7AAE98 -}; -#endif /* T1HA0_AESNI_AVAILABLE */ - -/* *INDENT-ON* */ -/* clang-format on */ - -__cold int t1ha_selfcheck__t1ha0_32le(void) { - return t1ha_selfcheck(t1ha0_32le, t1ha_refval_32le); -} - -__cold int t1ha_selfcheck__t1ha0_32be(void) { - return t1ha_selfcheck(t1ha0_32be, t1ha_refval_32be); -} - -#if T1HA0_AESNI_AVAILABLE -__cold int t1ha_selfcheck__t1ha0_ia32aes_noavx(void) { - return t1ha_selfcheck(t1ha0_ia32aes_noavx, t1ha_refval_ia32aes_a); -} - -__cold int t1ha_selfcheck__t1ha0_ia32aes_avx(void) { - return t1ha_selfcheck(t1ha0_ia32aes_avx, t1ha_refval_ia32aes_a); -} - -#ifndef __e2k__ -__cold int t1ha_selfcheck__t1ha0_ia32aes_avx2(void) { - return t1ha_selfcheck(t1ha0_ia32aes_avx2, t1ha_refval_ia32aes_b); -} -#endif /* ! __e2k__ */ -#endif /* if T1HA0_AESNI_AVAILABLE */ - -__cold int t1ha_selfcheck__t1ha0(void) { - int rc = t1ha_selfcheck__t1ha0_32le() | t1ha_selfcheck__t1ha0_32be(); - -#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ - (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) -#if defined(T1HA1_DISABLED) - rc |= t1ha_selfcheck__t1ha2(); -#else - rc |= t1ha_selfcheck__t1ha1(); -#endif /* T1HA1_DISABLED */ -#endif /* 32/64 */ - -#if T1HA0_AESNI_AVAILABLE -#ifdef __e2k__ - rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); -#else - uint64_t features = t1ha_ia32cpu_features(); - if (t1ha_ia32_AESNI_avail(features)) { - rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); - if (t1ha_ia32_AVX_avail(features)) { - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); - if (t1ha_ia32_AVX2_avail(features)) - rc |= t1ha_selfcheck__t1ha0_ia32aes_avx2(); - } - } -#endif /* __e2k__ */ -#endif /* T1HA0_AESNI_AVAILABLE */ - - return rc; -} - -#endif /* T1HA0_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#ifndef T1HA0_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_32le[81] = { 0, + 0xC92229C10FAEA50E, 0x3DF1354B0DFDC443, 0x968F016D60417BB3, 0x85AAFB50C6DA770F, + 0x66CCE3BB6842C7D6, 0xDDAA39C11537C226, 0x35958D281F0C9C8C, 0x8C5D64B091DE608E, + 0x4094DF680D39786B, 0x1014F4AA2A2EDF4D, 0x39D21891615AA310, 0x7EF51F67C398C7C4, + 0x06163990DDBF319D, 0xE229CAA00C8D6F3F, 0xD2240B4B0D54E0F5, 0xEA2E7E905DDEAF94, + 0x8D4F8A887183A5CE, 0x44337F9A63C5820C, 0x94938D1E86A9B797, 0x96E9CABA5CA210CC, + 0x6EFBB9CC9E8F7708, 0x3D12EA0282FB8BBC, 0x5DA781EE205A2C48, 0xFA4A51A12677FE12, + 0x81D5F04E20660B28, 0x57258D043BCD3841, 0x5C9BEB62059C1ED2, 0x57A02162F9034B33, + 0xBA2A13E457CE19B8, 0xE593263BF9451F3A, 0x0BC1175539606BC5, 0xA3E2929E9C5F289F, + 0x86BDBD06835E35F7, 0xA180950AB48BAADC, 0x7812C994D9924028, 0x308366011415F46B, + 0x77FE9A9991C5F959, 0x925C340B70B0B1E3, 0xCD9C5BA4C41E2E10, 0x7CC4E7758B94CD93, + 0x898B235962EA4625, 0xD7E3E5BF22893286, 0x396F4CDD33056C64, 0x740AB2E32F17CD9F, + 0x60D12FF9CD15B321, 0xBEE3A6C9903A81D8, 0xB47040913B33C35E, 0x19EE8C2ACC013CFF, + 0x5DEC94C5783B55C4, 0x78DC122D562C5F1D, 0x6520F008DA1C181E, 0x77CAF155A36EBF7C, + 0x0A09E02BDB883CA6, 0xFD5D9ADA7E3FB895, 0xC6F5FDD9EEAB83B5, 0x84589BB29F52A92A, + 0x9B2517F13F8E9814, 0x6F752AF6A52E31EC, 0x8E717799E324CE8A, 0x84D90AEF39262D58, + 0x79C27B13FC28944D, 0xE6D6DF6438E0044A, 0x51B603E400D79CA4, 0x6A902B28C588B390, + 0x8D7F8DE9E6CB1D83, 0xCF1A4DC11CA7F044, 0xEF02E43C366786F1, 0x89915BCDBCFBE30F, + 0x5928B306F1A9CC7F, 0xA8B59092996851C5, 0x22050A20427E8B25, 0x6E6D64018941E7EE, + 0x9798C898B81AE846, 0x80EF218CDC30124A, 0xFCE45E60D55B0284, 0x4010E735D3147C35, + 0xEB647D999FD8DC7E, 0xD3544DCAB14FE907, 0xB588B27D8438700C, 0xA49EBFC43E057A4C +}; + +const uint64_t t1ha_refval_32be[81] = { 0, + 0xC92229C10FAEA50E, 0x0FE212630DD87E0F, 0x968F016D60417BB3, 0xE6B12B2C889913AB, + 0xAA3787887A9DA368, 0x06EE7202D53CEF39, 0x6149AFB2C296664B, 0x86C893210F9A5805, + 0x8379E5DA988AA04C, 0x24763AA7CE411A60, 0x9CF9C64B395A4CF8, 0xFFC192C338DDE904, + 0x094575BAB319E5F5, 0xBBBACFE7728C6511, 0x36B8C3CEBE4EF409, 0xAA0BA8A3397BA4D0, + 0xF9F85CF7124EE653, 0x3ADF4F7DF2A887AE, 0xAA2A0F5964AA9A7A, 0xF18B563F42D36EB8, + 0x034366CEF8334F5C, 0xAE2E85180E330E5F, 0xA5CE9FBFDF5C65B8, 0x5E509F25A9CA9B0B, + 0xE30D1358C2013BD2, 0xBB3A04D5EB8111FE, 0xB04234E82A15A28D, 0x87426A56D0EA0E2F, + 0x095086668E07F9F8, 0xF4CD3A43B6A6AEA5, 0x73F9B9B674D472A6, 0x558344229A1E4DCF, + 0x0AD4C95B2279181A, 0x5E3D19D80821CA6B, 0x652492D25BEBA258, 0xEFA84B02EAB849B1, + 0x81AD2D253059AC2C, 0x1400CCB0DFB2F457, 0x5688DC72A839860E, 0x67CC130E0FD1B0A7, + 0x0A851E3A94E21E69, 0x2EA0000B6A073907, 0xAE9776FF9BF1D02E, 0xC0A96B66B160631C, + 0xA93341DE4ED7C8F0, 0x6FBADD8F5B85E141, 0xB7D295F1C21E0CBA, 0x6D6114591B8E434F, + 0xF5B6939B63D97BE7, 0x3C80D5053F0E5DB4, 0xAC520ACC6B73F62D, 0xD1051F5841CF3966, + 0x62245AEA644AE760, 0x0CD56BE15497C62D, 0x5BB93435C4988FB6, 0x5FADB88EB18DB512, + 0xC897CAE2242475CC, 0xF1A094EF846DC9BB, 0x2B1D8B24924F79B6, 0xC6DF0C0E8456EB53, + 0xE6A40128303A9B9C, 0x64D37AF5EFFA7BD9, 0x90FEB70A5AE2A598, 0xEC3BA5F126D9FF4B, + 0x3121C8EC3AC51B29, 0x3B41C4D422166EC1, 0xB4878DDCBF48ED76, 0x5CB850D77CB762E4, + 0x9A27A43CC1DD171F, 0x2FDFFC6F99CB424A, 0xF54A57E09FDEA7BB, 0x5F78E5EE2CAB7039, + 0xB8BA95883DB31CBA, 0x131C61EB84AF86C3, 0x84B1F64E9C613DA7, 0xE94C1888C0C37C02, + 0xEA08F8BFB2039CDE, 0xCCC6D04D243EC753, 0x8977D105298B0629, 0x7AAA976494A5905E +}; + +#if T1HA0_AESNI_AVAILABLE +const uint64_t 
t1ha_refval_ia32aes_a[81] = { 0, + 0x772C7311BE32FF42, 0xB231AC660E5B23B5, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0xBB34C6A4396695D2, 0x7F46E1981C3256AD, + 0x4B25A9B217A6C5B4, 0x7A0A6BCDD2321DA9, 0x0A1F55E690A7B44E, 0x8F451A91D7F05244, + 0x624D5D3C9B9800A7, 0x09DDC2B6409DDC25, 0x3E155765865622B6, 0x96519FAC9511B381, + 0x512E58482FE4FBF0, 0x1AB260EA7D54AE1C, 0x67976F12CC28BBBD, 0x0607B5B2E6250156, + 0x7E700BEA717AD36E, 0x06A058D9D61CABB3, 0x57DA5324A824972F, 0x1193BA74DBEBF7E7, + 0xC18DC3140E7002D4, 0x9F7CCC11DFA0EF17, 0xC487D6C20666A13A, 0xB67190E4B50EF0C8, + 0xA53DAA608DF0B9A5, 0x7E13101DE87F9ED3, 0x7F8955AE2F05088B, 0x2DF7E5A097AD383F, + 0xF027683A21EA14B5, 0x9BB8AEC3E3360942, 0x92BE39B54967E7FE, 0x978C6D332E7AFD27, + 0xED512FE96A4FAE81, 0x9E1099B8140D7BA3, 0xDFD5A5BE1E6FE9A6, 0x1D82600E23B66DD4, + 0x3FA3C3B7EE7B52CE, 0xEE84F7D2A655EF4C, 0x2A4361EC769E3BEB, 0x22E4B38916636702, + 0x0063096F5D39A115, 0x6C51B24DAAFA5434, 0xBAFB1DB1B411E344, 0xFF529F161AE0C4B0, + 0x1290EAE3AC0A686F, 0xA7B0D4585447D1BE, 0xAED3D18CB6CCAD53, 0xFC73D46F8B41BEC6 +}; + +const uint64_t t1ha_refval_ia32aes_b[81] = { 0, + 0x772C7311BE32FF42, 0x4398F62A8CB6F72A, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0xE810F88E85CEA11A, 0x4814F8F3B83E4394, + 0x9CABA22D10A2F690, 0x0D10032511F58111, 0xE9A36EF5EEA3CD58, 0xC79242DE194D9D7C, + 0xC3871AA0435EE5C8, 0x52890BED43CCF4CD, 0x07A1D0861ACCD373, 0x227B816FF0FEE9ED, + 0x59FFBF73AACFC0C4, 0x09AB564F2BEDAD0C, 0xC05F744F2EE38318, 0x7B50B621D547C661, + 0x0C1F71CB4E68E5D1, 0x0E33A47881D4DBAA, 0xF5C3BF198E9A7C2E, 0x16328FD8C0F68A91, + 0xA3E399C9AB3E9A59, 0x163AE71CBCBB18B8, 0x18F17E4A8C79F7AB, 0x9250E2EA37014B45, + 0x7BBBB111D60B03E4, 0x3DAA4A3071A0BD88, 0xA28828D790A2D6DC, 0xBC70FC88F64BE3F1, + 0xA3E48008BA4333C7, 0x739E435ACAFC79F7, 0x42BBB360BE007CC6, 0x4FFB6FD2AF74EC92, + 0x2A799A2994673146, 0xBE0A045B69D48E9F, 0x549432F54FC6A278, 0x371D3C60369FC702, + 0xDB4557D415B08CA7, 0xE8692F0A83850B37, 0x022E46AEB36E9AAB, 0x117AC9B814E4652D, + 0xA361041267AE9048, 0x277CB51C961C3DDA, 0xAFFC96F377CB8A8D, 0x83CC79FA01DD1BA7, + 0xA494842ACF4B802C, 0xFC6D9CDDE2C34A3F, 0x4ED6863CE455F7A7, 0x630914D0DB7AAE98 +}; +#endif /* T1HA0_AESNI_AVAILABLE */ + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha0_32le(void) { + return t1ha_selfcheck(t1ha0_32le, t1ha_refval_32le); 
+} + +__cold int t1ha_selfcheck__t1ha0_32be(void) { + return t1ha_selfcheck(t1ha0_32be, t1ha_refval_32be); +} + +#if T1HA0_AESNI_AVAILABLE +__cold int t1ha_selfcheck__t1ha0_ia32aes_noavx(void) { + return t1ha_selfcheck(t1ha0_ia32aes_noavx, t1ha_refval_ia32aes_a); +} + +__cold int t1ha_selfcheck__t1ha0_ia32aes_avx(void) { + return t1ha_selfcheck(t1ha0_ia32aes_avx, t1ha_refval_ia32aes_a); +} + +#ifndef __e2k__ +__cold int t1ha_selfcheck__t1ha0_ia32aes_avx2(void) { + return t1ha_selfcheck(t1ha0_ia32aes_avx2, t1ha_refval_ia32aes_b); +} +#endif /* ! __e2k__ */ +#endif /* if T1HA0_AESNI_AVAILABLE */ + +__cold int t1ha_selfcheck__t1ha0(void) { + int rc = t1ha_selfcheck__t1ha0_32le() | t1ha_selfcheck__t1ha0_32be(); + +#if (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) && \ + (!defined(T1HA1_DISABLED) || !defined(T1HA2_DISABLED)) +#if defined(T1HA1_DISABLED) + rc |= t1ha_selfcheck__t1ha2(); +#else + rc |= t1ha_selfcheck__t1ha1(); +#endif /* T1HA1_DISABLED */ +#endif /* 32/64 */ + +#if T1HA0_AESNI_AVAILABLE +#ifdef __e2k__ + rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); +#else + uint64_t features = t1ha_ia32cpu_features(); + if (t1ha_ia32_AESNI_avail(features)) { + rc |= t1ha_selfcheck__t1ha0_ia32aes_noavx(); + if (t1ha_ia32_AVX_avail(features)) { + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx(); + if (t1ha_ia32_AVX2_avail(features)) + rc |= t1ha_selfcheck__t1ha0_ia32aes_avx2(); + } + } +#endif /* __e2k__ */ +#endif /* T1HA0_AESNI_AVAILABLE */ + + return rc; +} + +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha1.c b/contrib/libs/t1ha/src/t1ha1.c index 11275d9f0a..da6899c221 100644 --- a/contrib/libs/t1ha/src/t1ha1.c +++ b/contrib/libs/t1ha/src/t1ha1.c @@ -1,161 +1,161 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. 
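As an illustration only (not part of the patch above): the t1ha0 self-check dispatcher shown in this hunk runs the AES-NI variants only when CPUID reports the matching features, so a caller would normally just invoke the combined entry point once at startup. A minimal sketch, assuming the library's public header t1ha.h exports t1ha_selfcheck__t1ha0() and t1ha0_32le() with the signatures seen in the hunk:

#include <inttypes.h>
#include <stdio.h>
#include <stdlib.h>
#include "t1ha.h" /* assumed public header of the bundled library */

int main(void) {
  /* Returns 0 when every enabled variant (32le/32be and, if AES-NI is
   * detected, the ia32aes ones) reproduces its reference table. */
  if (t1ha_selfcheck__t1ha0() != 0) {
    fprintf(stderr, "t1ha0 self-check failed\n");
    return EXIT_FAILURE;
  }
  const char msg[] = "hello";
  printf("t1ha0_32le = %016" PRIx64 "\n", t1ha0_32le(msg, sizeof(msg) - 1, 42));
  return EXIT_SUCCESS;
}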
In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA1_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* xor-mul-xor mixer */ -static __inline uint64_t mix64(uint64_t v, uint64_t p) { - v *= p; - return v ^ rot64(v, 41); -} - -static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { - /* LY: for performance reason on a some not high-end CPUs - * I replaced the second mux64() operation by mix64(). - * Unfortunately this approach fails the "strict avalanche criteria", - * see test results at https://github.com/demerphq/smhasher. 
*/ - return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); -} - -/* TODO: C++ template in the next version */ -#define T1HA1_BODY(ENDIANNES, ALIGNESS) \ - const uint64_t *v = (const uint64_t *)data; \ - if (unlikely(len > 32)) { \ - uint64_t c = rot64(len, 17) + seed; \ - uint64_t d = len ^ rot64(seed, 17); \ - const uint64_t *detent = \ - (const uint64_t *)((const uint8_t *)data + len - 31); \ - do { \ - const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ - v += 4; \ - prefetch(v); \ - \ - const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \ - const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \ - d -= b ^ rot64(w1, 31); \ - c += a ^ rot64(w0, 41); \ - b ^= prime_0 * (c13 + w2); \ - a ^= prime_1 * (d02 + w3); \ - } while (likely(v < detent)); \ - \ - a ^= prime_6 * (rot64(c, 17) + d); \ - b ^= prime_5 * (c + rot64(d, 17)); \ - len &= 31; \ - } \ - \ - switch (len) { \ - default: \ - b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ - case 19: \ - case 18: \ - case 17: \ - a += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - a += mux64(tail64_##ENDIANNES##_##ALIGNESS(v, len), prime_1); \ - /* fall through */ \ - case 0: \ - return final_weak_avalanche(a, b); \ - } - -uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA1_BODY(le, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - T1HA1_BODY(le, unaligned); - } else { - T1HA1_BODY(le, aligned); - } -#endif -} - -uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) { - uint64_t a = seed; - uint64_t b = len; - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - T1HA1_BODY(be, unaligned); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - T1HA1_BODY(be, unaligned); - } else { - T1HA1_BODY(be, aligned); - } -#endif -} - -#endif /* T1HA1_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA1_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* xor-mul-xor mixer */ +static __inline uint64_t mix64(uint64_t v, uint64_t p) { + v *= p; + return v ^ rot64(v, 41); +} + +static __inline uint64_t final_weak_avalanche(uint64_t a, uint64_t b) { + /* LY: for performance reason on a some not high-end CPUs + * I replaced the second mux64() operation by mix64(). + * Unfortunately this approach fails the "strict avalanche criteria", + * see test results at https://github.com/demerphq/smhasher. 
*/ + return mux64(rot64(a + b, 17), prime_4) + mix64(a ^ b, prime_0); +} + +/* TODO: C++ template in the next version */ +#define T1HA1_BODY(ENDIANNES, ALIGNESS) \ + const uint64_t *v = (const uint64_t *)data; \ + if (unlikely(len > 32)) { \ + uint64_t c = rot64(len, 17) + seed; \ + uint64_t d = len ^ rot64(seed, 17); \ + const uint64_t *detent = \ + (const uint64_t *)((const uint8_t *)data + len - 31); \ + do { \ + const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ + v += 4; \ + prefetch(v); \ + \ + const uint64_t d02 = w0 ^ rot64(w2 + d, 17); \ + const uint64_t c13 = w1 ^ rot64(w3 + c, 17); \ + d -= b ^ rot64(w1, 31); \ + c += a ^ rot64(w0, 41); \ + b ^= prime_0 * (c13 + w2); \ + a ^= prime_1 * (d02 + w3); \ + } while (likely(v < detent)); \ + \ + a ^= prime_6 * (rot64(c, 17) + d); \ + b ^= prime_5 * (c + rot64(d, 17)); \ + len &= 31; \ + } \ + \ + switch (len) { \ + default: \ + b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + a += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + b += mux64(fetch64_##ENDIANNES##_##ALIGNESS(v++), prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + a += mux64(tail64_##ENDIANNES##_##ALIGNESS(v, len), prime_1); \ + /* fall through */ \ + case 0: \ + return final_weak_avalanche(a, b); \ + } + +uint64_t t1ha1_le(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA1_BODY(le, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + T1HA1_BODY(le, unaligned); + } else { + T1HA1_BODY(le, aligned); + } +#endif +} + +uint64_t t1ha1_be(const void *data, size_t len, uint64_t seed) { + uint64_t a = seed; + uint64_t b = len; + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA1_BODY(be, unaligned); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + T1HA1_BODY(be, unaligned); + } else { + T1HA1_BODY(be, aligned); + } +#endif +} + +#endif /* T1HA1_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha1_selfcheck.c b/contrib/libs/t1ha/src/t1ha1_selfcheck.c index 9f9a19ae26..5cf49632ed 100644 --- a/contrib/libs/t1ha/src/t1ha1_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha1_selfcheck.c @@ -1,112 +1,112 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. 
The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA1_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_64le[81] = { 0, - 0x6A580668D6048674, 0xA2FE904AFF0D0879, 0xE3AB9C06FAF4D023, 0x6AF1C60874C95442, - 0xB3557E561A6C5D82, 0x0AE73C696F3D37C0, 0x5EF25F7062324941, 0x9B784F3B4CE6AF33, - 0x6993BB206A74F070, 0xF1E95DF109076C4C, 0x4E1EB70C58E48540, 0x5FDD7649D8EC44E4, - 0x559122C706343421, 0x380133D58665E93D, 0x9CE74296C8C55AE4, 0x3556F9A5757AB6D0, - 0xF62751F7F25C469E, 0x851EEC67F6516D94, 0xED463EE3848A8695, 0xDC8791FEFF8ED3AC, - 0x2569C744E1A282CF, 0xF90EB7C1D70A80B9, 0x68DFA6A1B8050A4C, 0x94CCA5E8210D2134, - 0xF5CC0BEABC259F52, 0x40DBC1F51618FDA7, 0x0807945BF0FB52C6, 0xE5EF7E09DE70848D, - 0x63E1DF35FEBE994A, 0x2025E73769720D5A, 0xAD6120B2B8A152E1, 0x2A71D9F13959F2B7, - 0x8A20849A27C32548, 0x0BCBC9FE3B57884E, 0x0E028D255667AEAD, 0xBE66DAD3043AB694, - 0xB00E4C1238F9E2D4, 0x5C54BDE5AE280E82, 0x0E22B86754BC3BC4, 0x016707EBF858B84D, - 0x990015FBC9E095EE, 0x8B9AF0A3E71F042F, 0x6AA56E88BD380564, 0xAACE57113E681A0F, - 0x19F81514AFA9A22D, 0x80DABA3D62BEAC79, 0x715210412CABBF46, 0xD8FA0B9E9D6AA93F, - 0x6C2FC5A4109FD3A2, 0x5B3E60EEB51DDCD8, 0x0A7C717017756FE7, 0xA73773805CA31934, - 0x4DBD6BB7A31E85FD, 0x24F619D3D5BC2DB4, 0x3E4AF35A1678D636, 0x84A1A8DF8D609239, - 0x359C862CD3BE4FCD, 0xCF3A39F5C27DC125, 0xC0FF62F8FD5F4C77, 0x5E9F2493DDAA166C, - 0x17424152BE1CA266, 0xA78AFA5AB4BBE0CD, 0x7BFB2E2CEF118346, 0x647C3E0FF3E3D241, - 0x0352E4055C13242E, 0x6F42FC70EB660E38, 0x0BEBAD4FABF523BA, 0x9269F4214414D61D, - 0x1CA8760277E6006C, 0x7BAD25A859D87B5D, 0xAD645ADCF7414F1D, 0xB07F517E88D7AFB3, - 0xB321C06FB5FFAB5C, 0xD50F162A1EFDD844, 0x1DFD3D1924FBE319, 0xDFAEAB2F09EF7E78, - 0xA7603B5AF07A0B1E, 0x41CD044C0E5A4EE3, 0xF64D2F86E813BF33, 0xFF9FDB99305EB06A -}; - -const uint64_t t1ha_refval_64be[81] = { 0, - 0x6A580668D6048674, 0xDECC975A0E3B8177, 0xE3AB9C06FAF4D023, 0xE401FA8F1B6AF969, - 0x67DB1DAE56FB94E3, 0x1106266A09B7A073, 0x550339B1EF2C7BBB, 0x290A2BAF590045BB, - 0xA182C1258C09F54A, 0x137D53C34BE7143A, 0xF6D2B69C6F42BEDC, 0x39643EAF2CA2E4B4, - 0x22A81F139A2C9559, 0x5B3D6AEF0AF33807, 0x56E3F80A68643C08, 0x9E423BE502378780, - 0xCDB0986F9A5B2FD5, 0xD5B3C84E7933293F, 0xE5FB8C90399E9742, 0x5D393C1F77B2CF3D, - 0xC8C82F5B2FF09266, 0xACA0230CA6F7B593, 0xCB5805E2960D1655, 0x7E2AD5B704D77C95, - 0xC5E903CDB8B9EB5D, 0x4CC7D0D21CC03511, 0x8385DF382CFB3E93, 0xF17699D0564D348A, - 0xF77EE7F8274A4C8D, 0xB9D8CEE48903BABE, 0xFE0EBD2A82B9CFE9, 0xB49FB6397270F565, - 0x173735C8C342108E, 0xA37C7FBBEEC0A2EA, 0xC13F66F462BB0B6E, 0x0C04F3C2B551467E, - 0x76A9CB156810C96E, 0x2038850919B0B151, 0xCEA19F2B6EED647B, 0x6746656D2FA109A4, - 0xF05137F221007F37, 0x892FA9E13A3B4948, 0x4D57B70D37548A32, 0x1A7CFB3D566580E6, - 0x7CB30272A45E3FAC, 0x137CCFFD9D51423F, 0xB87D96F3B82DF266, 0x33349AEE7472ED37, - 0x5CC0D3C99555BC07, 0x4A8F4FA196D964EF, 0xE82A0D64F281FBFA, 0x38A1BAC2C36823E1, - 0x77D197C239FD737E, 0xFB07746B4E07DF26, 0xC8A2198E967672BD, 0x5F1A146D143FA05A, - 0x26B877A1201AB7AC, 0x74E5B145214723F8, 0xE9CE10E3C70254BC, 0x299393A0C05B79E8, - 0xFD2D2B9822A5E7E2, 0x85424FEA50C8E50A, 0xE6839E714B1FFFE5, 0x27971CCB46F9112A, - 0xC98695A2E0715AA9, 0x338E1CBB4F858226, 0xFC6B5C5CF7A8D806, 0x8973CAADDE8DA50C, - 0x9C6D47AE32EBAE72, 0x1EBF1F9F21D26D78, 0x80A9704B8E153859, 0x6AFD20A939F141FB, - 0xC35F6C2B3B553EEF, 0x59529E8B0DC94C1A, 0x1569DF036EBC4FA1, 0xDA32B88593C118F9, - 0xF01E4155FF5A5660, 0x765A2522DCE2B185, 0xCEE95554128073EF, 0x60F072A5CA51DE2F -}; - -/* *INDENT-ON* */ -/* clang-format on */ - 
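As an illustration only (not part of the patch): the 81-entry t1ha_refval_64le/t1ha_refval_64be tables above are the expected outputs that the self-test compares against; application code never indexes them directly, it only calls the exported check. A hedged usage sketch, assuming t1ha.h declares t1ha_selfcheck__t1ha1() and t1ha1_le() as in this hunk:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include "t1ha.h" /* assumed public header */

uint64_t checked_t1ha1(const void *data, size_t len, uint64_t seed) {
  /* Run the reference-vector self-test once, e.g. at startup
   * (not thread-safe as written; a sketch only). */
  static int verified;
  if (!verified) {
    assert(t1ha_selfcheck__t1ha1() == 0);
    verified = 1;
  }
  return t1ha1_le(data, len, seed); /* little-endian variant, as on x86_64 */
}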
-__cold int t1ha_selfcheck__t1ha1_le(void) { - return t1ha_selfcheck(t1ha1_le, t1ha_refval_64le); -} - -__cold int t1ha_selfcheck__t1ha1_be(void) { - return t1ha_selfcheck(t1ha1_be, t1ha_refval_64be); -} - -__cold int t1ha_selfcheck__t1ha1(void) { - return t1ha_selfcheck__t1ha1_le() | t1ha_selfcheck__t1ha1_be(); -} - -#endif /* T1HA1_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA1_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_64le[81] = { 0, + 0x6A580668D6048674, 0xA2FE904AFF0D0879, 0xE3AB9C06FAF4D023, 0x6AF1C60874C95442, + 0xB3557E561A6C5D82, 0x0AE73C696F3D37C0, 0x5EF25F7062324941, 0x9B784F3B4CE6AF33, + 0x6993BB206A74F070, 0xF1E95DF109076C4C, 0x4E1EB70C58E48540, 0x5FDD7649D8EC44E4, + 0x559122C706343421, 0x380133D58665E93D, 0x9CE74296C8C55AE4, 0x3556F9A5757AB6D0, + 0xF62751F7F25C469E, 0x851EEC67F6516D94, 0xED463EE3848A8695, 0xDC8791FEFF8ED3AC, + 0x2569C744E1A282CF, 0xF90EB7C1D70A80B9, 0x68DFA6A1B8050A4C, 0x94CCA5E8210D2134, + 0xF5CC0BEABC259F52, 0x40DBC1F51618FDA7, 0x0807945BF0FB52C6, 0xE5EF7E09DE70848D, + 0x63E1DF35FEBE994A, 0x2025E73769720D5A, 0xAD6120B2B8A152E1, 0x2A71D9F13959F2B7, + 0x8A20849A27C32548, 0x0BCBC9FE3B57884E, 0x0E028D255667AEAD, 0xBE66DAD3043AB694, + 0xB00E4C1238F9E2D4, 0x5C54BDE5AE280E82, 0x0E22B86754BC3BC4, 0x016707EBF858B84D, + 0x990015FBC9E095EE, 0x8B9AF0A3E71F042F, 0x6AA56E88BD380564, 0xAACE57113E681A0F, + 0x19F81514AFA9A22D, 0x80DABA3D62BEAC79, 0x715210412CABBF46, 0xD8FA0B9E9D6AA93F, + 0x6C2FC5A4109FD3A2, 0x5B3E60EEB51DDCD8, 0x0A7C717017756FE7, 0xA73773805CA31934, + 0x4DBD6BB7A31E85FD, 0x24F619D3D5BC2DB4, 0x3E4AF35A1678D636, 0x84A1A8DF8D609239, + 0x359C862CD3BE4FCD, 0xCF3A39F5C27DC125, 0xC0FF62F8FD5F4C77, 0x5E9F2493DDAA166C, + 0x17424152BE1CA266, 0xA78AFA5AB4BBE0CD, 0x7BFB2E2CEF118346, 0x647C3E0FF3E3D241, + 0x0352E4055C13242E, 0x6F42FC70EB660E38, 0x0BEBAD4FABF523BA, 0x9269F4214414D61D, + 0x1CA8760277E6006C, 0x7BAD25A859D87B5D, 0xAD645ADCF7414F1D, 0xB07F517E88D7AFB3, + 0xB321C06FB5FFAB5C, 0xD50F162A1EFDD844, 0x1DFD3D1924FBE319, 0xDFAEAB2F09EF7E78, + 0xA7603B5AF07A0B1E, 0x41CD044C0E5A4EE3, 0xF64D2F86E813BF33, 0xFF9FDB99305EB06A +}; + +const uint64_t t1ha_refval_64be[81] = { 0, + 0x6A580668D6048674, 0xDECC975A0E3B8177, 0xE3AB9C06FAF4D023, 0xE401FA8F1B6AF969, + 0x67DB1DAE56FB94E3, 0x1106266A09B7A073, 0x550339B1EF2C7BBB, 0x290A2BAF590045BB, + 0xA182C1258C09F54A, 0x137D53C34BE7143A, 0xF6D2B69C6F42BEDC, 0x39643EAF2CA2E4B4, + 0x22A81F139A2C9559, 0x5B3D6AEF0AF33807, 0x56E3F80A68643C08, 0x9E423BE502378780, + 0xCDB0986F9A5B2FD5, 0xD5B3C84E7933293F, 0xE5FB8C90399E9742, 0x5D393C1F77B2CF3D, + 0xC8C82F5B2FF09266, 0xACA0230CA6F7B593, 0xCB5805E2960D1655, 0x7E2AD5B704D77C95, + 0xC5E903CDB8B9EB5D, 0x4CC7D0D21CC03511, 0x8385DF382CFB3E93, 0xF17699D0564D348A, + 0xF77EE7F8274A4C8D, 0xB9D8CEE48903BABE, 0xFE0EBD2A82B9CFE9, 0xB49FB6397270F565, + 0x173735C8C342108E, 0xA37C7FBBEEC0A2EA, 0xC13F66F462BB0B6E, 0x0C04F3C2B551467E, + 0x76A9CB156810C96E, 0x2038850919B0B151, 0xCEA19F2B6EED647B, 0x6746656D2FA109A4, + 0xF05137F221007F37, 0x892FA9E13A3B4948, 0x4D57B70D37548A32, 0x1A7CFB3D566580E6, + 0x7CB30272A45E3FAC, 0x137CCFFD9D51423F, 0xB87D96F3B82DF266, 0x33349AEE7472ED37, + 0x5CC0D3C99555BC07, 0x4A8F4FA196D964EF, 0xE82A0D64F281FBFA, 0x38A1BAC2C36823E1, + 0x77D197C239FD737E, 0xFB07746B4E07DF26, 0xC8A2198E967672BD, 0x5F1A146D143FA05A, + 0x26B877A1201AB7AC, 
0x74E5B145214723F8, 0xE9CE10E3C70254BC, 0x299393A0C05B79E8, + 0xFD2D2B9822A5E7E2, 0x85424FEA50C8E50A, 0xE6839E714B1FFFE5, 0x27971CCB46F9112A, + 0xC98695A2E0715AA9, 0x338E1CBB4F858226, 0xFC6B5C5CF7A8D806, 0x8973CAADDE8DA50C, + 0x9C6D47AE32EBAE72, 0x1EBF1F9F21D26D78, 0x80A9704B8E153859, 0x6AFD20A939F141FB, + 0xC35F6C2B3B553EEF, 0x59529E8B0DC94C1A, 0x1569DF036EBC4FA1, 0xDA32B88593C118F9, + 0xF01E4155FF5A5660, 0x765A2522DCE2B185, 0xCEE95554128073EF, 0x60F072A5CA51DE2F +}; + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha1_le(void) { + return t1ha_selfcheck(t1ha1_le, t1ha_refval_64le); +} + +__cold int t1ha_selfcheck__t1ha1_be(void) { + return t1ha_selfcheck(t1ha1_be, t1ha_refval_64be); +} + +__cold int t1ha_selfcheck__t1ha1(void) { + return t1ha_selfcheck__t1ha1_le() | t1ha_selfcheck__t1ha1_be(); +} + +#endif /* T1HA1_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha2.c b/contrib/libs/t1ha/src/t1ha2.c index 68a1ea4eb9..009f922751 100644 --- a/contrib/libs/t1ha/src/t1ha2.c +++ b/contrib/libs/t1ha/src/t1ha2.c @@ -1,383 +1,383 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. 
This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#ifndef T1HA2_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, - uint64_t y) { - s->n.a = x; - s->n.b = y; -} - -static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, - uint64_t y) { - s->n.c = rot64(y, 23) + ~x; - s->n.d = ~y + rot64(x, 19); -} - -/* TODO: C++ template in the next version */ -#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ - const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ - const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ - const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ - \ - const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \ - const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \ - s->n.d ^= s->n.b + rot64(w1, 38); \ - s->n.c ^= s->n.a + rot64(w0, 57); \ - s->n.b ^= prime_6 * (c13 + w2); \ - s->n.a ^= prime_5 * (d02 + w3); \ - } while (0) - -static __always_inline void squash(t1ha_state256_t *s) { - s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); - s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); -} - -/* TODO: C++ template in the next version */ -#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \ - do { \ - const void *detent = (const uint8_t *)data + len - 31; \ - do { \ - const uint64_t *v = (const uint64_t *)data; \ - data = (const uint64_t *)data + 4; \ - prefetch(data); \ - T1HA2_UPDATE(le, ALIGNESS, state, v); \ - } while (likely(data < detent)); \ - } while (0) - -/* TODO: C++ template in the next version */ -#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t *v = (const uint64_t *)data; \ - switch (len) { \ - default: \ - mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ - case 19: \ - case 18: \ - case 17: \ - mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ - prime_1); \ - /* fall through */ \ - case 0: \ - return final64(s->n.a, s->n.b); \ - } \ - } while (0) - -/* TODO: C++ template in the next version */ -#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, 
data, len) \ - do { \ - t1ha_state256_t *const s = state; \ - const uint64_t *v = (const uint64_t *)data; \ - switch (len) { \ - default: \ - mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_4); \ - /* fall through */ \ - case 24: \ - case 23: \ - case 22: \ - case 21: \ - case 20: \ - case 19: \ - case 18: \ - case 17: \ - mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_3); \ - /* fall through */ \ - case 16: \ - case 15: \ - case 14: \ - case 13: \ - case 12: \ - case 11: \ - case 10: \ - case 9: \ - mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ - prime_2); \ - /* fall through */ \ - case 8: \ - case 7: \ - case 6: \ - case 5: \ - case 4: \ - case 3: \ - case 2: \ - case 1: \ - mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ - prime_1); \ - /* fall through */ \ - case 0: \ - return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \ - } \ - } while (0) - -static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, - uint64_t d, uint64_t *h) { - mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); - mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); - mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); - mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); - *h = c + d; - return a ^ b; -} - -//------------------------------------------------------------------------------ - -uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { - t1ha_state256_t state; - init_ab(&state, seed, length); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
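As an illustration only (not part of the patch): final128() in this hunk folds the four lanes a..d into a 128-bit result, returning one 64-bit half and writing the other through *h; the one-shot wrappers t1ha2_atonce() and t1ha2_atonce128() expose exactly that. A sketch, assuming the public t1ha.h declarations match the definitions shown here:

#include <stddef.h>
#include <stdint.h>
#include "t1ha.h" /* assumed public header */

void hash_example(const char *key, size_t len) {
  /* 64-bit one-shot hash. */
  uint64_t h64 = t1ha2_atonce(key, len, /* seed */ 0);
  /* 128-bit one-shot hash: one half is returned, the other half is
   * written through the first argument. */
  uint64_t other_half;
  uint64_t half = t1ha2_atonce128(&other_half, key, len, /* seed */ 0);
  (void)h64; (void)half; (void)other_half;
}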
+ */ + +#ifndef T1HA2_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.a = x; + s->n.b = y; +} + +static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x, + uint64_t y) { + s->n.c = rot64(y, 23) + ~x; + s->n.d = ~y + rot64(x, 19); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \ + const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \ + const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \ + const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \ + \ + const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \ + const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \ + s->n.d ^= s->n.b + rot64(w1, 38); \ + s->n.c ^= s->n.a + rot64(w0, 57); \ + s->n.b ^= prime_6 * (c13 + w2); \ + s->n.a ^= prime_5 * (d02 + w3); \ + } while (0) + +static __always_inline void squash(t1ha_state256_t *s) { + s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23)); + s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d); +} + +/* TODO: C++ template in the next version */ +#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + const void *detent = (const uint8_t *)data + len - 31; \ + do { \ + const uint64_t *v = (const uint64_t *)data; \ + data = (const uint64_t *)data + 4; \ + prefetch(data); \ + T1HA2_UPDATE(le, ALIGNESS, state, v); \ + } while (likely(data < detent)); \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final64(s->n.a, s->n.b); \ + } \ + } while (0) + +/* TODO: C++ template in the next version */ +#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \ + do { \ + t1ha_state256_t *const s = state; \ + const uint64_t *v = (const uint64_t *)data; \ + switch (len) { \ + default: \ + mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_4); \ + /* fall through */ \ + case 24: \ + case 23: \ + case 22: \ + case 21: \ + case 20: \ + case 19: \ + case 18: \ + case 17: \ + mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_3); \ + /* fall through */ \ + case 16: \ + case 15: \ + case 14: \ + case 13: \ + case 12: \ + case 11: \ + case 10: \ + case 9: \ + mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \ + prime_2); \ + /* fall through */ \ + case 8: \ + case 7: \ + case 6: \ + case 5: \ + case 4: \ + case 3: \ + case 2: \ + case 1: \ + mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, 
len), \ + prime_1); \ + /* fall through */ \ + case 0: \ + return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \ + } \ + } while (0) + +static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c, + uint64_t d, uint64_t *h) { + mixup64(&a, &b, rot64(c, 41) ^ d, prime_0); + mixup64(&b, &c, rot64(d, 23) ^ a, prime_6); + mixup64(&c, &d, rot64(a, 19) ^ b, prime_5); + mixup64(&d, &a, rot64(b, 31) ^ c, prime_4); + *h = c + d; + return a ^ b; +} + +//------------------------------------------------------------------------------ + +uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, unaligned, &state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, unaligned, &state, data, length); - } else { - if (unlikely(length > 32)) { - init_cd(&state, seed, length); + T1HA2_LOOP(le, unaligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { + init_cd(&state, seed, length); #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &state, data, length); - squash(&state); - length &= 31; - } - T1HA2_TAIL_AB(le, aligned, &state, data, length); - } -#endif -} - -uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, - const void *__restrict data, size_t length, - uint64_t seed) { - t1ha_state256_t state; - init_ab(&state, seed, length); - init_cd(&state, seed, length); - -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT - if (unlikely(length > 32)) { + T1HA2_LOOP(le, aligned, &state, data, length); + squash(&state); + length &= 31; + } + T1HA2_TAIL_AB(le, aligned, &state, data, length); + } +#endif +} + +uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result, + const void *__restrict data, size_t length, + uint64_t seed) { + t1ha_state256_t state; + init_ab(&state, seed, length); + init_cd(&state, seed, 
length); + +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { - if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); - } else { - if (unlikely(length > 32)) { + T1HA2_LOOP(le, unaligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, unaligned, &state, data, length); + } else { + if (unlikely(length > 32)) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &state, data, length); - length &= 31; - } - T1HA2_TAIL_ABCD(le, aligned, &state, data, length); - } -#endif -} - -//------------------------------------------------------------------------------ - -void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { - init_ab(&ctx->state, seed_x, seed_y); - init_cd(&ctx->state, seed_x, seed_y); - ctx->partial = 0; - ctx->total = 0; -} - -void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, - size_t length) { - ctx->total += length; - - if (ctx->partial) { - const size_t left = 32 - ctx->partial; - const size_t chunk = (length >= left) ? left : length; - memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); - ctx->partial += chunk; - if (ctx->partial < 32) { - assert(left >= length); - return; - } - ctx->partial = 0; - data = (const uint8_t *)data + chunk; - length -= chunk; - T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64); - } - - if (length >= 32) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT + T1HA2_LOOP(le, aligned, &state, data, length); + length &= 31; + } + T1HA2_TAIL_ABCD(le, aligned, &state, data, length); + } +#endif +} + +//------------------------------------------------------------------------------ + +void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) { + init_ab(&ctx->state, seed_x, seed_y); + init_cd(&ctx->state, seed_x, seed_y); + ctx->partial = 0; + ctx->total = 0; +} + +void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data, + size_t length) { + ctx->total += length; + + if (ctx->partial) { + const size_t left = 32 - ctx->partial; + const size_t chunk = (length >= left) ? 
left : length; + memcpy(ctx->buffer.bytes + ctx->partial, data, chunk); + ctx->partial += chunk; + if (ctx->partial < 32) { + assert(left >= length); + return; + } + ctx->partial = 0; + data = (const uint8_t *)data + chunk; + length -= chunk; + T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64); + } + + if (length >= 32) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &ctx->state, data, length); -#else - const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; - if (misaligned) { + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); +#else + const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0; + if (misaligned) { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, unaligned, &ctx->state, data, length); - } else { + T1HA2_LOOP(le, unaligned, &ctx->state, data, length); + } else { #if defined(__LCC__) && __LCC__ > 123 /* Форсирует комбинирование пар арифметических операций в двухэтажные операции * в ближайшем после объявления директивы цикле, даже если эвристики оптимизации * говорят, что это нецелесообразно */ #pragma comb_oper #endif /* E2K LCC > 1.23 */ - T1HA2_LOOP(le, aligned, &ctx->state, data, length); - } -#endif - length &= 31; - } - - if (length) - memcpy(ctx->buffer.bytes, data, ctx->partial = length); -} - -uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, - uint64_t *__restrict extra_result) { - uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ - bits = bswap64(bits); -#endif - t1ha2_update(ctx, &bits, 8); - - if (likely(!extra_result)) { - squash(&ctx->state); - T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); - } - - T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); -} - -#endif /* T1HA2_DISABLED */ + T1HA2_LOOP(le, aligned, &ctx->state, data, length); + } +#endif + length &= 31; + } + + if (length) + memcpy(ctx->buffer.bytes, data, ctx->partial = length); +} + +uint64_t t1ha2_final(t1ha_context_t *__restrict ctx, + uint64_t *__restrict extra_result) { + uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63); +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ + bits = bswap64(bits); +#endif + t1ha2_update(ctx, &bits, 8); + + if (likely(!extra_result)) { + squash(&ctx->state); + T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); + } + + T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial); +} + +#endif /* T1HA2_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha2_selfcheck.c b/contrib/libs/t1ha/src/t1ha2_selfcheck.c index 275422fa64..1a01f99512 100644 --- a/contrib/libs/t1ha/src/t1ha2_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha2_selfcheck.c @@ -1,187 +1,187 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). 
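As an illustration only (not part of the patch): the t1ha2_init()/t1ha2_update()/t1ha2_final() trio above buffers partial 32-byte blocks inside t1ha_context_t, so input may be fed in arbitrary pieces. A minimal streaming sketch, assuming the same public t1ha.h declarations:

#include <stddef.h>
#include <stdint.h>
#include "t1ha.h" /* assumed public header */

uint64_t hash_two_pieces(const void *p1, size_t n1,
                         const void *p2, size_t n2) {
  t1ha_context_t ctx;
  t1ha2_init(&ctx, /* seed_x */ 0, /* seed_y */ 0);
  t1ha2_update(&ctx, p1, n1);
  t1ha2_update(&ctx, p2, n2);
  /* Pass NULL for a plain 64-bit result; pass a uint64_t* to also
   * receive the second half of the 128-bit result. */
  return t1ha2_final(&ctx, NULL);
}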
- * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#ifndef T1HA2_DISABLED -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -/* *INDENT-OFF* */ -/* clang-format off */ - -const uint64_t t1ha_refval_2atonce[81] = { 0, - 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660, - 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, - 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, - 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, - 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, - 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, - 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, - 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, - 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA, - 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E, - 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6, - 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA, - 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 0xB6C330AEAC4578AD, - 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57, - 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2, - 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56, - 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F, - 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD, - 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59, - 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD -}; - -const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, - 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9, - 0x79D8AFB550CEA471, 0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2, - 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897, - 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B, - 0x71F6130A2D08F788, 0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3, - 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267, - 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB, - 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24, - 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48, - 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3, - 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51, - 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54, - 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12, - 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9, - 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8, - 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954, - 0x869A75E04A976A3F, 0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6, - 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42, - 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0, - 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4 -}; - -const uint64_t 
t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, - 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B, - 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E, - 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0, - 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F, - 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 0x286A321AF9D5D9FA, - 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635, - 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B, - 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF, - 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C, - 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF, - 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3, - 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529, - 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F, - 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337, - 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1, - 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D, - 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A, - 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42, - 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB, - 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 0x03294A249EBE6B26 -}; - -const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, - 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA, - 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419, - 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC, - 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128, - 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3, - 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 0x81D2F34888D333A1, - 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D, - 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752, - 0xD4F5F6631934C988, 0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898, - 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B, - 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D, - 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113, - 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D, - 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A, - 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3, - 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9, - 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4, - 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77, - 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42, - 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA -}; - -/* *INDENT-ON* */ -/* clang-format on */ - -__cold int t1ha_selfcheck__t1ha2_atonce(void) { - return t1ha_selfcheck(t1ha2_atonce, 
t1ha_refval_2atonce); -} - -__cold static uint64_t thunk_atonce128(const void *data, size_t len, - uint64_t seed) { - uint64_t unused; - return t1ha2_atonce128(&unused, data, len, seed); -} - -__cold int t1ha_selfcheck__t1ha2_atonce128(void) { - return t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128); -} - -__cold static uint64_t thunk_stream(const void *data, size_t len, - uint64_t seed) { - t1ha_context_t ctx; - t1ha2_init(&ctx, seed, seed); - t1ha2_update(&ctx, data, len); - return t1ha2_final(&ctx, NULL); -} - -__cold static uint64_t thunk_stream128(const void *data, size_t len, - uint64_t seed) { - t1ha_context_t ctx; - t1ha2_init(&ctx, seed, seed); - t1ha2_update(&ctx, data, len); - uint64_t unused; - return t1ha2_final(&ctx, &unused); -} - -__cold int t1ha_selfcheck__t1ha2_stream(void) { - return t1ha_selfcheck(thunk_stream, t1ha_refval_2stream) | - t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128); -} - -__cold int t1ha_selfcheck__t1ha2(void) { - return t1ha_selfcheck__t1ha2_atonce() | t1ha_selfcheck__t1ha2_atonce128() | - t1ha_selfcheck__t1ha2_stream(); -} - -#endif /* T1HA2_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#ifndef T1HA2_DISABLED +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +/* *INDENT-OFF* */ +/* clang-format off */ + +const uint64_t t1ha_refval_2atonce[81] = { 0, + 0x772C7311BE32FF42, 0x444753D23F207E03, 0x71F6DF5DA3B4F532, 0x555859635365F660, + 0xE98808F1CD39C626, 0x2EB18FAF2163BB09, 0x7B9DD892C8019C87, 0xE2B1431C4DA4D15A, + 0x1984E718A5477F70, 0x08DD17B266484F79, 0x4C83A05D766AD550, 0x92DCEBB131D1907D, + 0xD67BC6FC881B8549, 0xF6A9886555FBF66B, 0x6E31616D7F33E25E, 0x36E31B7426E3049D, + 0x4F8E4FAF46A13F5F, 0x03EB0CB3253F819F, 0x636A7769905770D2, 0x3ADF3781D16D1148, + 0x92D19CB1818BC9C2, 0x283E68F4D459C533, 0xFA83A8A88DECAA04, 0x8C6F00368EAC538C, + 0x7B66B0CF3797B322, 0x5131E122FDABA3FF, 0x6E59FF515C08C7A9, 0xBA2C5269B2C377B0, + 0xA9D24FD368FE8A2B, 0x22DB13D32E33E891, 0x7B97DFC804B876E5, 0xC598BDFCD0E834F9, + 0xB256163D3687F5A7, 0x66D7A73C6AEF50B3, 0x25A7201C85D9E2A3, 0x911573EDA15299AA, + 0x5C0062B669E18E4C, 0x17734ADE08D54E28, 0xFFF036E33883F43B, 0xFE0756E7777DF11E, + 0x37972472D023F129, 0x6CFCE201B55C7F57, 0xE019D1D89F02B3E1, 0xAE5CC580FA1BB7E6, + 0x295695FB7E59FC3A, 0x76B6C820A40DD35E, 0xB1680A1768462B17, 0x2FB6AF279137DADA, + 0x28FB6B4366C78535, 0xEC278E53924541B1, 0x164F8AAB8A2A28B5, 0xB6C330AEAC4578AD, + 0x7F6F371070085084, 0x94DEAD60C0F448D3, 0x99737AC232C559EF, 0x6F54A6F9CA8EDD57, + 0x979B01E926BFCE0C, 0xF7D20BC85439C5B4, 0x64EDB27CD8087C12, 0x11488DE5F79C0BE2, + 0x25541DDD1680B5A4, 0x8B633D33BE9D1973, 0x404A3113ACF7F6C6, 0xC59DBDEF8550CD56, + 0x039D23C68F4F992C, 0x5BBB48E4BDD6FD86, 0x41E312248780DF5A, 0xD34791CE75D4E94F, + 0xED523E5D04DCDCFF, 0x7A6BCE0B6182D879, 0x21FB37483CAC28D8, 0x19A1B66E8DA878AD, + 0x6F804C5295B09ABE, 0x2A4BE5014115BA81, 0xA678ECC5FC924BE0, 0x50F7A54A99A36F59, + 0x0FD7E63A39A66452, 0x5AB1B213DD29C4E4, 0xF3ED80D9DF6534C5, 0xC736B12EF90615FD +}; + +const uint64_t t1ha_refval_2atonce128[81] = { 0x4EC7F6A48E33B00A, + 0xB7B7FAA5BD7D8C1E, 0x3269533F66534A76, 0x6C3EC6B687923BFC, 0xC096F5E7EFA471A9, + 0x79D8AFB550CEA471, 0xCEE0507A20FD5119, 0xFB04CFFC14A9F4BF, 0xBD4406E923807AF2, + 0x375C02FF11010491, 0xA6EA4C2A59E173FF, 0xE0A606F0002CADDF, 0xE13BEAE6EBC07897, + 0xF069C2463E48EA10, 0x75BEE1A97089B5FA, 0x378F22F8DE0B8085, 0x9C726FC4D53D0D8B, + 0x71F6130A2D08F788, 
0x7A9B20433FF6CF69, 0xFF49B7CD59BF6D61, 0xCCAAEE0D1CA9C6B3, + 0xC77889D86039D2AD, 0x7B378B5BEA9B0475, 0x6520BFA79D59AD66, 0x2441490CB8A37267, + 0xA715A66B7D5CF473, 0x9AE892C88334FD67, 0xD2FFE9AEC1D2169A, 0x790B993F18B18CBB, + 0xA0D02FBCF6A7B1AD, 0xA90833E6F151D0C1, 0x1AC7AFA37BD79BE0, 0xD5383628B2881A24, + 0xE5526F9D63F9F8F1, 0xC1F165A01A6D1F4D, 0x6CCEF8FF3FCFA3F2, 0x2030F18325E6DF48, + 0x289207230E3FB17A, 0x077B66F713A3C4B9, 0x9F39843CAF871754, 0x512FDA0F808ACCF3, + 0xF4D9801CD0CD1F14, 0x28A0C749ED323638, 0x94844CAFA671F01C, 0xD0E261876B8ACA51, + 0x8FC2A648A4792EA2, 0x8EF87282136AF5FE, 0x5FE6A54A9FBA6B40, 0xA3CC5B8FE6223D54, + 0xA8C3C0DD651BB01C, 0x625E9FDD534716F3, 0x1AB2604083C33AC5, 0xDE098853F8692F12, + 0x4B0813891BD87624, 0x4AB89C4553D182AD, 0x92C15AA2A3C27ADA, 0xFF2918D68191F5D9, + 0x06363174F641C325, 0x667112ADA74A2059, 0x4BD605D6B5E53D7D, 0xF2512C53663A14C8, + 0x21857BCB1852667C, 0xAFBEBD0369AEE228, 0x7049340E48FBFD6B, 0x50710E1924F46954, + 0x869A75E04A976A3F, 0x5A41ABBDD6373889, 0xA781778389B4B188, 0x21A3AFCED6C925B6, + 0x107226192EC10B42, 0x62A862E84EC2F9B1, 0x2B15E91659606DD7, 0x613934D1F9EC5A42, + 0x4DC3A96DC5361BAF, 0xC80BBA4CB5F12903, 0x3E3EDAE99A7D6987, 0x8F97B2D55941DCB0, + 0x4C9787364C3E4EC1, 0xEF0A2D07BEA90CA7, 0x5FABF32C70AEEAFB, 0x3356A5CFA8F23BF4 +}; + +const uint64_t t1ha_refval_2stream[81] = { 0x3C8426E33CB41606, + 0xFD74BE70EE73E617, 0xF43DE3CDD8A20486, 0x882FBCB37E8EA3BB, 0x1AA2CDD34CAA3D4B, + 0xEE755B2BFAE07ED5, 0xD4E225250D92E213, 0xA09B49083205965B, 0xD47B21724EF9EC9E, + 0xAC888FC3858CEE11, 0x94F820D85736F244, 0x1707951CCA920932, 0x8E0E45603F7877F0, + 0x9FD2592C0E3A7212, 0x9A66370F3AE3D427, 0xD33382D2161DE2B7, 0x9A35BE079DA7115F, + 0x73457C7FF58B4EC3, 0xBE8610BD53D7CE98, 0x65506DFE5CCD5371, 0x286A321AF9D5D9FA, + 0xB81EF9A7EF3C536D, 0x2CFDB5E6825C6E86, 0xB2A58CBFDFDD303A, 0xD26094A42B950635, + 0xA34D666A5F02AD9A, 0x0151E013EBCC72E5, 0x9254A6EA7FCB6BB5, 0x10C9361B3869DC2B, + 0xD7EC55A060606276, 0xA2FF7F8BF8976FFD, 0xB5181BB6852DCC88, 0x0EE394BB6178BAFF, + 0x3A8B4B400D21B89C, 0xEC270461970960FD, 0x615967FAB053877E, 0xFA51BF1CFEB4714C, + 0x29FDA8383070F375, 0xC3B663061BC52EDA, 0x192BBAF1F1A57923, 0x6D193B52F93C53AF, + 0x7F6F5639FE87CA1E, 0x69F7F9140B32EDC8, 0xD0F2416FB24325B6, 0x62C0E37FEDD49FF3, + 0x57866A4B809D373D, 0x9848D24BD935E137, 0xDFC905B66734D50A, 0x9A938DD194A68529, + 0x8276C44DF0625228, 0xA4B35D00AD67C0AB, 0x3D9CB359842DB452, 0x4241BFA8C23B267F, + 0x650FA517BEF15952, 0x782DE2ABD8C7B1E1, 0x4EAE456166CA3E15, 0x40CDF3A02614E337, + 0xAD84092C46102172, 0x0C68479B03F9A167, 0x7E1BA046749E181C, 0x3F3AB41A697382C1, + 0xC5E5DD6586EBFDC4, 0xFF926CD4EB02555C, 0x035CFE67F89E709B, 0x89F06AB6464A1B9D, + 0x8EFF58F3F7DEA758, 0x8B54AC657902089F, 0xC6C4F1F9F8DA4D64, 0xBDB729048AAAC93A, + 0xEA76BA628F5E5CD6, 0x742159B728B8A979, 0x6D151CD3C720E53D, 0xE97FFF9368FCDC42, + 0xCA5B38314914FBDA, 0xDD92C91D8B858EAE, 0x66E5F07CF647CBF2, 0xD4CF9B42F4985AFB, + 0x72AE17AC7D92F6B7, 0xB8206B22AB0472E1, 0x385876B5CFD42479, 0x03294A249EBE6B26 +}; + +const uint64_t t1ha_refval_2stream128[81] = { 0xCD2801D3B92237D6, + 0x10E4D47BD821546D, 0x9100704B9D65CD06, 0xD6951CB4016313EF, 0x24DB636F96F474DA, + 0x3F4AF7DF3C49E422, 0xBFF25B8AF143459B, 0xA157EC13538BE549, 0xD3F5F52C47DBD419, + 0x0EF3D7D735AF1575, 0x46B7B892823F7B1B, 0xEE22EA4655213289, 0x56AD76F02FE929BC, + 0x9CF6CD1AC886546E, 0xAF45CE47AEA0B933, 0x535F9DC09F3996B7, 0x1F0C3C01694AE128, + 0x18495069BE0766F7, 0x37E5FFB3D72A4CB1, 0x6D6C2E9299F30709, 0x4F39E693F50B41E3, + 0xB11FC4EF0658E116, 0x48BFAACB78E5079B, 0xE1B4C89C781B3AD0, 
0x81D2F34888D333A1, + 0xF6D02270D2EA449C, 0xC884C3C2C3CE1503, 0x711AE16BA157A9B9, 0x1E6140C642558C9D, + 0x35AB3D238F5DC55B, 0x33F07B6AEF051177, 0xE57336776EEFA71C, 0x6D445F8318BA3752, + 0xD4F5F6631934C988, 0xD5E260085727C4A2, 0x5B54B41EC180B4FA, 0x7F5D75769C15A898, + 0xAE5A6DB850CA33C6, 0x038CCB8044663403, 0xDA16310133DC92B8, 0x6A2FFB7AB2B7CE2B, + 0xDC1832D9229BAE20, 0x8C62C479F5ABC9E4, 0x5EB7B617857C9CCB, 0xB79CF7D749A1E80D, + 0xDE7FAC3798324FD3, 0x8178911813685D06, 0x6A726CBD394D4410, 0x6CBE6B3280DA1113, + 0x6829BA4410CF1148, 0xFA7E417EB26C5BC6, 0x22ED87884D6E3A49, 0x15F1472D5115669D, + 0x2EA0B4C8BF69D318, 0xDFE87070AA545503, 0x6B4C14B5F7144AB9, 0xC1ED49C06126551A, + 0x351919FC425C3899, 0x7B569C0FA6F1BD3E, 0x713AC2350844CFFD, 0xE9367F9A638C2FF3, + 0x97F17D325AEA0786, 0xBCB907CC6CF75F91, 0x0CB7517DAF247719, 0xBE16093CC45BE8A9, + 0x786EEE97359AD6AB, 0xB7AFA4F326B97E78, 0x2694B67FE23E502E, 0x4CB492826E98E0B4, + 0x838D119F74A416C7, 0x70D6A91E4E5677FD, 0xF3E4027AD30000E6, 0x9BDF692795807F77, + 0x6A371F966E034A54, 0x8789CF41AE4D67EF, 0x02688755484D60AE, 0xD5834B3A4BF5CE42, + 0x9405FC61440DE25D, 0x35EB280A157979B6, 0x48D40D6A525297AC, 0x6A87DC185054BADA +}; + +/* *INDENT-ON* */ +/* clang-format on */ + +__cold int t1ha_selfcheck__t1ha2_atonce(void) { + return t1ha_selfcheck(t1ha2_atonce, t1ha_refval_2atonce); +} + +__cold static uint64_t thunk_atonce128(const void *data, size_t len, + uint64_t seed) { + uint64_t unused; + return t1ha2_atonce128(&unused, data, len, seed); +} + +__cold int t1ha_selfcheck__t1ha2_atonce128(void) { + return t1ha_selfcheck(thunk_atonce128, t1ha_refval_2atonce128); +} + +__cold static uint64_t thunk_stream(const void *data, size_t len, + uint64_t seed) { + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + return t1ha2_final(&ctx, NULL); +} + +__cold static uint64_t thunk_stream128(const void *data, size_t len, + uint64_t seed) { + t1ha_context_t ctx; + t1ha2_init(&ctx, seed, seed); + t1ha2_update(&ctx, data, len); + uint64_t unused; + return t1ha2_final(&ctx, &unused); +} + +__cold int t1ha_selfcheck__t1ha2_stream(void) { + return t1ha_selfcheck(thunk_stream, t1ha_refval_2stream) | + t1ha_selfcheck(thunk_stream128, t1ha_refval_2stream128); +} + +__cold int t1ha_selfcheck__t1ha2(void) { + return t1ha_selfcheck__t1ha2_atonce() | t1ha_selfcheck__t1ha2_atonce128() | + t1ha_selfcheck__t1ha2_stream(); +} + +#endif /* T1HA2_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha_bits.h b/contrib/libs/t1ha/src/t1ha_bits.h index 5cd34a7496..93b6b51a54 100644 --- a/contrib/libs/t1ha/src/t1ha_bits.h +++ b/contrib/libs/t1ha/src/t1ha_bits.h @@ -1,406 +1,406 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. 
Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#pragma once - -#if defined(_MSC_VER) -#pragma warning(disable : 4201) /* nameless struct/union */ -#if _MSC_VER > 1800 -#pragma warning(disable : 4464) /* relative include path contains '..' */ -#endif /* 1800 */ -#endif /* MSVC */ -#include "../t1ha.h" - -#ifndef T1HA_USE_FAST_ONESHOT_READ -/* Define it to 1 for little bit faster code. - * Unfortunately this may triggering a false-positive alarms from Valgrind, - * AddressSanitizer and other similar tool. - * So, define it to 0 for calmness if doubt. */ -#define T1HA_USE_FAST_ONESHOT_READ 1 -#endif /* T1HA_USE_FAST_ONESHOT_READ */ - -/*****************************************************************************/ - -#include <assert.h> /* for assert() */ -#include <stdbool.h> /* for bool */ -#include <string.h> /* for memcpy() */ - -#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ - __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ -#error Unsupported byte order. 
-#endif - -#define T1HA_UNALIGNED_ACCESS__UNABLE 0 -#define T1HA_UNALIGNED_ACCESS__SLOW 1 -#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2 - -#ifndef T1HA_SYS_UNALIGNED_ACCESS -#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#elif defined(__ia32__) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#elif defined(__e2k__) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW -#elif defined(__ARM_FEATURE_UNALIGNED) -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT -#else -#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE -#endif -#endif /* T1HA_SYS_UNALIGNED_ACCESS */ - -#define ALIGNMENT_16 2 -#define ALIGNMENT_32 4 -#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul -#define ALIGNMENT_64 8 -#else -#define ALIGNMENT_64 4 -#endif - -#ifndef PAGESIZE -#define PAGESIZE 4096 -#endif /* PAGESIZE */ - -/***************************************************************************/ - -#ifndef __has_builtin -#define __has_builtin(x) (0) -#endif - -#ifndef __has_warning -#define __has_warning(x) (0) -#endif - -#ifndef __has_feature -#define __has_feature(x) (0) -#endif - -#ifndef __has_extension -#define __has_extension(x) (0) -#endif - -#if __has_feature(address_sanitizer) -#define __SANITIZE_ADDRESS__ 1 -#endif - -#ifndef __optimize + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once + +#if defined(_MSC_VER) +#pragma warning(disable : 4201) /* nameless struct/union */ +#if _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' */ +#endif /* 1800 */ +#endif /* MSVC */ +#include "../t1ha.h" + +#ifndef T1HA_USE_FAST_ONESHOT_READ +/* Define it to 1 for little bit faster code. + * Unfortunately this may triggering a false-positive alarms from Valgrind, + * AddressSanitizer and other similar tool. + * So, define it to 0 for calmness if doubt. */ +#define T1HA_USE_FAST_ONESHOT_READ 1 +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + +/*****************************************************************************/ + +#include <assert.h> /* for assert() */ +#include <stdbool.h> /* for bool */ +#include <string.h> /* for memcpy() */ + +#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__ && \ + __BYTE_ORDER__ != __ORDER_BIG_ENDIAN__ +#error Unsupported byte order. 
+#endif + +#define T1HA_UNALIGNED_ACCESS__UNABLE 0 +#define T1HA_UNALIGNED_ACCESS__SLOW 1 +#define T1HA_UNALIGNED_ACCESS__EFFICIENT 2 + +#ifndef T1HA_SYS_UNALIGNED_ACCESS +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__ia32__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#elif defined(__e2k__) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__SLOW +#elif defined(__ARM_FEATURE_UNALIGNED) +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__EFFICIENT +#else +#define T1HA_SYS_UNALIGNED_ACCESS T1HA_UNALIGNED_ACCESS__UNABLE +#endif +#endif /* T1HA_SYS_UNALIGNED_ACCESS */ + +#define ALIGNMENT_16 2 +#define ALIGNMENT_32 4 +#if UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul +#define ALIGNMENT_64 8 +#else +#define ALIGNMENT_64 4 +#endif + +#ifndef PAGESIZE +#define PAGESIZE 4096 +#endif /* PAGESIZE */ + +/***************************************************************************/ + +#ifndef __has_builtin +#define __has_builtin(x) (0) +#endif + +#ifndef __has_warning +#define __has_warning(x) (0) +#endif + +#ifndef __has_feature +#define __has_feature(x) (0) +#endif + +#ifndef __has_extension +#define __has_extension(x) (0) +#endif + +#if __has_feature(address_sanitizer) +#define __SANITIZE_ADDRESS__ 1 +#endif + +#ifndef __optimize #if defined(__clang__) && !__has_attribute(__optimize__) -#define __optimize(ops) +#define __optimize(ops) #elif defined(__GNUC__) || __has_attribute(__optimize__) #define __optimize(ops) __attribute__((__optimize__(ops))) -#else -#define __optimize(ops) -#endif -#endif /* __optimize */ - -#ifndef __cold -#if defined(__OPTIMIZE__) -#if defined(__e2k__) +#else +#define __optimize(ops) +#endif +#endif /* __optimize */ + +#ifndef __cold +#if defined(__OPTIMIZE__) +#if defined(__e2k__) #define __cold __optimize(1) __attribute__((__cold__)) #elif defined(__clang__) && !__has_attribute(__cold__) && \ __has_attribute(__section__) -/* just put infrequently used functions in separate section */ +/* just put infrequently used functions in separate section */ #define __cold __attribute__((__section__("text.unlikely"))) __optimize("Os") #elif defined(__GNUC__) || __has_attribute(__cold__) #define __cold __attribute__((__cold__)) __optimize("Os") -#else -#define __cold __optimize("Os") -#endif -#else -#define __cold -#endif -#endif /* __cold */ - -#if __GNUC_PREREQ(4, 4) || defined(__clang__) - -#if defined(__ia32__) || defined(__e2k__) -#include <x86intrin.h> -#endif - -#if defined(__ia32__) && !defined(__cpuid_count) -#include <cpuid.h> -#endif - -#if defined(__e2k__) +#else +#define __cold __optimize("Os") +#endif +#else +#define __cold +#endif +#endif /* __cold */ + +#if __GNUC_PREREQ(4, 4) || defined(__clang__) + +#if defined(__ia32__) || defined(__e2k__) +#include <x86intrin.h> +#endif + +#if defined(__ia32__) && !defined(__cpuid_count) +#include <cpuid.h> +#endif + +#if defined(__e2k__) #include <e2kbuiltin.h> -#endif - -#ifndef likely -#define likely(cond) __builtin_expect(!!(cond), 1) -#endif - -#ifndef unlikely -#define unlikely(cond) __builtin_expect(!!(cond), 0) -#endif - -#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable) -#define unreachable() __builtin_unreachable() -#endif - -#define bswap64(v) __builtin_bswap64(v) -#define bswap32(v) __builtin_bswap32(v) -#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) -#define bswap16(v) __builtin_bswap16(v) -#endif - +#endif + +#ifndef likely +#define likely(cond) 
__builtin_expect(!!(cond), 1) +#endif + +#ifndef unlikely +#define unlikely(cond) __builtin_expect(!!(cond), 0) +#endif + +#if __GNUC_PREREQ(4, 5) || __has_builtin(__builtin_unreachable) +#define unreachable() __builtin_unreachable() +#endif + +#define bswap64(v) __builtin_bswap64(v) +#define bswap32(v) __builtin_bswap32(v) +#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_bswap16) +#define bswap16(v) __builtin_bswap16(v) +#endif + #if !defined(__maybe_unused) && \ (__GNUC_PREREQ(4, 3) || __has_attribute(__unused__)) #define __maybe_unused __attribute__((__unused__)) -#endif - -#if !defined(__always_inline) && \ +#endif + +#if !defined(__always_inline) && \ (__GNUC_PREREQ(3, 2) || __has_attribute(__always_inline__)) #define __always_inline __inline __attribute__((__always_inline__)) -#endif - -#if defined(__e2k__) - -#if __iset__ >= 3 -#define mul_64x64_high(a, b) __builtin_e2k_umulhd(a, b) -#endif /* __iset__ >= 3 */ - -#if __iset__ >= 5 -static __maybe_unused __always_inline unsigned -e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { - *sum = base + addend; - return (unsigned)__builtin_e2k_addcd_c(base, addend, 0); -} -#define add64carry_first(base, addend, sum) \ - e2k_add64carry_first(base, addend, sum) - -static __maybe_unused __always_inline unsigned -e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend, - uint64_t *sum) { - *sum = __builtin_e2k_addcd(base, addend, carry); - return (unsigned)__builtin_e2k_addcd_c(base, addend, carry); -} -#define add64carry_next(carry, base, addend, sum) \ - e2k_add64carry_next(carry, base, addend, sum) - -static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry, - uint64_t base, - uint64_t addend, - uint64_t *sum) { - *sum = __builtin_e2k_addcd(base, addend, carry); -} -#define add64carry_last(carry, base, addend, sum) \ - e2k_add64carry_last(carry, base, addend, sum) -#endif /* __iset__ >= 5 */ - -#define fetch64_be_aligned(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr)) -#define fetch32_be_aligned(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr)) - -#endif /* __e2k__ Elbrus */ - -#elif defined(_MSC_VER) - -#if _MSC_FULL_VER < 190024234 && defined(_M_IX86) -#pragma message( \ - "For AES-NI at least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required.") -#endif -#if _MSC_FULL_VER < 191526730 -#pragma message( \ - "It is recommended to use \"Microsoft C/C++ Compiler\" version 19.15.26730 (Visual Studio 2017 15.8) or newer.") -#endif -#if _MSC_FULL_VER < 180040629 -#error At least "Microsoft C/C++ Compiler" version 18.00.40629 (Visual Studio 2013 Update 5) is required. 
-#endif - -#pragma warning(push, 1) - -#include <intrin.h> -#include <stdlib.h> -#define likely(cond) (cond) -#define unlikely(cond) (cond) -#define unreachable() __assume(0) -#define bswap64(v) _byteswap_uint64(v) -#define bswap32(v) _byteswap_ulong(v) -#define bswap16(v) _byteswap_ushort(v) -#define rot64(v, s) _rotr64(v, s) -#define rot32(v, s) _rotr(v, s) -#define __always_inline __forceinline - -#if defined(_M_X64) || defined(_M_IA64) -#pragma intrinsic(_umul128) -#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) -#pragma intrinsic(_addcarry_u64) -#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum) -#define add64carry_next(carry, base, addend, sum) \ - _addcarry_u64(carry, base, addend, sum) -#define add64carry_last(carry, base, addend, sum) \ - (void)_addcarry_u64(carry, base, addend, sum) -#endif - -#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) -#pragma intrinsic(__umulh) -#define mul_64x64_high(a, b) __umulh(a, b) -#endif - -#if defined(_M_IX86) -#pragma intrinsic(__emulu) -#define mul_32x32_64(a, b) __emulu(a, b) - -#if _MSC_VER >= 1915 /* LY: workaround for SSA-optimizer bug */ -#pragma intrinsic(_addcarry_u32) -#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum) -#define add32carry_next(carry, base, addend, sum) \ - _addcarry_u32(carry, base, addend, sum) -#define add32carry_last(carry, base, addend, sum) \ - (void)_addcarry_u32(carry, base, addend, sum) - -static __forceinline char -msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = (uint32_t)(addend >> 32); - return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), - base_32h, addend_32h, sum32 + 1); -} -#define add64carry_first(base, addend, sum) \ - msvc32_add64carry_first(base, addend, sum) - -static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, - uint64_t addend, - uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = (uint32_t)(addend >> 32); - return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), - base_32h, addend_32h, sum32 + 1); -} -#define add64carry_next(carry, base, addend, sum) \ - msvc32_add64carry_next(carry, base, addend, sum) - -static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, - uint64_t addend, - uint64_t *sum) { - uint32_t *const sum32 = (uint32_t *)sum; - const uint32_t base_32l = (uint32_t)base; - const uint32_t base_32h = (uint32_t)(base >> 32); - const uint32_t addend_32l = (uint32_t)addend; - const uint32_t addend_32h = (uint32_t)(addend >> 32); - add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, - addend_32h, sum32 + 1); -} -#define add64carry_last(carry, base, addend, sum) \ - msvc32_add64carry_last(carry, base, addend, sum) -#endif /* _MSC_FULL_VER >= 190024231 */ - -#elif defined(_M_ARM) -#define mul_32x32_64(a, b) _arm_umull(a, b) -#endif - -#pragma warning(pop) -#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ - has been removed */ -#pragma warning(disable : 4710) /* 'xyz': function not inlined */ -#pragma warning(disable : 4711) /* function 'xyz' selected for \ - automatic inline 
expansion */ -#pragma warning(disable : 4127) /* conditional expression is constant */ -#pragma warning(disable : 4702) /* unreachable code */ -#endif /* Compiler */ - -#ifndef likely -#define likely(cond) (cond) -#endif -#ifndef unlikely -#define unlikely(cond) (cond) -#endif -#ifndef __maybe_unused -#define __maybe_unused -#endif -#ifndef __always_inline -#define __always_inline __inline -#endif -#ifndef unreachable -#define unreachable() \ - do { \ - } while (1) -#endif - -#ifndef bswap64 -#if defined(bswap_64) -#define bswap64 bswap_64 -#elif defined(__bswap_64) -#define bswap64 __bswap_64 -#else -static __always_inline uint64_t bswap64(uint64_t v) { - return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | - ((v << 24) & UINT64_C(0x0000ff0000000000)) | - ((v << 8) & UINT64_C(0x000000ff00000000)) | - ((v >> 8) & UINT64_C(0x00000000ff000000)) | - ((v >> 24) & UINT64_C(0x0000000000ff0000)) | - ((v >> 40) & UINT64_C(0x000000000000ff00)); -} -#endif -#endif /* bswap64 */ - -#ifndef bswap32 -#if defined(bswap_32) -#define bswap32 bswap_32 -#elif defined(__bswap_32) -#define bswap32 __bswap_32 -#else -static __always_inline uint32_t bswap32(uint32_t v) { - return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | - ((v >> 8) & UINT32_C(0x0000ff00)); -} -#endif -#endif /* bswap32 */ - -#ifndef bswap16 -#if defined(bswap_16) -#define bswap16 bswap_16 -#elif defined(__bswap_16) -#define bswap16 __bswap_16 -#else -static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } -#endif -#endif /* bswap16 */ - +#endif + +#if defined(__e2k__) + +#if __iset__ >= 3 +#define mul_64x64_high(a, b) __builtin_e2k_umulhd(a, b) +#endif /* __iset__ >= 3 */ + +#if __iset__ >= 5 +static __maybe_unused __always_inline unsigned +e2k_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { + *sum = base + addend; + return (unsigned)__builtin_e2k_addcd_c(base, addend, 0); +} +#define add64carry_first(base, addend, sum) \ + e2k_add64carry_first(base, addend, sum) + +static __maybe_unused __always_inline unsigned +e2k_add64carry_next(unsigned carry, uint64_t base, uint64_t addend, + uint64_t *sum) { + *sum = __builtin_e2k_addcd(base, addend, carry); + return (unsigned)__builtin_e2k_addcd_c(base, addend, carry); +} +#define add64carry_next(carry, base, addend, sum) \ + e2k_add64carry_next(carry, base, addend, sum) + +static __maybe_unused __always_inline void e2k_add64carry_last(unsigned carry, + uint64_t base, + uint64_t addend, + uint64_t *sum) { + *sum = __builtin_e2k_addcd(base, addend, carry); +} +#define add64carry_last(carry, base, addend, sum) \ + e2k_add64carry_last(carry, base, addend, sum) +#endif /* __iset__ >= 5 */ + +#define fetch64_be_aligned(ptr) ((uint64_t)__builtin_e2k_ld_64s_be(ptr)) +#define fetch32_be_aligned(ptr) ((uint32_t)__builtin_e2k_ld_32u_be(ptr)) + +#endif /* __e2k__ Elbrus */ + +#elif defined(_MSC_VER) + +#if _MSC_FULL_VER < 190024234 && defined(_M_IX86) +#pragma message( \ + "For AES-NI at least \"Microsoft C/C++ Compiler\" version 19.00.24234 (Visual Studio 2015 Update 3) is required.") +#endif +#if _MSC_FULL_VER < 191526730 +#pragma message( \ + "It is recommended to use \"Microsoft C/C++ Compiler\" version 19.15.26730 (Visual Studio 2017 15.8) or newer.") +#endif +#if _MSC_FULL_VER < 180040629 +#error At least "Microsoft C/C++ Compiler" version 18.00.40629 (Visual Studio 2013 Update 5) is required. 
+#endif + +#pragma warning(push, 1) + +#include <intrin.h> +#include <stdlib.h> +#define likely(cond) (cond) +#define unlikely(cond) (cond) +#define unreachable() __assume(0) +#define bswap64(v) _byteswap_uint64(v) +#define bswap32(v) _byteswap_ulong(v) +#define bswap16(v) _byteswap_ushort(v) +#define rot64(v, s) _rotr64(v, s) +#define rot32(v, s) _rotr(v, s) +#define __always_inline __forceinline + +#if defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(_umul128) +#define mul_64x64_128(a, b, ph) _umul128(a, b, ph) +#pragma intrinsic(_addcarry_u64) +#define add64carry_first(base, addend, sum) _addcarry_u64(0, base, addend, sum) +#define add64carry_next(carry, base, addend, sum) \ + _addcarry_u64(carry, base, addend, sum) +#define add64carry_last(carry, base, addend, sum) \ + (void)_addcarry_u64(carry, base, addend, sum) +#endif + +#if defined(_M_ARM64) || defined(_M_X64) || defined(_M_IA64) +#pragma intrinsic(__umulh) +#define mul_64x64_high(a, b) __umulh(a, b) +#endif + +#if defined(_M_IX86) +#pragma intrinsic(__emulu) +#define mul_32x32_64(a, b) __emulu(a, b) + +#if _MSC_VER >= 1915 /* LY: workaround for SSA-optimizer bug */ +#pragma intrinsic(_addcarry_u32) +#define add32carry_first(base, addend, sum) _addcarry_u32(0, base, addend, sum) +#define add32carry_next(carry, base, addend, sum) \ + _addcarry_u32(carry, base, addend, sum) +#define add32carry_last(carry, base, addend, sum) \ + (void)_addcarry_u32(carry, base, addend, sum) + +static __forceinline char +msvc32_add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + return add32carry_next(add32carry_first(base_32l, addend_32l, sum32), + base_32h, addend_32h, sum32 + 1); +} +#define add64carry_first(base, addend, sum) \ + msvc32_add64carry_first(base, addend, sum) + +static __forceinline char msvc32_add64carry_next(char carry, uint64_t base, + uint64_t addend, + uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + return add32carry_next(add32carry_next(carry, base_32l, addend_32l, sum32), + base_32h, addend_32h, sum32 + 1); +} +#define add64carry_next(carry, base, addend, sum) \ + msvc32_add64carry_next(carry, base, addend, sum) + +static __forceinline void msvc32_add64carry_last(char carry, uint64_t base, + uint64_t addend, + uint64_t *sum) { + uint32_t *const sum32 = (uint32_t *)sum; + const uint32_t base_32l = (uint32_t)base; + const uint32_t base_32h = (uint32_t)(base >> 32); + const uint32_t addend_32l = (uint32_t)addend; + const uint32_t addend_32h = (uint32_t)(addend >> 32); + add32carry_last(add32carry_next(carry, base_32l, addend_32l, sum32), base_32h, + addend_32h, sum32 + 1); +} +#define add64carry_last(carry, base, addend, sum) \ + msvc32_add64carry_last(carry, base, addend, sum) +#endif /* _MSC_FULL_VER >= 190024231 */ + +#elif defined(_M_ARM) +#define mul_32x32_64(a, b) _arm_umull(a, b) +#endif + +#pragma warning(pop) +#pragma warning(disable : 4514) /* 'xyz': unreferenced inline function \ + has been removed */ +#pragma warning(disable : 4710) /* 'xyz': function not inlined */ +#pragma warning(disable : 4711) /* function 'xyz' selected for \ + automatic inline 
expansion */ +#pragma warning(disable : 4127) /* conditional expression is constant */ +#pragma warning(disable : 4702) /* unreachable code */ +#endif /* Compiler */ + +#ifndef likely +#define likely(cond) (cond) +#endif +#ifndef unlikely +#define unlikely(cond) (cond) +#endif +#ifndef __maybe_unused +#define __maybe_unused +#endif +#ifndef __always_inline +#define __always_inline __inline +#endif +#ifndef unreachable +#define unreachable() \ + do { \ + } while (1) +#endif + +#ifndef bswap64 +#if defined(bswap_64) +#define bswap64 bswap_64 +#elif defined(__bswap_64) +#define bswap64 __bswap_64 +#else +static __always_inline uint64_t bswap64(uint64_t v) { + return v << 56 | v >> 56 | ((v << 40) & UINT64_C(0x00ff000000000000)) | + ((v << 24) & UINT64_C(0x0000ff0000000000)) | + ((v << 8) & UINT64_C(0x000000ff00000000)) | + ((v >> 8) & UINT64_C(0x00000000ff000000)) | + ((v >> 24) & UINT64_C(0x0000000000ff0000)) | + ((v >> 40) & UINT64_C(0x000000000000ff00)); +} +#endif +#endif /* bswap64 */ + +#ifndef bswap32 +#if defined(bswap_32) +#define bswap32 bswap_32 +#elif defined(__bswap_32) +#define bswap32 __bswap_32 +#else +static __always_inline uint32_t bswap32(uint32_t v) { + return v << 24 | v >> 24 | ((v << 8) & UINT32_C(0x00ff0000)) | + ((v >> 8) & UINT32_C(0x0000ff00)); +} +#endif +#endif /* bswap32 */ + +#ifndef bswap16 +#if defined(bswap_16) +#define bswap16 bswap_16 +#elif defined(__bswap_16) +#define bswap16 __bswap_16 +#else +static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } +#endif +#endif /* bswap16 */ + #if defined(__ia32__) || \ T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT /* The __builtin_assume_aligned() leads gcc/clang to load values into the @@ -411,844 +411,844 @@ static __always_inline uint16_t bswap16(uint16_t v) { return v << 8 | v >> 8; } #define read_aligned(ptr, bits) (*(const uint##bits##_t *__restrict)(ptr)) #endif /* __ia32__ */ -#ifndef read_unaligned +#ifndef read_unaligned #if defined(__GNUC__) || __has_attribute(__packed__) -typedef struct { - uint8_t unaligned_8; - uint16_t unaligned_16; - uint32_t unaligned_32; - uint64_t unaligned_64; +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; } __attribute__((__packed__)) t1ha_unaligned_proxy; -#define read_unaligned(ptr, bits) \ - (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ - t1ha_unaligned_proxy, unaligned_##bits))) \ - ->unaligned_##bits) -#elif defined(_MSC_VER) -#pragma warning( \ - disable : 4235) /* nonstandard extension used: '__unaligned' \ - * keyword not supported on this architecture */ -#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr)) -#else -#pragma pack(push, 1) -typedef struct { - uint8_t unaligned_8; - uint16_t unaligned_16; - uint32_t unaligned_32; - uint64_t unaligned_64; -} t1ha_unaligned_proxy; -#pragma pack(pop) -#define read_unaligned(ptr, bits) \ - (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ - t1ha_unaligned_proxy, unaligned_##bits))) \ - ->unaligned_##bits) -#endif -#endif /* read_unaligned */ - -#ifndef read_aligned -#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) -#define read_aligned(ptr, bits) \ - (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#elif defined(_MSC_VER) +#pragma warning( \ 
+ disable : 4235) /* nonstandard extension used: '__unaligned' \ + * keyword not supported on this architecture */ +#define read_unaligned(ptr, bits) (*(const __unaligned uint##bits##_t *)(ptr)) +#else +#pragma pack(push, 1) +typedef struct { + uint8_t unaligned_8; + uint16_t unaligned_16; + uint32_t unaligned_32; + uint64_t unaligned_64; +} t1ha_unaligned_proxy; +#pragma pack(pop) +#define read_unaligned(ptr, bits) \ + (((const t1ha_unaligned_proxy *)((const uint8_t *)(ptr)-offsetof( \ + t1ha_unaligned_proxy, unaligned_##bits))) \ + ->unaligned_##bits) +#endif +#endif /* read_unaligned */ + +#ifndef read_aligned +#if __GNUC_PREREQ(4, 8) || __has_builtin(__builtin_assume_aligned) +#define read_aligned(ptr, bits) \ + (*(const uint##bits##_t *)__builtin_assume_aligned(ptr, ALIGNMENT_##bits)) #elif (__GNUC_PREREQ(3, 3) || __has_attribute(__aligned__)) && \ !defined(__clang__) -#define read_aligned(ptr, bits) \ +#define read_aligned(ptr, bits) \ (*(const uint##bits##_t \ __attribute__((__aligned__(ALIGNMENT_##bits))) *)(ptr)) #elif __has_attribute(__assume_aligned__) - -static __always_inline const + +static __always_inline const uint16_t *__attribute__((__assume_aligned__(ALIGNMENT_16))) - cast_aligned_16(const void *ptr) { - return (const uint16_t *)ptr; -} -static __always_inline const + cast_aligned_16(const void *ptr) { + return (const uint16_t *)ptr; +} +static __always_inline const uint32_t *__attribute__((__assume_aligned__(ALIGNMENT_32))) - cast_aligned_32(const void *ptr) { - return (const uint32_t *)ptr; -} -static __always_inline const + cast_aligned_32(const void *ptr) { + return (const uint32_t *)ptr; +} +static __always_inline const uint64_t *__attribute__((__assume_aligned__(ALIGNMENT_64))) - cast_aligned_64(const void *ptr) { - return (const uint64_t *)ptr; -} - -#define read_aligned(ptr, bits) (*cast_aligned_##bits(ptr)) - -#elif defined(_MSC_VER) -#define read_aligned(ptr, bits) \ - (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) -#else -#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) -#endif -#endif /* read_aligned */ - -#ifndef prefetch -#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \ - !defined(__ia32__) -#define prefetch(ptr) __builtin_prefetch(ptr) -#elif defined(_M_ARM64) || defined(_M_ARM) -#define prefetch(ptr) __prefetch(ptr) -#else -#define prefetch(ptr) \ - do { \ - (void)(ptr); \ - } while (0) -#endif -#endif /* prefetch */ - -#if __has_warning("-Wconstant-logical-operand") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wconstant-logical-operand" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wconstant-logical-operand" -#else -#pragma warning disable "constant-logical-operand" -#endif -#endif /* -Wconstant-logical-operand */ - -#if __has_warning("-Wtautological-pointer-compare") -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wtautological-pointer-compare" -#elif defined(__GNUC__) -#pragma GCC diagnostic ignored "-Wtautological-pointer-compare" -#else -#pragma warning disable "tautological-pointer-compare" -#endif -#endif /* -Wtautological-pointer-compare */ - -/***************************************************************************/ - -#if __GNUC_PREREQ(4, 0) -#pragma GCC visibility push(hidden) -#endif /* __GNUC_PREREQ(4,0) */ - -/*---------------------------------------------------------- Little Endian */ - -#ifndef fetch16_le_aligned + cast_aligned_64(const void *ptr) { + return (const uint64_t *)ptr; +} + +#define read_aligned(ptr, bits) 
(*cast_aligned_##bits(ptr)) + +#elif defined(_MSC_VER) +#define read_aligned(ptr, bits) \ + (*(const __declspec(align(ALIGNMENT_##bits)) uint##bits##_t *)(ptr)) +#else +#define read_aligned(ptr, bits) (*(const uint##bits##_t *)(ptr)) +#endif +#endif /* read_aligned */ + +#ifndef prefetch +#if (__GNUC_PREREQ(4, 0) || __has_builtin(__builtin_prefetch)) && \ + !defined(__ia32__) +#define prefetch(ptr) __builtin_prefetch(ptr) +#elif defined(_M_ARM64) || defined(_M_ARM) +#define prefetch(ptr) __prefetch(ptr) +#else +#define prefetch(ptr) \ + do { \ + (void)(ptr); \ + } while (0) +#endif +#endif /* prefetch */ + +#if __has_warning("-Wconstant-logical-operand") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wconstant-logical-operand" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wconstant-logical-operand" +#else +#pragma warning disable "constant-logical-operand" +#endif +#endif /* -Wconstant-logical-operand */ + +#if __has_warning("-Wtautological-pointer-compare") +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wtautological-pointer-compare" +#elif defined(__GNUC__) +#pragma GCC diagnostic ignored "-Wtautological-pointer-compare" +#else +#pragma warning disable "tautological-pointer-compare" +#endif +#endif /* -Wtautological-pointer-compare */ + +/***************************************************************************/ + +#if __GNUC_PREREQ(4, 0) +#pragma GCC visibility push(hidden) +#endif /* __GNUC_PREREQ(4,0) */ + +/*---------------------------------------------------------- Little Endian */ + +#ifndef fetch16_le_aligned static __maybe_unused __always_inline uint16_t fetch16_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_16 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 16); -#else - return bswap16(read_aligned(v, 16)); -#endif -} -#endif /* fetch16_le_aligned */ - -#ifndef fetch16_le_unaligned + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_le_aligned */ + +#ifndef fetch16_le_unaligned static __maybe_unused __always_inline uint16_t fetch16_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - const uint8_t *p = (const uint8_t *)v; - return p[0] | (uint16_t)p[1] << 8; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 16); -#else - return bswap16(read_unaligned(v, 16)); -#endif -} -#endif /* fetch16_le_unaligned */ - -#ifndef fetch32_le_aligned +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return p[0] | (uint16_t)p[1] << 8; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_le_unaligned */ + +#ifndef fetch32_le_aligned static __maybe_unused __always_inline uint32_t fetch32_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_32 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 32); -#else - return bswap32(read_aligned(v, 32)); -#endif -} -#endif /* fetch32_le_aligned */ - -#ifndef fetch32_le_unaligned + assert(((uintptr_t)v) % ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_le_aligned */ + +#ifndef fetch32_le_unaligned static __maybe_unused __always_inline 
uint32_t fetch32_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return fetch16_le_unaligned(v) | - (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 32); -#else - return bswap32(read_unaligned(v, 32)); -#endif -} -#endif /* fetch32_le_unaligned */ - -#ifndef fetch64_le_aligned +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch16_le_unaligned(v) | + (uint32_t)fetch16_le_unaligned((const uint8_t *)v + 2) << 16; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_le_unaligned */ + +#ifndef fetch64_le_aligned static __maybe_unused __always_inline uint64_t fetch64_le_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_64 == 0); -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_aligned(v, 64); -#else - return bswap64(read_aligned(v, 64)); -#endif -} -#endif /* fetch64_le_aligned */ - -#ifndef fetch64_le_unaligned + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_le_aligned */ + +#ifndef fetch64_le_unaligned static __maybe_unused __always_inline uint64_t fetch64_le_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return fetch32_le_unaligned(v) | - (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; -#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - return read_unaligned(v, 64); -#else - return bswap64(read_unaligned(v, 64)); -#endif -} -#endif /* fetch64_le_unaligned */ - +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return fetch32_le_unaligned(v) | + (uint64_t)fetch32_le_unaligned((const uint8_t *)v + 4) << 32; +#elif __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_le_unaligned */ + static __maybe_unused __always_inline uint64_t tail64_le_aligned(const void *v, size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((8 - tail) & 7) << 3; - return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift); -#else - uint64_t r = 0; - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed byte reordering. 
*/ - case 0: - return fetch64_le_aligned(p); - case 7: - r = (uint64_t)p[6] << 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 32; - /* fall through */ - case 4: - return r + fetch32_le_aligned(p); - case 3: - r = (uint64_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_aligned(p); - case 1: - return p[0]; -#else - case 0: - r = p[7] << 8; - /* fall through */ - case 7: - r += p[6]; - r <<= 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 8; - /* fall through */ - case 4: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -#if T1HA_USE_FAST_ONESHOT_READ && \ - T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ - defined(PAGESIZE) && PAGESIZE > 42 && !defined(__SANITIZE_ADDRESS__) -#define can_read_underside(ptr, size) \ - (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) -#endif /* T1HA_USE_FAST_ONESHOT_READ */ - + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_le_aligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed byte reordering. */ + case 0: + return fetch64_le_aligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_aligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_aligned(p); + case 1: + return p[0]; +#else + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +#if T1HA_USE_FAST_ONESHOT_READ && \ + T1HA_SYS_UNALIGNED_ACCESS != T1HA_UNALIGNED_ACCESS__UNABLE && \ + defined(PAGESIZE) && PAGESIZE > 42 && !defined(__SANITIZE_ADDRESS__) +#define can_read_underside(ptr, size) \ + (((PAGESIZE - (size)) & (uintptr_t)(ptr)) != 0) +#endif /* T1HA_USE_FAST_ONESHOT_READ */ + static __maybe_unused __always_inline uint64_t tail64_le_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#if defined(can_read_underside) && \ - (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) - /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. 
*/ - const unsigned offset = (8 - tail) & 7; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 8))) { - p -= offset; - return fetch64_le_unaligned(p) >> shift; - } - return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); -#else - uint64_t r = 0; - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. */ - case 0: - return fetch64_le_unaligned(p); - case 7: - r = (uint64_t)p[6] << 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 32; - /* fall through */ - case 4: - return r + fetch32_le_unaligned(p); - case 3: - r = (uint64_t)p[2] << 16; - /* fall through */ - case 2: - return r + fetch16_le_unaligned(p); - case 1: - return p[0]; -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. */ - case 0: - r = p[7] << 8; - /* fall through */ - case 7: - r += p[6]; - r <<= 8; - /* fall through */ - case 6: - r += p[5]; - r <<= 8; - /* fall through */ - case 5: - r += p[4]; - r <<= 8; - /* fall through */ - case 4: - r += p[3]; - r <<= 8; - /* fall through */ - case 3: - r += p[2]; - r <<= 8; - /* fall through */ - case 2: - r += p[1]; - r <<= 8; - /* fall through */ - case 1: - return r + p[0]; -#endif - } -#endif /* can_read_underside */ -} - -/*------------------------------------------------------------- Big Endian */ - -#ifndef fetch16_be_aligned -static __maybe_unused __always_inline uint16_t -fetch16_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_16 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 16); -#else - return bswap16(read_aligned(v, 16)); -#endif -} -#endif /* fetch16_be_aligned */ - -#ifndef fetch16_be_unaligned -static __maybe_unused __always_inline uint16_t -fetch16_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - const uint8_t *p = (const uint8_t *)v; - return (uint16_t)p[0] << 8 | p[1]; -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 16); -#else - return bswap16(read_unaligned(v, 16)); -#endif -} -#endif /* fetch16_be_unaligned */ - -#ifndef fetch32_be_aligned -static __maybe_unused __always_inline uint32_t -fetch32_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_32 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 32); -#else - return bswap32(read_aligned(v, 32)); -#endif -} -#endif /* fetch32_be_aligned */ - -#ifndef fetch32_be_unaligned -static __maybe_unused __always_inline uint32_t -fetch32_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return (uint32_t)fetch16_be_unaligned(v) << 16 | - fetch16_be_unaligned((const uint8_t *)v + 2); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 32); -#else - return bswap32(read_unaligned(v, 32)); -#endif -} -#endif /* fetch32_be_unaligned */ - -#ifndef fetch64_be_aligned -static __maybe_unused __always_inline uint64_t -fetch64_be_aligned(const void *v) { - assert(((uintptr_t)v) % ALIGNMENT_64 == 0); -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_aligned(v, 64); -#else - return bswap64(read_aligned(v, 64)); -#endif -} -#endif /* fetch64_be_aligned */ - -#ifndef fetch64_be_unaligned -static __maybe_unused __always_inline uint64_t 
-fetch64_be_unaligned(const void *v) { -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE - return (uint64_t)fetch32_be_unaligned(v) << 32 | - fetch32_be_unaligned((const uint8_t *)v + 4); -#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - return read_unaligned(v, 64); -#else - return bswap64(read_unaligned(v, 64)); -#endif -} -#endif /* fetch64_be_unaligned */ - -static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, - size_t tail) { - const uint8_t *const p = (const uint8_t *)v; -#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) - /* We can perform a 'oneshot' read, which is little bit faster. */ - const unsigned shift = ((8 - tail) & 7) << 3; - return fetch64_be_aligned(p) >> shift; -#else - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not byte reordering. */ - case 1: - return p[0]; - case 2: - return fetch16_be_aligned(p); - case 3: - return (uint32_t)fetch16_be_aligned(p) << 8 | p[2]; - case 4: - return fetch32_be_aligned(p); - case 5: - return (uint64_t)fetch32_be_aligned(p) << 8 | p[4]; - case 6: - return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4); - case 7: - return (uint64_t)fetch32_be_aligned(p) << 24 | - (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6]; - case 0: - return fetch64_be_aligned(p); -#else - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 4: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; - case 5: - return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | - (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; - case 6: - return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | - (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; - case 7: - return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | - (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | - (uint64_t)p[0] << 48; - case 0: - return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | - (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | - (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; -#endif - } -#endif /* T1HA_USE_FAST_ONESHOT_READ */ -} - -static __maybe_unused __always_inline uint64_t -tail64_be_unaligned(const void *v, size_t tail) { - const uint8_t *p = (const uint8_t *)v; -#if defined(can_read_underside) && \ - (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) - /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which - * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> - * for the reminder. */ - const unsigned offset = (8 - tail) & 7; - const unsigned shift = offset << 3; - if (likely(can_read_underside(p, 8))) { - p -= offset; - return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); - } - return fetch64_be_unaligned(p) >> shift; -#else - switch (tail & 7) { - default: - unreachable(); -/* fall through */ -#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ - __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ - /* For most CPUs this code is better when not needed - * copying for alignment or byte reordering. 
*/ - case 1: - return p[0]; - case 2: - return fetch16_be_unaligned(p); - case 3: - return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2]; - case 4: - return fetch32_be(p); - case 5: - return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4]; - case 6: - return (uint64_t)fetch32_be_unaligned(p) << 16 | - fetch16_be_unaligned(p + 4); - case 7: - return (uint64_t)fetch32_be_unaligned(p) << 24 | - (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6]; - case 0: - return fetch64_be_unaligned(p); -#else - /* For most CPUs this code is better than a - * copying for alignment and/or byte reordering. */ - case 1: - return p[0]; - case 2: - return p[1] | (uint32_t)p[0] << 8; - case 3: - return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; - case 4: - return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | - (uint32_t)p[0] << 24; - case 5: - return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | - (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; - case 6: - return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | - (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; - case 7: - return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | - (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | - (uint64_t)p[0] << 48; - case 0: - return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | - (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | - (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; -#endif - } -#endif /* can_read_underside */ -} - -/***************************************************************************/ - -#ifndef rot64 + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. */ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_le_unaligned(p) >> shift; + } + return fetch64_le_unaligned(p) & ((~UINT64_C(0)) >> shift); +#else + uint64_t r = 0; + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 0: + return fetch64_le_unaligned(p); + case 7: + r = (uint64_t)p[6] << 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 32; + /* fall through */ + case 4: + return r + fetch32_le_unaligned(p); + case 3: + r = (uint64_t)p[2] << 16; + /* fall through */ + case 2: + return r + fetch16_le_unaligned(p); + case 1: + return p[0]; +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. 
*/ + case 0: + r = p[7] << 8; + /* fall through */ + case 7: + r += p[6]; + r <<= 8; + /* fall through */ + case 6: + r += p[5]; + r <<= 8; + /* fall through */ + case 5: + r += p[4]; + r <<= 8; + /* fall through */ + case 4: + r += p[3]; + r <<= 8; + /* fall through */ + case 3: + r += p[2]; + r <<= 8; + /* fall through */ + case 2: + r += p[1]; + r <<= 8; + /* fall through */ + case 1: + return r + p[0]; +#endif + } +#endif /* can_read_underside */ +} + +/*------------------------------------------------------------- Big Endian */ + +#ifndef fetch16_be_aligned +static __maybe_unused __always_inline uint16_t +fetch16_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_16 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 16); +#else + return bswap16(read_aligned(v, 16)); +#endif +} +#endif /* fetch16_be_aligned */ + +#ifndef fetch16_be_unaligned +static __maybe_unused __always_inline uint16_t +fetch16_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + const uint8_t *p = (const uint8_t *)v; + return (uint16_t)p[0] << 8 | p[1]; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 16); +#else + return bswap16(read_unaligned(v, 16)); +#endif +} +#endif /* fetch16_be_unaligned */ + +#ifndef fetch32_be_aligned +static __maybe_unused __always_inline uint32_t +fetch32_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_32 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 32); +#else + return bswap32(read_aligned(v, 32)); +#endif +} +#endif /* fetch32_be_aligned */ + +#ifndef fetch32_be_unaligned +static __maybe_unused __always_inline uint32_t +fetch32_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint32_t)fetch16_be_unaligned(v) << 16 | + fetch16_be_unaligned((const uint8_t *)v + 2); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 32); +#else + return bswap32(read_unaligned(v, 32)); +#endif +} +#endif /* fetch32_be_unaligned */ + +#ifndef fetch64_be_aligned +static __maybe_unused __always_inline uint64_t +fetch64_be_aligned(const void *v) { + assert(((uintptr_t)v) % ALIGNMENT_64 == 0); +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_aligned(v, 64); +#else + return bswap64(read_aligned(v, 64)); +#endif +} +#endif /* fetch64_be_aligned */ + +#ifndef fetch64_be_unaligned +static __maybe_unused __always_inline uint64_t +fetch64_be_unaligned(const void *v) { +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__UNABLE + return (uint64_t)fetch32_be_unaligned(v) << 32 | + fetch32_be_unaligned((const uint8_t *)v + 4); +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + return read_unaligned(v, 64); +#else + return bswap64(read_unaligned(v, 64)); +#endif +} +#endif /* fetch64_be_unaligned */ + +static __maybe_unused __always_inline uint64_t tail64_be_aligned(const void *v, + size_t tail) { + const uint8_t *const p = (const uint8_t *)v; +#if T1HA_USE_FAST_ONESHOT_READ && !defined(__SANITIZE_ADDRESS__) + /* We can perform a 'oneshot' read, which is little bit faster. */ + const unsigned shift = ((8 - tail) & 7) << 3; + return fetch64_be_aligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return fetch16_be_aligned(p); + case 3: + return (uint32_t)fetch16_be_aligned(p) << 8 | p[2]; + case 4: + return fetch32_be_aligned(p); + case 5: + return (uint64_t)fetch32_be_aligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_aligned(p) << 16 | fetch16_be_aligned(p + 4); + case 7: + return (uint64_t)fetch32_be_aligned(p) << 24 | + (uint32_t)fetch16_be_aligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_aligned(p); +#else + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* T1HA_USE_FAST_ONESHOT_READ */ +} + +static __maybe_unused __always_inline uint64_t +tail64_be_unaligned(const void *v, size_t tail) { + const uint8_t *p = (const uint8_t *)v; +#if defined(can_read_underside) && \ + (UINTPTR_MAX > 0xffffFFFFul || ULONG_MAX > 0xffffFFFFul) + /* On some systems (e.g. x86_64) we can perform a 'oneshot' read, which + * is little bit faster. Thanks Marcin Żukowski <marcin.zukowski@gmail.com> + * for the reminder. */ + const unsigned offset = (8 - tail) & 7; + const unsigned shift = offset << 3; + if (likely(can_read_underside(p, 8))) { + p -= offset; + return fetch64_be_unaligned(p) & ((~UINT64_C(0)) >> shift); + } + return fetch64_be_unaligned(p) >> shift; +#else + switch (tail & 7) { + default: + unreachable(); +/* fall through */ +#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT && \ + __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* For most CPUs this code is better when not needed + * copying for alignment or byte reordering. */ + case 1: + return p[0]; + case 2: + return fetch16_be_unaligned(p); + case 3: + return (uint32_t)fetch16_be_unaligned(p) << 8 | p[2]; + case 4: + return fetch32_be(p); + case 5: + return (uint64_t)fetch32_be_unaligned(p) << 8 | p[4]; + case 6: + return (uint64_t)fetch32_be_unaligned(p) << 16 | + fetch16_be_unaligned(p + 4); + case 7: + return (uint64_t)fetch32_be_unaligned(p) << 24 | + (uint32_t)fetch16_be_unaligned(p + 4) << 8 | p[6]; + case 0: + return fetch64_be_unaligned(p); +#else + /* For most CPUs this code is better than a + * copying for alignment and/or byte reordering. 
*/ + case 1: + return p[0]; + case 2: + return p[1] | (uint32_t)p[0] << 8; + case 3: + return p[2] | (uint32_t)p[1] << 8 | (uint32_t)p[0] << 16; + case 4: + return p[3] | (uint32_t)p[2] << 8 | (uint32_t)p[1] << 16 | + (uint32_t)p[0] << 24; + case 5: + return p[4] | (uint32_t)p[3] << 8 | (uint32_t)p[2] << 16 | + (uint32_t)p[1] << 24 | (uint64_t)p[0] << 32; + case 6: + return p[5] | (uint32_t)p[4] << 8 | (uint32_t)p[3] << 16 | + (uint32_t)p[2] << 24 | (uint64_t)p[1] << 32 | (uint64_t)p[0] << 40; + case 7: + return p[6] | (uint32_t)p[5] << 8 | (uint32_t)p[4] << 16 | + (uint32_t)p[3] << 24 | (uint64_t)p[2] << 32 | (uint64_t)p[1] << 40 | + (uint64_t)p[0] << 48; + case 0: + return p[7] | (uint32_t)p[6] << 8 | (uint32_t)p[5] << 16 | + (uint32_t)p[4] << 24 | (uint64_t)p[3] << 32 | (uint64_t)p[2] << 40 | + (uint64_t)p[1] << 48 | (uint64_t)p[0] << 56; +#endif + } +#endif /* can_read_underside */ +} + +/***************************************************************************/ + +#ifndef rot64 static __maybe_unused __always_inline uint64_t rot64(uint64_t v, unsigned s) { - return (v >> s) | (v << (64 - s)); -} -#endif /* rot64 */ - -#ifndef mul_32x32_64 + return (v >> s) | (v << (64 - s)); +} +#endif /* rot64 */ + +#ifndef mul_32x32_64 static __maybe_unused __always_inline uint64_t mul_32x32_64(uint32_t a, uint32_t b) { - return a * (uint64_t)b; -} -#endif /* mul_32x32_64 */ - -#ifndef add64carry_first -static __maybe_unused __always_inline unsigned -add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { -#if __has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, 0, &carryout); - return (unsigned)carryout; -#else - *sum = base + addend; - return *sum < addend; -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_fist */ - -#ifndef add64carry_next -static __maybe_unused __always_inline unsigned -add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { -#if __has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, carry, &carryout); - return (unsigned)carryout; -#else - *sum = base + addend + carry; - return *sum < addend || (carry && *sum == addend); -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_next */ - -#ifndef add64carry_last -static __maybe_unused __always_inline void -add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { -#if __has_builtin(__builtin_addcll) - unsigned long long carryout; - *sum = __builtin_addcll(base, addend, carry, &carryout); - (void)carryout; -#else - *sum = base + addend + carry; -#endif /* __has_builtin(__builtin_addcll) */ -} -#endif /* add64carry_last */ - -#ifndef mul_64x64_128 -static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, - uint64_t b, - uint64_t *h) { + return a * (uint64_t)b; +} +#endif /* mul_32x32_64 */ + +#ifndef add64carry_first +static __maybe_unused __always_inline unsigned +add64carry_first(uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, 0, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend; + return *sum < addend; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_fist */ + +#ifndef add64carry_next +static __maybe_unused __always_inline unsigned +add64carry_next(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long 
carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + return (unsigned)carryout; +#else + *sum = base + addend + carry; + return *sum < addend || (carry && *sum == addend); +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_next */ + +#ifndef add64carry_last +static __maybe_unused __always_inline void +add64carry_last(unsigned carry, uint64_t base, uint64_t addend, uint64_t *sum) { +#if __has_builtin(__builtin_addcll) + unsigned long long carryout; + *sum = __builtin_addcll(base, addend, carry, &carryout); + (void)carryout; +#else + *sum = base + addend + carry; +#endif /* __has_builtin(__builtin_addcll) */ +} +#endif /* add64carry_last */ + +#ifndef mul_64x64_128 +static __maybe_unused __always_inline uint64_t mul_64x64_128(uint64_t a, + uint64_t b, + uint64_t *h) { #if (defined(__SIZEOF_INT128__) || \ (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128)) && \ (!defined(__LCC__) || __LCC__ != 124) - __uint128_t r = (__uint128_t)a * (__uint128_t)b; - /* modern GCC could nicely optimize this */ - *h = (uint64_t)(r >> 64); - return (uint64_t)r; -#elif defined(mul_64x64_high) - *h = mul_64x64_high(a, b); - return a * b; -#else - /* performs 64x64 to 128 bit multiplication */ - const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); - const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); - const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); - const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); - - /* Few simplification are possible here for 32-bit architectures, - * but thus we would lost compatibility with the original 64-bit - * version. Think is very bad idea, because then 32-bit t1ha will - * still (relatively) very slowly and well yet not compatible. */ - uint64_t l; - add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); - add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); - return l; -#endif -} -#endif /* mul_64x64_128() */ - -#ifndef mul_64x64_high -static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, - uint64_t b) { - uint64_t h; - mul_64x64_128(a, b, &h); - return h; -} -#endif /* mul_64x64_high */ - -/***************************************************************************/ - -/* 'magic' primes */ -static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); -static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); -static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); -static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); -static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); -static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); -static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); - -/* xor high and low parts of full 128-bit product */ -static __maybe_unused __always_inline uint64_t mux64(uint64_t v, - uint64_t prime) { - uint64_t l, h; - l = mul_64x64_128(v, prime, &h); - return l ^ h; -} - + __uint128_t r = (__uint128_t)a * (__uint128_t)b; + /* modern GCC could nicely optimize this */ + *h = (uint64_t)(r >> 64); + return (uint64_t)r; +#elif defined(mul_64x64_high) + *h = mul_64x64_high(a, b); + return a * b; +#else + /* performs 64x64 to 128 bit multiplication */ + const uint64_t ll = mul_32x32_64((uint32_t)a, (uint32_t)b); + const uint64_t lh = mul_32x32_64(a >> 32, (uint32_t)b); + const uint64_t hl = mul_32x32_64((uint32_t)a, b >> 32); + const uint64_t hh = mul_32x32_64(a >> 32, b >> 32); + + /* Few simplification are possible here for 32-bit architectures, + * but thus we would lost compatibility with the original 
64-bit + * version. Think is very bad idea, because then 32-bit t1ha will + * still (relatively) very slowly and well yet not compatible. */ + uint64_t l; + add64carry_last(add64carry_first(ll, lh << 32, &l), hh, lh >> 32, h); + add64carry_last(add64carry_first(l, hl << 32, &l), *h, hl >> 32, h); + return l; +#endif +} +#endif /* mul_64x64_128() */ + +#ifndef mul_64x64_high +static __maybe_unused __always_inline uint64_t mul_64x64_high(uint64_t a, + uint64_t b) { + uint64_t h; + mul_64x64_128(a, b, &h); + return h; +} +#endif /* mul_64x64_high */ + +/***************************************************************************/ + +/* 'magic' primes */ +static const uint64_t prime_0 = UINT64_C(0xEC99BF0D8372CAAB); +static const uint64_t prime_1 = UINT64_C(0x82434FE90EDCEF39); +static const uint64_t prime_2 = UINT64_C(0xD4F06DB99D67BE4B); +static const uint64_t prime_3 = UINT64_C(0xBD9CACC22C6E9571); +static const uint64_t prime_4 = UINT64_C(0x9C06FAF4D023E3AB); +static const uint64_t prime_5 = UINT64_C(0xC060724A8424F345); +static const uint64_t prime_6 = UINT64_C(0xCB5AF53AE3AAAC31); + +/* xor high and low parts of full 128-bit product */ +static __maybe_unused __always_inline uint64_t mux64(uint64_t v, + uint64_t prime) { + uint64_t l, h; + l = mul_64x64_128(v, prime, &h); + return l ^ h; +} + static __maybe_unused __always_inline uint64_t final64(uint64_t a, uint64_t b) { - uint64_t x = (a + rot64(b, 41)) * prime_0; - uint64_t y = (rot64(a, 23) + b) * prime_6; - return mux64(x ^ y, prime_5); -} - + uint64_t x = (a + rot64(b, 41)) * prime_0; + uint64_t y = (rot64(a, 23) + b) * prime_6; + return mux64(x ^ y, prime_5); +} + static __maybe_unused __always_inline void mixup64(uint64_t *__restrict a, uint64_t *__restrict b, uint64_t v, uint64_t prime) { - uint64_t h; - *a ^= mul_64x64_128(*b + v, prime, &h); - *b += h; -} - -/***************************************************************************/ - -typedef union t1ha_uint128 { -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - __uint128_t v; -#endif - struct { -#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ - uint64_t l, h; -#else - uint64_t h, l; -#endif - }; -} t1ha_uint128_t; - + uint64_t h; + *a ^= mul_64x64_128(*b + v, prime, &h); + *b += h; +} + +/***************************************************************************/ + +typedef union t1ha_uint128 { +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + __uint128_t v; +#endif + struct { +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + uint64_t l, h; +#else + uint64_t h, l; +#endif + }; +} t1ha_uint128_t; + static __maybe_unused __always_inline t1ha_uint128_t not128(const t1ha_uint128_t v) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = ~v.v; -#else - r.l = ~v.l; - r.h = ~v.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = ~v.v; +#else + r.l = ~v.l; + r.h = ~v.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t left128(const t1ha_uint128_t v, unsigned s) { - t1ha_uint128_t r; - assert(s < 128); -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = v.v << s; -#else - r.l = (s < 64) ? v.l << s : 0; - r.h = (s < 64) ? (v.h << s) | (s ? 
v.l >> (64 - s) : 0) : v.l << (s - 64); -#endif - return r; -} - + t1ha_uint128_t r; + assert(s < 128); +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = v.v << s; +#else + r.l = (s < 64) ? v.l << s : 0; + r.h = (s < 64) ? (v.h << s) | (s ? v.l >> (64 - s) : 0) : v.l << (s - 64); +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t right128(const t1ha_uint128_t v, unsigned s) { - t1ha_uint128_t r; - assert(s < 128); -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = v.v >> s; -#else - r.l = (s < 64) ? (s ? v.h << (64 - s) : 0) | (v.l >> s) : v.h >> (s - 64); - r.h = (s < 64) ? v.h >> s : 0; -#endif - return r; -} - + t1ha_uint128_t r; + assert(s < 128); +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = v.v >> s; +#else + r.l = (s < 64) ? (s ? v.h << (64 - s) : 0) | (v.l >> s) : v.h >> (s - 64); + r.h = (s < 64) ? v.h >> s : 0; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t or128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v | y.v; -#else - r.l = x.l | y.l; - r.h = x.h | y.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v | y.v; +#else + r.l = x.l | y.l; + r.h = x.h | y.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t xor128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v ^ y.v; -#else - r.l = x.l ^ y.l; - r.h = x.h ^ y.h; -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v ^ y.v; +#else + r.l = x.l ^ y.l; + r.h = x.h ^ y.h; +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t rot128(t1ha_uint128_t v, unsigned s) { - s &= 127; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - v.v = (v.v << (128 - s)) | (v.v >> s); - return v; -#else - return s ? or128(left128(v, 128 - s), right128(v, s)) : v; -#endif -} - + s &= 127; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + v.v = (v.v << (128 - s)) | (v.v >> s); + return v; +#else + return s ? 
or128(left128(v, 128 - s), right128(v, s)) : v; +#endif +} + static __maybe_unused __always_inline t1ha_uint128_t add128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v + y.v; -#else - add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); -#endif - return r; -} - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v + y.v; +#else + add64carry_last(add64carry_first(x.l, y.l, &r.l), x.h, y.h, &r.h); +#endif + return r; +} + static __maybe_unused __always_inline t1ha_uint128_t mul128(t1ha_uint128_t x, t1ha_uint128_t y) { - t1ha_uint128_t r; -#if defined(__SIZEOF_INT128__) || \ - (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) - r.v = x.v * y.v; -#else - r.l = mul_64x64_128(x.l, y.l, &r.h); - r.h += x.l * y.h + y.l * x.h; -#endif - return r; -} - -/***************************************************************************/ - -#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) -uint64_t t1ha_ia32cpu_features(void); - + t1ha_uint128_t r; +#if defined(__SIZEOF_INT128__) || \ + (defined(_INTEGRAL_MAX_BITS) && _INTEGRAL_MAX_BITS >= 128) + r.v = x.v * y.v; +#else + r.l = mul_64x64_128(x.l, y.l, &r.h); + r.h += x.l * y.h + y.l * x.h; +#endif + return r; +} + +/***************************************************************************/ + +#if T1HA0_AESNI_AVAILABLE && defined(__ia32__) +uint64_t t1ha_ia32cpu_features(void); + static __maybe_unused __always_inline bool t1ha_ia32_AESNI_avail(uint64_t ia32cpu_features) { - /* check for AES-NI */ - return (ia32cpu_features & UINT32_C(0x02000000)) != 0; -} - + /* check for AES-NI */ + return (ia32cpu_features & UINT32_C(0x02000000)) != 0; +} + static __maybe_unused __always_inline bool t1ha_ia32_AVX_avail(uint64_t ia32cpu_features) { - /* check for any AVX */ - return (ia32cpu_features & UINT32_C(0x1A000000)) == UINT32_C(0x1A000000); -} - + /* check for any AVX */ + return (ia32cpu_features & UINT32_C(0x1A000000)) == UINT32_C(0x1A000000); +} + static __maybe_unused __always_inline bool t1ha_ia32_AVX2_avail(uint64_t ia32cpu_features) { - /* check for 'Advanced Vector Extensions 2' */ - return ((ia32cpu_features >> 32) & 32) != 0; -} - -#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ + /* check for 'Advanced Vector Extensions 2' */ + return ((ia32cpu_features >> 32) & 32) != 0; +} + +#endif /* T1HA0_AESNI_AVAILABLE && __ia32__ */ diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck.c b/contrib/libs/t1ha/src/t1ha_selfcheck.c index b92eb948a3..ee9394bf3b 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck.c +++ b/contrib/libs/t1ha/src/t1ha_selfcheck.c @@ -1,98 +1,98 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. 
If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
- */ - -#include "t1ha_selfcheck.h" -#include "t1ha_bits.h" - -const uint8_t t1ha_test_pattern[64] = { - 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, - 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, - 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, - 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', - 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', - 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'}; - -static __inline bool probe(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t reference, const void *data, - unsigned len, uint64_t seed) { - const uint64_t actual = hash(data, len, seed); - assert(actual == reference); - return actual != reference; -} - -__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t *reference_values) { - bool failed = false; - - const uint64_t zero = 0; - failed |= probe(hash, /* empty-zero */ *reference_values++, NULL, 0, zero); - failed |= probe(hash, /* empty-all1 */ *reference_values++, NULL, 0, ~zero); - failed |= probe(hash, /* bin64-zero */ *reference_values++, t1ha_test_pattern, - 64, zero); - - uint64_t seed = 1; - for (int i = 1; i < 64; i++) { - /* bin%i-1p%i */ - failed |= probe(hash, *reference_values++, t1ha_test_pattern, i, seed); - seed <<= 1; - } - - seed = ~zero; - for (int i = 1; i <= 7; i++) { - seed <<= 1; - /* align%i_F%i */; - failed |= - probe(hash, *reference_values++, t1ha_test_pattern + i, 64 - i, seed); - } - - uint8_t pattern_long[512]; - for (size_t i = 0; i < sizeof(pattern_long); ++i) - pattern_long[i] = (uint8_t)i; - for (int i = 0; i <= 7; i++) { - /* long-%05i */ - failed |= - probe(hash, *reference_values++, pattern_long + i, 128 + i * 17, seed); - } - - return failed ? -1 : 0; -} + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! 
+ */ + +#include "t1ha_selfcheck.h" +#include "t1ha_bits.h" + +const uint8_t t1ha_test_pattern[64] = { + 0, 1, 2, 3, 4, 5, 6, 7, 0xFF, 0x7F, 0x3F, + 0x1F, 0xF, 8, 16, 32, 64, 0x80, 0xFE, 0xFC, 0xF8, 0xF0, + 0xE0, 0xC0, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x55, 0xAA, 11, + 17, 19, 23, 29, 37, 42, 43, 'a', 'b', 'c', 'd', + 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', + 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x'}; + +static __inline bool probe(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t reference, const void *data, + unsigned len, uint64_t seed) { + const uint64_t actual = hash(data, len, seed); + assert(actual == reference); + return actual != reference; +} + +__cold int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values) { + bool failed = false; + + const uint64_t zero = 0; + failed |= probe(hash, /* empty-zero */ *reference_values++, NULL, 0, zero); + failed |= probe(hash, /* empty-all1 */ *reference_values++, NULL, 0, ~zero); + failed |= probe(hash, /* bin64-zero */ *reference_values++, t1ha_test_pattern, + 64, zero); + + uint64_t seed = 1; + for (int i = 1; i < 64; i++) { + /* bin%i-1p%i */ + failed |= probe(hash, *reference_values++, t1ha_test_pattern, i, seed); + seed <<= 1; + } + + seed = ~zero; + for (int i = 1; i <= 7; i++) { + seed <<= 1; + /* align%i_F%i */; + failed |= + probe(hash, *reference_values++, t1ha_test_pattern + i, 64 - i, seed); + } + + uint8_t pattern_long[512]; + for (size_t i = 0; i < sizeof(pattern_long); ++i) + pattern_long[i] = (uint8_t)i; + for (int i = 0; i <= 7; i++) { + /* long-%05i */ + failed |= + probe(hash, *reference_values++, pattern_long + i, 128 + i * 17, seed); + } + + return failed ? -1 : 0; +} diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck.h b/contrib/libs/t1ha/src/t1ha_selfcheck.h index 043f5e6a2d..e83cd2417d 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck.h +++ b/contrib/libs/t1ha/src/t1ha_selfcheck.h @@ -1,76 +1,76 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. 
In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#pragma once -#if defined(_MSC_VER) && _MSC_VER > 1800 -#pragma warning(disable : 4464) /* relative include path contains '..' */ -#endif /* MSVC */ -#include "../t1ha.h" - -/***************************************************************************/ -/* Self-checking */ - -extern const uint8_t t1ha_test_pattern[64]; -int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), - const uint64_t *reference_values); - -#ifndef T1HA2_DISABLED -extern const uint64_t t1ha_refval_2atonce[81]; -extern const uint64_t t1ha_refval_2atonce128[81]; -extern const uint64_t t1ha_refval_2stream[81]; -extern const uint64_t t1ha_refval_2stream128[81]; -#endif /* T1HA2_DISABLED */ - -#ifndef T1HA1_DISABLED -extern const uint64_t t1ha_refval_64le[81]; -extern const uint64_t t1ha_refval_64be[81]; -#endif /* T1HA1_DISABLED */ - -#ifndef T1HA0_DISABLED -extern const uint64_t t1ha_refval_32le[81]; -extern const uint64_t t1ha_refval_32be[81]; -#if T1HA0_AESNI_AVAILABLE -extern const uint64_t t1ha_refval_ia32aes_a[81]; -extern const uint64_t t1ha_refval_ia32aes_b[81]; -#endif /* T1HA0_AESNI_AVAILABLE */ -#endif /* T1HA0_DISABLED */ + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#pragma once +#if defined(_MSC_VER) && _MSC_VER > 1800 +#pragma warning(disable : 4464) /* relative include path contains '..' 
*/ +#endif /* MSVC */ +#include "../t1ha.h" + +/***************************************************************************/ +/* Self-checking */ + +extern const uint8_t t1ha_test_pattern[64]; +int t1ha_selfcheck(uint64_t (*hash)(const void *, size_t, uint64_t), + const uint64_t *reference_values); + +#ifndef T1HA2_DISABLED +extern const uint64_t t1ha_refval_2atonce[81]; +extern const uint64_t t1ha_refval_2atonce128[81]; +extern const uint64_t t1ha_refval_2stream[81]; +extern const uint64_t t1ha_refval_2stream128[81]; +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED +extern const uint64_t t1ha_refval_64le[81]; +extern const uint64_t t1ha_refval_64be[81]; +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED +extern const uint64_t t1ha_refval_32le[81]; +extern const uint64_t t1ha_refval_32be[81]; +#if T1HA0_AESNI_AVAILABLE +extern const uint64_t t1ha_refval_ia32aes_a[81]; +extern const uint64_t t1ha_refval_ia32aes_b[81]; +#endif /* T1HA0_AESNI_AVAILABLE */ +#endif /* T1HA0_DISABLED */ diff --git a/contrib/libs/t1ha/src/t1ha_selfcheck_all.c b/contrib/libs/t1ha/src/t1ha_selfcheck_all.c index ea0a46b068..f916cef716 100644 --- a/contrib/libs/t1ha/src/t1ha_selfcheck_all.c +++ b/contrib/libs/t1ha/src/t1ha_selfcheck_all.c @@ -1,63 +1,63 @@ -/* +/* * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com, - * Fast Positive Hash. - * + * Fast Positive Hash. + * * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>, - * The 1Hippeus project (t1h). - * - * This software is provided 'as-is', without any express or implied - * warranty. In no event will the authors be held liable for any damages - * arising from the use of this software. - * - * Permission is granted to anyone to use this software for any purpose, - * including commercial applications, and to alter it and redistribute it - * freely, subject to the following restrictions: - * - * 1. The origin of this software must not be misrepresented; you must not - * claim that you wrote the original software. If you use this software - * in a product, an acknowledgement in the product documentation would be - * appreciated but is not required. - * 2. Altered source versions must be plainly marked as such, and must not be - * misrepresented as being the original software. - * 3. This notice may not be removed or altered from any source distribution. - */ - -/* - * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } - * by [Positive Technologies](https://www.ptsecurity.ru) - * - * Briefly, it is a 64-bit Hash Function: - * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, - * but portable and without penalties it can run on any 64-bit CPU. - * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash - * and all others portable hash-functions (which do not use specific - * hardware tricks). - * 3. Not suitable for cryptography. - * + * The 1Hippeus project (t1h). + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must not + * claim that you wrote the original software. 
If you use this software + * in a product, an acknowledgement in the product documentation would be + * appreciated but is not required. + * 2. Altered source versions must be plainly marked as such, and must not be + * misrepresented as being the original software. + * 3. This notice may not be removed or altered from any source distribution. + */ + +/* + * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" } + * by [Positive Technologies](https://www.ptsecurity.ru) + * + * Briefly, it is a 64-bit Hash Function: + * 1. Created for 64-bit little-endian platforms, in predominantly for x86_64, + * but portable and without penalties it can run on any 64-bit CPU. + * 2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash + * and all others portable hash-functions (which do not use specific + * hardware tricks). + * 3. Not suitable for cryptography. + * * The Future will (be) Positive. Всё будет хорошо. - * - * ACKNOWLEDGEMENT: - * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) - * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! - */ - -#include "t1ha_bits.h" -#include "t1ha_selfcheck.h" - -__cold int t1ha_selfcheck__all_enabled(void) { - int rc = 0; - -#ifndef T1HA2_DISABLED - rc |= t1ha_selfcheck__t1ha2(); -#endif /* T1HA2_DISABLED */ - -#ifndef T1HA1_DISABLED - rc |= t1ha_selfcheck__t1ha1(); -#endif /* T1HA1_DISABLED */ - -#ifndef T1HA0_DISABLED - rc |= t1ha_selfcheck__t1ha0(); -#endif /* T1HA0_DISABLED */ - - return rc; -} + * + * ACKNOWLEDGEMENT: + * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев) + * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta! + */ + +#include "t1ha_bits.h" +#include "t1ha_selfcheck.h" + +__cold int t1ha_selfcheck__all_enabled(void) { + int rc = 0; + +#ifndef T1HA2_DISABLED + rc |= t1ha_selfcheck__t1ha2(); +#endif /* T1HA2_DISABLED */ + +#ifndef T1HA1_DISABLED + rc |= t1ha_selfcheck__t1ha1(); +#endif /* T1HA1_DISABLED */ + +#ifndef T1HA0_DISABLED + rc |= t1ha_selfcheck__t1ha0(); +#endif /* T1HA0_DISABLED */ + + return rc; +} |
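For readers skimming the restored t1ha0.c above: its finalization reduces a full 128-bit product to 64 bits by XORing the halves (mul_64x64_128() feeding mux64()/final64()). Below is a standalone sketch of that arithmetic, assuming a compiler that provides the __uint128_t extension; the function name is illustrative and is not part of the library.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Illustrative restatement of the diffed mux64() idea: form the
 * 128-bit product, then XOR its high and low 64-bit halves. */
static uint64_t mux64_sketch(uint64_t v, uint64_t prime) {
  __uint128_t r = (__uint128_t)v * prime;
  return (uint64_t)r ^ (uint64_t)(r >> 64);
}

int main(void) {
  /* prime_5 is the finalization prime used by final64() in the diff. */
  const uint64_t prime_5 = UINT64_C(0xC060724A8424F345);
  printf("%016" PRIx64 "\n",
         mux64_sketch(UINT64_C(0x123456789ABCDEF0), prime_5));
  return 0;
}

On targets without __uint128_t the library instead composes the product from four 32x32 multiplications plus carry propagation, as the mul_64x64_128() fallback in the diff shows.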
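The last file in this diff, t1ha_selfcheck_all.c, defines the aggregate self-check entry point. A minimal usage sketch follows, assuming the public header contrib/libs/t1ha/t1ha.h is on the include path and that it declares both t1ha0() and t1ha_selfcheck__all_enabled() (the t1ha0() dispatcher itself is part of the public API, not of this diff).

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include "t1ha.h"

int main(void) {
  /* Run the built-in reference-vector checks for every enabled t1ha
   * variant before trusting the hash on this platform. */
  if (t1ha_selfcheck__all_enabled() != 0) {
    fprintf(stderr, "t1ha self-check failed\n");
    return 1;
  }

  static const char data[] = "example";
  /* t1ha0() dispatches to the fastest implementation for the current CPU. */
  printf("t1ha0(\"example\", seed=42) = %016" PRIx64 "\n",
         t1ha0(data, sizeof(data) - 1, UINT64_C(42)));
  return 0;
}

A non-zero result mirrors the -1 that t1ha_selfcheck() returns when any probe() comparison against the reference vectors fails.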