author | Vladislav Kuznetsov <va.kuznecov@physics.msu.ru> | 2022-02-10 16:46:54 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:54 +0300 |
commit | 3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5 (patch) | |
tree | 49e222ea1c5804306084bb3ae065bb702625360f /contrib/libs/t1ha/src/t1ha2.c | |
parent | de20f5598f0832a6e646f61b4feca942c00da928 (diff) | |
download | ydb-3cbae1ba94bff7a82ee848c3e9b2cebd96a69dd5.tar.gz | |
Restoring authorship annotation for Vladislav Kuznetsov <va.kuznecov@physics.msu.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/t1ha/src/t1ha2.c')
-rw-r--r-- | contrib/libs/t1ha/src/t1ha2.c | 652 |
1 file changed, 326 insertions(+), 326 deletions(-)
```diff
diff --git a/contrib/libs/t1ha/src/t1ha2.c b/contrib/libs/t1ha/src/t1ha2.c
index 68a1ea4eb9..009f922751 100644
--- a/contrib/libs/t1ha/src/t1ha2.c
+++ b/contrib/libs/t1ha/src/t1ha2.c
@@ -1,383 +1,383 @@
/*
 * Copyright (c) 2016-2020 Positive Technologies, https://www.ptsecurity.com,
 * Fast Positive Hash.
 *
 * Portions Copyright (c) 2010-2020 Leonid Yuriev <leo@yuriev.ru>,
 * The 1Hippeus project (t1h).
 *
 * This software is provided 'as-is', without any express or implied
 * warranty. In no event will the authors be held liable for any damages
 * arising from the use of this software.
 *
 * Permission is granted to anyone to use this software for any purpose,
 * including commercial applications, and to alter it and redistribute it
 * freely, subject to the following restrictions:
 *
 * 1. The origin of this software must not be misrepresented; you must not
 *    claim that you wrote the original software. If you use this software
 *    in a product, an acknowledgement in the product documentation would be
 *    appreciated but is not required.
 * 2. Altered source versions must be plainly marked as such, and must not be
 *    misrepresented as being the original software.
 * 3. This notice may not be removed or altered from any source distribution.
 */

/*
 * t1ha = { Fast Positive Hash, aka "Позитивный Хэш" }
 * by [Positive Technologies](https://www.ptsecurity.ru)
 *
 * Briefly, it is a 64-bit Hash Function:
 *  1. Created for 64-bit little-endian platforms, in predominantly for x86_64,
 *     but portable and without penalties it can run on any 64-bit CPU.
 *  2. In most cases up to 15% faster than City64, xxHash, mum-hash, metro-hash
 *     and all others portable hash-functions (which do not use specific
 *     hardware tricks).
 *  3. Not suitable for cryptography.
 *
 * The Future will (be) Positive. Всё будет хорошо.
 *
 * ACKNOWLEDGEMENT:
 * The t1ha was originally developed by Leonid Yuriev (Леонид Юрьев)
 * for The 1Hippeus project - zerocopy messaging in the spirit of Sparta!
 */

#ifndef T1HA2_DISABLED
#include "t1ha_bits.h"
#include "t1ha_selfcheck.h"

static __always_inline void init_ab(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.a = x;
  s->n.b = y;
}

static __always_inline void init_cd(t1ha_state256_t *s, uint64_t x,
                                    uint64_t y) {
  s->n.c = rot64(y, 23) + ~x;
  s->n.d = ~y + rot64(x, 19);
}

/* TODO: C++ template in the next version */
#define T1HA2_UPDATE(ENDIANNES, ALIGNESS, state, v) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t w0 = fetch64_##ENDIANNES##_##ALIGNESS(v + 0); \
    const uint64_t w1 = fetch64_##ENDIANNES##_##ALIGNESS(v + 1); \
    const uint64_t w2 = fetch64_##ENDIANNES##_##ALIGNESS(v + 2); \
    const uint64_t w3 = fetch64_##ENDIANNES##_##ALIGNESS(v + 3); \
 \
    const uint64_t d02 = w0 + rot64(w2 + s->n.d, 56); \
    const uint64_t c13 = w1 + rot64(w3 + s->n.c, 19); \
    s->n.d ^= s->n.b + rot64(w1, 38); \
    s->n.c ^= s->n.a + rot64(w0, 57); \
    s->n.b ^= prime_6 * (c13 + w2); \
    s->n.a ^= prime_5 * (d02 + w3); \
  } while (0)

static __always_inline void squash(t1ha_state256_t *s) {
  s->n.a ^= prime_6 * (s->n.c + rot64(s->n.d, 23));
  s->n.b ^= prime_5 * (rot64(s->n.c, 19) + s->n.d);
}

/* TODO: C++ template in the next version */
#define T1HA2_LOOP(ENDIANNES, ALIGNESS, state, data, len) \
  do { \
    const void *detent = (const uint8_t *)data + len - 31; \
    do { \
      const uint64_t *v = (const uint64_t *)data; \
      data = (const uint64_t *)data + 4; \
      prefetch(data); \
      T1HA2_UPDATE(le, ALIGNESS, state, v); \
    } while (likely(data < detent)); \
  } while (0)

/* TODO: C++ template in the next version */
#define T1HA2_TAIL_AB(ENDIANNES, ALIGNESS, state, data, len) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t *v = (const uint64_t *)data; \
    switch (len) { \
    default: \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_4); \
    /* fall through */ \
    case 24: \
    case 23: \
    case 22: \
    case 21: \
    case 20: \
    case 19: \
    case 18: \
    case 17: \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_3); \
    /* fall through */ \
    case 16: \
    case 15: \
    case 14: \
    case 13: \
    case 12: \
    case 11: \
    case 10: \
    case 9: \
      mixup64(&s->n.a, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_2); \
    /* fall through */ \
    case 8: \
    case 7: \
    case 6: \
    case 5: \
    case 4: \
    case 3: \
    case 2: \
    case 1: \
      mixup64(&s->n.b, &s->n.a, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
              prime_1); \
    /* fall through */ \
    case 0: \
      return final64(s->n.a, s->n.b); \
    } \
  } while (0)

/* TODO: C++ template in the next version */
#define T1HA2_TAIL_ABCD(ENDIANNES, ALIGNESS, state, data, len) \
  do { \
    t1ha_state256_t *const s = state; \
    const uint64_t *v = (const uint64_t *)data; \
    switch (len) { \
    default: \
      mixup64(&s->n.a, &s->n.d, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_4); \
    /* fall through */ \
    case 24: \
    case 23: \
    case 22: \
    case 21: \
    case 20: \
    case 19: \
    case 18: \
    case 17: \
      mixup64(&s->n.b, &s->n.a, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_3); \
    /* fall through */ \
    case 16: \
    case 15: \
    case 14: \
    case 13: \
    case 12: \
    case 11: \
    case 10: \
    case 9: \
      mixup64(&s->n.c, &s->n.b, fetch64_##ENDIANNES##_##ALIGNESS(v++), \
              prime_2); \
    /* fall through */ \
    case 8: \
    case 7: \
    case 6: \
    case 5: \
    case 4: \
    case 3: \
    case 2: \
    case 1: \
      mixup64(&s->n.d, &s->n.c, tail64_##ENDIANNES##_##ALIGNESS(v, len), \
              prime_1); \
    /* fall through */ \
    case 0: \
      return final128(s->n.a, s->n.b, s->n.c, s->n.d, extra_result); \
    } \
  } while (0)

static __always_inline uint64_t final128(uint64_t a, uint64_t b, uint64_t c,
                                         uint64_t d, uint64_t *h) {
  mixup64(&a, &b, rot64(c, 41) ^ d, prime_0);
  mixup64(&b, &c, rot64(d, 23) ^ a, prime_6);
  mixup64(&c, &d, rot64(a, 19) ^ b, prime_5);
  mixup64(&d, &a, rot64(b, 31) ^ c, prime_4);
  *h = c + d;
  return a ^ b;
}

//------------------------------------------------------------------------------

uint64_t t1ha2_atonce(const void *data, size_t length, uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);

#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
    init_cd(&state, seed, length);
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
    T1HA2_LOOP(le, unaligned, &state, data, length);
    squash(&state);
    length &= 31;
  }
  T1HA2_TAIL_AB(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, unaligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
      init_cd(&state, seed, length);
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, aligned, &state, data, length);
      squash(&state);
      length &= 31;
    }
    T1HA2_TAIL_AB(le, aligned, &state, data, length);
  }
#endif
}

uint64_t t1ha2_atonce128(uint64_t *__restrict extra_result,
                         const void *__restrict data, size_t length,
                         uint64_t seed) {
  t1ha_state256_t state;
  init_ab(&state, seed, length);
  init_cd(&state, seed, length);

#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
  if (unlikely(length > 32)) {
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
    T1HA2_LOOP(le, unaligned, &state, data, length);
    length &= 31;
  }
  T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
#else
  const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
  if (misaligned) {
    if (unlikely(length > 32)) {
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, unaligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, unaligned, &state, data, length);
  } else {
    if (unlikely(length > 32)) {
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, aligned, &state, data, length);
      length &= 31;
    }
    T1HA2_TAIL_ABCD(le, aligned, &state, data, length);
  }
#endif
}

//------------------------------------------------------------------------------

void t1ha2_init(t1ha_context_t *ctx, uint64_t seed_x, uint64_t seed_y) {
  init_ab(&ctx->state, seed_x, seed_y);
  init_cd(&ctx->state, seed_x, seed_y);
  ctx->partial = 0;
  ctx->total = 0;
}

void t1ha2_update(t1ha_context_t *__restrict ctx, const void *__restrict data,
                  size_t length) {
  ctx->total += length;

  if (ctx->partial) {
    const size_t left = 32 - ctx->partial;
    const size_t chunk = (length >= left) ? left : length;
    memcpy(ctx->buffer.bytes + ctx->partial, data, chunk);
    ctx->partial += chunk;
    if (ctx->partial < 32) {
      assert(left >= length);
      return;
    }
    ctx->partial = 0;
    data = (const uint8_t *)data + chunk;
    length -= chunk;
    T1HA2_UPDATE(le, aligned, &ctx->state, ctx->buffer.u64);
  }

  if (length >= 32) {
#if T1HA_SYS_UNALIGNED_ACCESS == T1HA_UNALIGNED_ACCESS__EFFICIENT
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
    T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
#else
    const bool misaligned = (((uintptr_t)data) & (ALIGNMENT_64 - 1)) != 0;
    if (misaligned) {
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, unaligned, &ctx->state, data, length);
    } else {
#if defined(__LCC__) && __LCC__ > 123
/* Forces pairing of arithmetic operations into combined ("two-storey") ops in
 * the loop immediately following this directive, even if the optimizer's
 * heuristics consider it inexpedient. */
#pragma comb_oper
#endif /* E2K LCC > 1.23 */
      T1HA2_LOOP(le, aligned, &ctx->state, data, length);
    }
#endif
    length &= 31;
  }

  if (length)
    memcpy(ctx->buffer.bytes, data, ctx->partial = length);
}

uint64_t t1ha2_final(t1ha_context_t *__restrict ctx,
                     uint64_t *__restrict extra_result) {
  uint64_t bits = (ctx->total << 3) ^ (UINT64_C(1) << 63);
#if __BYTE_ORDER__ != __ORDER_LITTLE_ENDIAN__
  bits = bswap64(bits);
#endif
  t1ha2_update(ctx, &bits, 8);

  if (likely(!extra_result)) {
    squash(&ctx->state);
    T1HA2_TAIL_AB(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
  }

  T1HA2_TAIL_ABCD(le, aligned, &ctx->state, ctx->buffer.u64, ctx->partial);
}

#endif /* T1HA2_DISABLED */
```
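The file in this diff defines the public t1ha2 entry points: the one-shot t1ha2_atonce and t1ha2_atonce128, and the streaming triple t1ha2_init / t1ha2_update / t1ha2_final. The snippet below is a minimal usage sketch built only from those signatures; the "t1ha.h" include name, the seed values, and the chunk sizes are illustrative assumptions (this source itself only includes the internal "t1ha_bits.h"), and the example is not part of the commit.

```c
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#include "t1ha.h" /* assumed public header declaring the t1ha2_* functions */

int main(void) {
  const char msg[] = "The quick brown fox jumps over the lazy dog";
  const uint64_t seed = 42; /* arbitrary seed for illustration */

  /* One-shot 64-bit hash of a buffer. */
  const uint64_t h64 = t1ha2_atonce(msg, strlen(msg), seed);

  /* One-shot 128-bit hash: the function returns 64 bits of the result and
   * writes the remaining 64 bits through extra_result. */
  uint64_t extra = 0;
  const uint64_t h128_part = t1ha2_atonce128(&extra, msg, strlen(msg), seed);

  /* Streaming interface: feed the data in arbitrary chunks, then finalize.
   * Passing NULL as extra_result to t1ha2_final selects the 64-bit result. */
  t1ha_context_t ctx;
  t1ha2_init(&ctx, seed, 0);
  t1ha2_update(&ctx, msg, 10);
  t1ha2_update(&ctx, msg + 10, strlen(msg) - 10);
  const uint64_t h64_stream = t1ha2_final(&ctx, NULL);

  printf("t1ha2_atonce    : %016" PRIx64 "\n", h64);
  printf("t1ha2_atonce128 : %016" PRIx64 " %016" PRIx64 "\n", h128_part, extra);
  printf("t1ha2 streaming : %016" PRIx64 "\n", h64_stream);
  return 0;
}
```

As the source above shows, the streaming variant buffers partial 32-byte blocks in ctx->buffer and mixes the total input length into the state inside t1ha2_final, so a context must be finalized exactly once per hash.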