diff options
author | thegeorg <thegeorg@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:45:12 +0300 |
commit | 49116032d905455a7b1c994e4a696afc885c1e71 (patch) | |
tree | be835aa92c6248212e705f25388ebafcf84bc7a1 /contrib/libs/hyperscan/src/rose/validate_shufti.h | |
parent | 4e839db24a3bbc9f1c610c43d6faaaa99824dcca (diff) | |
download | ydb-49116032d905455a7b1c994e4a696afc885c1e71.tar.gz |
Restoring authorship annotation for <thegeorg@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/hyperscan/src/rose/validate_shufti.h')
-rw-r--r-- | contrib/libs/hyperscan/src/rose/validate_shufti.h | 158 |
1 files changed, 79 insertions, 79 deletions
diff --git a/contrib/libs/hyperscan/src/rose/validate_shufti.h b/contrib/libs/hyperscan/src/rose/validate_shufti.h index de300df4a3..351df36a76 100644 --- a/contrib/libs/hyperscan/src/rose/validate_shufti.h +++ b/contrib/libs/hyperscan/src/rose/validate_shufti.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020, Intel Corporation + * Copyright (c) 2016-2020, Intel Corporation * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are met: @@ -175,85 +175,85 @@ int validateShuftiMask32x16(const m256 data, return !cmp_result; } -#ifdef HAVE_AVX512 +#ifdef HAVE_AVX512 +static really_inline +int validateShuftiMask64x8(const m512 data, const m512 hi_mask, + const m512 lo_mask, const m512 and_mask, + const u64a neg_mask, const u64a valid_data_mask) { + m512 low4bits = set64x8(0xf); + m512 c_lo = pshufb_m512(lo_mask, and512(data, low4bits)); + m512 c_hi = pshufb_m512(hi_mask, + rshift64_m512(andnot512(low4bits, data), 4)); + m512 t = and512(c_lo, c_hi); + u64a nresult = eq512mask(and512(t, and_mask), zeroes512()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 64); + DEBUG_PRINTF("hi_mask\n"); + dumpMask(&hi_mask, 64); + DEBUG_PRINTF("lo_mask\n"); + dumpMask(&lo_mask, 64); + DEBUG_PRINTF("c_lo\n"); + dumpMask(&c_lo, 64); + DEBUG_PRINTF("c_hi\n"); + dumpMask(&c_hi, 64); + DEBUG_PRINTF("nresult %llx\n", nresult); + DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); +#endif + u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask; + return !cmp_result; +} + +static really_inline +int validateShuftiMask64x16(const m512 data, + const m512 hi_mask_1, const m512 hi_mask_2, + const m512 lo_mask_1, const m512 lo_mask_2, + const m512 and_mask_hi, const m512 and_mask_lo, + const u64a neg_mask, const u64a valid_data_mask) { + m512 low4bits = set64x8(0xf); + m512 data_lo = and512(data, low4bits); + m512 data_hi = and512(rshift64_m512(data, 4), low4bits); + m512 c_lo_1 = pshufb_m512(lo_mask_1, data_lo); + m512 c_lo_2 = pshufb_m512(lo_mask_2, data_lo); + m512 c_hi_1 = pshufb_m512(hi_mask_1, data_hi); + m512 c_hi_2 = pshufb_m512(hi_mask_2, data_hi); + m512 t1 = and512(c_lo_1, c_hi_1); + m512 t2 = and512(c_lo_2, c_hi_2); + m512 result = or512(and512(t1, and_mask_lo), and512(t2, and_mask_hi)); + u64a nresult = eq512mask(result, zeroes512()); +#ifdef DEBUG + DEBUG_PRINTF("data\n"); + dumpMask(&data, 64); + DEBUG_PRINTF("data_lo\n"); + dumpMask(&data_lo, 64); + DEBUG_PRINTF("data_hi\n"); + dumpMask(&data_hi, 64); + DEBUG_PRINTF("hi_mask_1\n"); + dumpMask(&hi_mask_1, 64); + DEBUG_PRINTF("hi_mask_2\n"); + dumpMask(&hi_mask_2, 64); + DEBUG_PRINTF("lo_mask_1\n"); + dumpMask(&lo_mask_1, 64); + DEBUG_PRINTF("lo_mask_2\n"); + dumpMask(&lo_mask_2, 64); + DEBUG_PRINTF("c_lo_1\n"); + dumpMask(&c_lo_1, 64); + DEBUG_PRINTF("c_lo_2\n"); + dumpMask(&c_lo_2, 64); + DEBUG_PRINTF("c_hi_1\n"); + dumpMask(&c_hi_1, 64); + DEBUG_PRINTF("c_hi_2\n"); + dumpMask(&c_hi_2, 64); + DEBUG_PRINTF("result\n"); + dumpMask(&result, 64); + DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); +#endif + u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask; + return !cmp_result; +} +#endif + static really_inline -int validateShuftiMask64x8(const m512 data, const m512 hi_mask, - const m512 lo_mask, const m512 and_mask, - const u64a neg_mask, const u64a valid_data_mask) { - m512 low4bits = set64x8(0xf); - m512 c_lo = pshufb_m512(lo_mask, and512(data, low4bits)); - m512 c_hi = pshufb_m512(hi_mask, - rshift64_m512(andnot512(low4bits, data), 4)); - m512 t = and512(c_lo, c_hi); - u64a nresult = eq512mask(and512(t, and_mask), zeroes512()); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data, 64); - DEBUG_PRINTF("hi_mask\n"); - dumpMask(&hi_mask, 64); - DEBUG_PRINTF("lo_mask\n"); - dumpMask(&lo_mask, 64); - DEBUG_PRINTF("c_lo\n"); - dumpMask(&c_lo, 64); - DEBUG_PRINTF("c_hi\n"); - dumpMask(&c_hi, 64); - DEBUG_PRINTF("nresult %llx\n", nresult); - DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); -#endif - u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask; - return !cmp_result; -} - -static really_inline -int validateShuftiMask64x16(const m512 data, - const m512 hi_mask_1, const m512 hi_mask_2, - const m512 lo_mask_1, const m512 lo_mask_2, - const m512 and_mask_hi, const m512 and_mask_lo, - const u64a neg_mask, const u64a valid_data_mask) { - m512 low4bits = set64x8(0xf); - m512 data_lo = and512(data, low4bits); - m512 data_hi = and512(rshift64_m512(data, 4), low4bits); - m512 c_lo_1 = pshufb_m512(lo_mask_1, data_lo); - m512 c_lo_2 = pshufb_m512(lo_mask_2, data_lo); - m512 c_hi_1 = pshufb_m512(hi_mask_1, data_hi); - m512 c_hi_2 = pshufb_m512(hi_mask_2, data_hi); - m512 t1 = and512(c_lo_1, c_hi_1); - m512 t2 = and512(c_lo_2, c_hi_2); - m512 result = or512(and512(t1, and_mask_lo), and512(t2, and_mask_hi)); - u64a nresult = eq512mask(result, zeroes512()); -#ifdef DEBUG - DEBUG_PRINTF("data\n"); - dumpMask(&data, 64); - DEBUG_PRINTF("data_lo\n"); - dumpMask(&data_lo, 64); - DEBUG_PRINTF("data_hi\n"); - dumpMask(&data_hi, 64); - DEBUG_PRINTF("hi_mask_1\n"); - dumpMask(&hi_mask_1, 64); - DEBUG_PRINTF("hi_mask_2\n"); - dumpMask(&hi_mask_2, 64); - DEBUG_PRINTF("lo_mask_1\n"); - dumpMask(&lo_mask_1, 64); - DEBUG_PRINTF("lo_mask_2\n"); - dumpMask(&lo_mask_2, 64); - DEBUG_PRINTF("c_lo_1\n"); - dumpMask(&c_lo_1, 64); - DEBUG_PRINTF("c_lo_2\n"); - dumpMask(&c_lo_2, 64); - DEBUG_PRINTF("c_hi_1\n"); - dumpMask(&c_hi_1, 64); - DEBUG_PRINTF("c_hi_2\n"); - dumpMask(&c_hi_2, 64); - DEBUG_PRINTF("result\n"); - dumpMask(&result, 64); - DEBUG_PRINTF("valid_data_mask %llx\n", valid_data_mask); -#endif - u64a cmp_result = (nresult ^ neg_mask) & valid_data_mask; - return !cmp_result; -} -#endif - -static really_inline int checkMultipath32(u32 data, u32 hi_bits, u32 lo_bits) { u32 t = ~(data | hi_bits); t += lo_bits; |