diff options
author | shadchin <shadchin@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:44:39 +0300 |
commit | e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch) | |
tree | 64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/lzmasdk/AesOpt.c | |
parent | 2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff) | |
download | ydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz |
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/lzmasdk/AesOpt.c')
-rw-r--r-- | contrib/libs/lzmasdk/AesOpt.c | 356 |
1 files changed, 178 insertions, 178 deletions
diff --git a/contrib/libs/lzmasdk/AesOpt.c b/contrib/libs/lzmasdk/AesOpt.c index 6ade8f42ff..00291288ba 100644 --- a/contrib/libs/lzmasdk/AesOpt.c +++ b/contrib/libs/lzmasdk/AesOpt.c @@ -1,188 +1,188 @@ -/* AesOpt.c -- Intel's AES -2017-06-08 : Igor Pavlov : Public domain */ - -#include "Precomp.h" - -#include "CpuArch.h" - -#ifdef MY_CPU_X86_OR_AMD64 -#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) -#define USE_INTEL_AES -#endif -#endif - -#ifdef USE_INTEL_AES - +/* AesOpt.c -- Intel's AES +2017-06-08 : Igor Pavlov : Public domain */ + +#include "Precomp.h" + +#include "CpuArch.h" + +#ifdef MY_CPU_X86_OR_AMD64 +#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729) +#define USE_INTEL_AES +#endif +#endif + +#ifdef USE_INTEL_AES + #if defined(__clang__) #define TARGET_AES __attribute__((__target__("aes"))) #else #define TARGET_AES #endif -#include <wmmintrin.h> - +#include <wmmintrin.h> + void TARGET_AES MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks) -{ - __m128i m = *p; - for (; numBlocks != 0; numBlocks--, data++) - { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p + 3; - m = _mm_xor_si128(m, *data); - m = _mm_xor_si128(m, p[2]); - do - { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); - w += 2; - } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); - *data = m; - } - *p = m; -} - -#define NUM_WAYS 3 - -#define AES_OP_W(op, n) { \ - const __m128i t = w[n]; \ - m0 = op(m0, t); \ - m1 = op(m1, t); \ - m2 = op(m2, t); \ - } - -#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) -#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) -#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) -#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) - +{ + __m128i m = *p; + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p + 3; + m = _mm_xor_si128(m, *data); + m = _mm_xor_si128(m, p[2]); + do + { + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenc_si128(m, w[1]); + w += 2; + } + while (--numRounds2 != 0); + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenclast_si128(m, w[1]); + *data = m; + } + *p = m; +} + +#define NUM_WAYS 3 + +#define AES_OP_W(op, n) { \ + const __m128i t = w[n]; \ + m0 = op(m0, t); \ + m1 = op(m1, t); \ + m2 = op(m2, t); \ + } + +#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n) +#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n) +#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n) +#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n) + void TARGET_AES MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks) -{ - __m128i iv = *p; - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) - { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m0, m1, m2; - { - const __m128i t = w[2]; - m0 = _mm_xor_si128(t, data[0]); - m1 = _mm_xor_si128(t, data[1]); - m2 = _mm_xor_si128(t, data[2]); - } - numRounds2--; - do - { - AES_DEC(1) - AES_DEC(0) - w -= 2; - } - while (--numRounds2 != 0); - AES_DEC(1) - AES_DEC_LAST(0) - - { - __m128i t; - t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; - t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; - t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; - } - } - for (; numBlocks != 0; numBlocks--, data++) - { - UInt32 numRounds2 = *(const UInt32 *)(p + 1); - const __m128i *w = p + numRounds2 * 2; - __m128i m = _mm_xor_si128(w[2], *data); - numRounds2--; - do - { - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdec_si128(m, w[0]); - w -= 2; - } - while (--numRounds2 != 0); - m = _mm_aesdec_si128(m, w[1]); - m = _mm_aesdeclast_si128(m, w[0]); - - m = _mm_xor_si128(m, iv); - iv = *data; - *data = m; - } - *p = iv; -} - +{ + __m128i iv = *p; + for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const __m128i *w = p + numRounds2 * 2; + __m128i m0, m1, m2; + { + const __m128i t = w[2]; + m0 = _mm_xor_si128(t, data[0]); + m1 = _mm_xor_si128(t, data[1]); + m2 = _mm_xor_si128(t, data[2]); + } + numRounds2--; + do + { + AES_DEC(1) + AES_DEC(0) + w -= 2; + } + while (--numRounds2 != 0); + AES_DEC(1) + AES_DEC_LAST(0) + + { + __m128i t; + t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t; + t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t; + t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t; + } + } + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1); + const __m128i *w = p + numRounds2 * 2; + __m128i m = _mm_xor_si128(w[2], *data); + numRounds2--; + do + { + m = _mm_aesdec_si128(m, w[1]); + m = _mm_aesdec_si128(m, w[0]); + w -= 2; + } + while (--numRounds2 != 0); + m = _mm_aesdec_si128(m, w[1]); + m = _mm_aesdeclast_si128(m, w[0]); + + m = _mm_xor_si128(m, iv); + iv = *data; + *data = m; + } + *p = iv; +} + void TARGET_AES MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks) -{ - __m128i ctr = *p; +{ + __m128i ctr = *p; __m128i one = _mm_set_epi64x(1, 0); - for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) - { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p; - __m128i m0, m1, m2; - { - const __m128i t = w[2]; - ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); - ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); - } - w += 3; - do - { - AES_ENC(0) - AES_ENC(1) - w += 2; - } - while (--numRounds2 != 0); - AES_ENC(0) - AES_ENC_LAST(1) - data[0] = _mm_xor_si128(data[0], m0); - data[1] = _mm_xor_si128(data[1], m1); - data[2] = _mm_xor_si128(data[2], m2); - } - for (; numBlocks != 0; numBlocks--, data++) - { - UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; - const __m128i *w = p; - __m128i m; - ctr = _mm_add_epi64(ctr, one); - m = _mm_xor_si128(ctr, p[2]); - w += 3; - do - { - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenc_si128(m, w[1]); - w += 2; - } - while (--numRounds2 != 0); - m = _mm_aesenc_si128(m, w[0]); - m = _mm_aesenclast_si128(m, w[1]); - *data = _mm_xor_si128(*data, m); - } - *p = ctr; -} - -#else - -void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); -void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); - -void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCbc_Encode(p, data, numBlocks); -} - -void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCbc_Decode(p, data, numBlocks); -} - -void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) -{ - AesCtr_Code(p, data, numBlocks); -} - -#endif + for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p; + __m128i m0, m1, m2; + { + const __m128i t = w[2]; + ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t); + ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t); + ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t); + } + w += 3; + do + { + AES_ENC(0) + AES_ENC(1) + w += 2; + } + while (--numRounds2 != 0); + AES_ENC(0) + AES_ENC_LAST(1) + data[0] = _mm_xor_si128(data[0], m0); + data[1] = _mm_xor_si128(data[1], m1); + data[2] = _mm_xor_si128(data[2], m2); + } + for (; numBlocks != 0; numBlocks--, data++) + { + UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1; + const __m128i *w = p; + __m128i m; + ctr = _mm_add_epi64(ctr, one); + m = _mm_xor_si128(ctr, p[2]); + w += 3; + do + { + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenc_si128(m, w[1]); + w += 2; + } + while (--numRounds2 != 0); + m = _mm_aesenc_si128(m, w[0]); + m = _mm_aesenclast_si128(m, w[1]); + *data = _mm_xor_si128(*data, m); + } + *p = ctr; +} + +#else + +void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks); +void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks); +void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks); + +void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCbc_Encode(p, data, numBlocks); +} + +void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCbc_Decode(p, data, numBlocks); +} + +void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks) +{ + AesCtr_Code(p, data, numBlocks); +} + +#endif |