aboutsummaryrefslogtreecommitdiffstats
path: root/contrib/libs/lzmasdk/AesOpt.c
diff options
context:
space:
mode:
authorshadchin <shadchin@yandex-team.ru>2022-02-10 16:44:39 +0300
committerDaniil Cherednik <dcherednik@yandex-team.ru>2022-02-10 16:44:39 +0300
commite9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (patch)
tree64175d5cadab313b3e7039ebaa06c5bc3295e274 /contrib/libs/lzmasdk/AesOpt.c
parent2598ef1d0aee359b4b6d5fdd1758916d5907d04f (diff)
downloadydb-e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0.tar.gz
Restoring authorship annotation for <shadchin@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'contrib/libs/lzmasdk/AesOpt.c')
-rw-r--r--contrib/libs/lzmasdk/AesOpt.c356
1 files changed, 178 insertions, 178 deletions
diff --git a/contrib/libs/lzmasdk/AesOpt.c b/contrib/libs/lzmasdk/AesOpt.c
index 6ade8f42ff..00291288ba 100644
--- a/contrib/libs/lzmasdk/AesOpt.c
+++ b/contrib/libs/lzmasdk/AesOpt.c
@@ -1,188 +1,188 @@
-/* AesOpt.c -- Intel's AES
-2017-06-08 : Igor Pavlov : Public domain */
-
-#include "Precomp.h"
-
-#include "CpuArch.h"
-
-#ifdef MY_CPU_X86_OR_AMD64
-#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
-#define USE_INTEL_AES
-#endif
-#endif
-
-#ifdef USE_INTEL_AES
-
+/* AesOpt.c -- Intel's AES
+2017-06-08 : Igor Pavlov : Public domain */
+
+#include "Precomp.h"
+
+#include "CpuArch.h"
+
+#ifdef MY_CPU_X86_OR_AMD64
+#if (_MSC_VER > 1500) || (_MSC_FULL_VER >= 150030729)
+#define USE_INTEL_AES
+#endif
+#endif
+
+#ifdef USE_INTEL_AES
+
#if defined(__clang__)
#define TARGET_AES __attribute__((__target__("aes")))
#else
#define TARGET_AES
#endif
-#include <wmmintrin.h>
-
+#include <wmmintrin.h>
+
void TARGET_AES MY_FAST_CALL AesCbc_Encode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
-{
- __m128i m = *p;
- for (; numBlocks != 0; numBlocks--, data++)
- {
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p + 3;
- m = _mm_xor_si128(m, *data);
- m = _mm_xor_si128(m, p[2]);
- do
- {
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
- w += 2;
- }
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
- *data = m;
- }
- *p = m;
-}
-
-#define NUM_WAYS 3
-
-#define AES_OP_W(op, n) { \
- const __m128i t = w[n]; \
- m0 = op(m0, t); \
- m1 = op(m1, t); \
- m2 = op(m2, t); \
- }
-
-#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
-#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
-#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
-#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
-
+{
+ __m128i m = *p;
+ for (; numBlocks != 0; numBlocks--, data++)
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+ const __m128i *w = p + 3;
+ m = _mm_xor_si128(m, *data);
+ m = _mm_xor_si128(m, p[2]);
+ do
+ {
+ m = _mm_aesenc_si128(m, w[0]);
+ m = _mm_aesenc_si128(m, w[1]);
+ w += 2;
+ }
+ while (--numRounds2 != 0);
+ m = _mm_aesenc_si128(m, w[0]);
+ m = _mm_aesenclast_si128(m, w[1]);
+ *data = m;
+ }
+ *p = m;
+}
+
+#define NUM_WAYS 3
+
+#define AES_OP_W(op, n) { \
+ const __m128i t = w[n]; \
+ m0 = op(m0, t); \
+ m1 = op(m1, t); \
+ m2 = op(m2, t); \
+ }
+
+#define AES_DEC(n) AES_OP_W(_mm_aesdec_si128, n)
+#define AES_DEC_LAST(n) AES_OP_W(_mm_aesdeclast_si128, n)
+#define AES_ENC(n) AES_OP_W(_mm_aesenc_si128, n)
+#define AES_ENC_LAST(n) AES_OP_W(_mm_aesenclast_si128, n)
+
void TARGET_AES MY_FAST_CALL AesCbc_Decode_Intel(__m128i *p, __m128i *data, size_t numBlocks)
-{
- __m128i iv = *p;
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
- {
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m0, m1, m2;
- {
- const __m128i t = w[2];
- m0 = _mm_xor_si128(t, data[0]);
- m1 = _mm_xor_si128(t, data[1]);
- m2 = _mm_xor_si128(t, data[2]);
- }
- numRounds2--;
- do
- {
- AES_DEC(1)
- AES_DEC(0)
- w -= 2;
- }
- while (--numRounds2 != 0);
- AES_DEC(1)
- AES_DEC_LAST(0)
-
- {
- __m128i t;
- t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
- t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
- t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
- }
- }
- for (; numBlocks != 0; numBlocks--, data++)
- {
- UInt32 numRounds2 = *(const UInt32 *)(p + 1);
- const __m128i *w = p + numRounds2 * 2;
- __m128i m = _mm_xor_si128(w[2], *data);
- numRounds2--;
- do
- {
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdec_si128(m, w[0]);
- w -= 2;
- }
- while (--numRounds2 != 0);
- m = _mm_aesdec_si128(m, w[1]);
- m = _mm_aesdeclast_si128(m, w[0]);
-
- m = _mm_xor_si128(m, iv);
- iv = *data;
- *data = m;
- }
- *p = iv;
-}
-
+{
+ __m128i iv = *p;
+ for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p + 1);
+ const __m128i *w = p + numRounds2 * 2;
+ __m128i m0, m1, m2;
+ {
+ const __m128i t = w[2];
+ m0 = _mm_xor_si128(t, data[0]);
+ m1 = _mm_xor_si128(t, data[1]);
+ m2 = _mm_xor_si128(t, data[2]);
+ }
+ numRounds2--;
+ do
+ {
+ AES_DEC(1)
+ AES_DEC(0)
+ w -= 2;
+ }
+ while (--numRounds2 != 0);
+ AES_DEC(1)
+ AES_DEC_LAST(0)
+
+ {
+ __m128i t;
+ t = _mm_xor_si128(m0, iv); iv = data[0]; data[0] = t;
+ t = _mm_xor_si128(m1, iv); iv = data[1]; data[1] = t;
+ t = _mm_xor_si128(m2, iv); iv = data[2]; data[2] = t;
+ }
+ }
+ for (; numBlocks != 0; numBlocks--, data++)
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p + 1);
+ const __m128i *w = p + numRounds2 * 2;
+ __m128i m = _mm_xor_si128(w[2], *data);
+ numRounds2--;
+ do
+ {
+ m = _mm_aesdec_si128(m, w[1]);
+ m = _mm_aesdec_si128(m, w[0]);
+ w -= 2;
+ }
+ while (--numRounds2 != 0);
+ m = _mm_aesdec_si128(m, w[1]);
+ m = _mm_aesdeclast_si128(m, w[0]);
+
+ m = _mm_xor_si128(m, iv);
+ iv = *data;
+ *data = m;
+ }
+ *p = iv;
+}
+
void TARGET_AES MY_FAST_CALL AesCtr_Code_Intel(__m128i *p, __m128i *data, size_t numBlocks)
-{
- __m128i ctr = *p;
+{
+ __m128i ctr = *p;
__m128i one = _mm_set_epi64x(1, 0);
- for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
- {
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p;
- __m128i m0, m1, m2;
- {
- const __m128i t = w[2];
- ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
- ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
- }
- w += 3;
- do
- {
- AES_ENC(0)
- AES_ENC(1)
- w += 2;
- }
- while (--numRounds2 != 0);
- AES_ENC(0)
- AES_ENC_LAST(1)
- data[0] = _mm_xor_si128(data[0], m0);
- data[1] = _mm_xor_si128(data[1], m1);
- data[2] = _mm_xor_si128(data[2], m2);
- }
- for (; numBlocks != 0; numBlocks--, data++)
- {
- UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
- const __m128i *w = p;
- __m128i m;
- ctr = _mm_add_epi64(ctr, one);
- m = _mm_xor_si128(ctr, p[2]);
- w += 3;
- do
- {
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenc_si128(m, w[1]);
- w += 2;
- }
- while (--numRounds2 != 0);
- m = _mm_aesenc_si128(m, w[0]);
- m = _mm_aesenclast_si128(m, w[1]);
- *data = _mm_xor_si128(*data, m);
- }
- *p = ctr;
-}
-
-#else
-
-void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
-void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
-
-void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
-{
- AesCbc_Encode(p, data, numBlocks);
-}
-
-void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
-{
- AesCbc_Decode(p, data, numBlocks);
-}
-
-void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
-{
- AesCtr_Code(p, data, numBlocks);
-}
-
-#endif
+ for (; numBlocks >= NUM_WAYS; numBlocks -= NUM_WAYS, data += NUM_WAYS)
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+ const __m128i *w = p;
+ __m128i m0, m1, m2;
+ {
+ const __m128i t = w[2];
+ ctr = _mm_add_epi64(ctr, one); m0 = _mm_xor_si128(ctr, t);
+ ctr = _mm_add_epi64(ctr, one); m1 = _mm_xor_si128(ctr, t);
+ ctr = _mm_add_epi64(ctr, one); m2 = _mm_xor_si128(ctr, t);
+ }
+ w += 3;
+ do
+ {
+ AES_ENC(0)
+ AES_ENC(1)
+ w += 2;
+ }
+ while (--numRounds2 != 0);
+ AES_ENC(0)
+ AES_ENC_LAST(1)
+ data[0] = _mm_xor_si128(data[0], m0);
+ data[1] = _mm_xor_si128(data[1], m1);
+ data[2] = _mm_xor_si128(data[2], m2);
+ }
+ for (; numBlocks != 0; numBlocks--, data++)
+ {
+ UInt32 numRounds2 = *(const UInt32 *)(p + 1) - 1;
+ const __m128i *w = p;
+ __m128i m;
+ ctr = _mm_add_epi64(ctr, one);
+ m = _mm_xor_si128(ctr, p[2]);
+ w += 3;
+ do
+ {
+ m = _mm_aesenc_si128(m, w[0]);
+ m = _mm_aesenc_si128(m, w[1]);
+ w += 2;
+ }
+ while (--numRounds2 != 0);
+ m = _mm_aesenc_si128(m, w[0]);
+ m = _mm_aesenclast_si128(m, w[1]);
+ *data = _mm_xor_si128(*data, m);
+ }
+ *p = ctr;
+}
+
+#else
+
+void MY_FAST_CALL AesCbc_Encode(UInt32 *ivAes, Byte *data, size_t numBlocks);
+void MY_FAST_CALL AesCbc_Decode(UInt32 *ivAes, Byte *data, size_t numBlocks);
+void MY_FAST_CALL AesCtr_Code(UInt32 *ivAes, Byte *data, size_t numBlocks);
+
+void MY_FAST_CALL AesCbc_Encode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+{
+ AesCbc_Encode(p, data, numBlocks);
+}
+
+void MY_FAST_CALL AesCbc_Decode_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+{
+ AesCbc_Decode(p, data, numBlocks);
+}
+
+void MY_FAST_CALL AesCtr_Code_Intel(UInt32 *p, Byte *data, size_t numBlocks)
+{
+ AesCtr_Code(p, data, numBlocks);
+}
+
+#endif