1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
|
/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
* SPDX-License-Identifier: Apache-2.0"
*
* Written by Nir Drucker, Shay Gueron and Dusan Kostic,
* AWS Cryptographic Algorithms Group.
*/
#if defined(S2N_BIKE_R3_AVX512)
#include <assert.h>
#include "cleanup.h"
#include "gf2x_internal.h"
#define AVX512_INTERNAL
#include "x86_64_intrinsic.h"
void karatzuba_add1_avx512(OUT uint64_t *alah,
OUT uint64_t *blbh,
IN const uint64_t *a,
IN const uint64_t *b,
IN const size_t qwords_len)
{
assert(qwords_len % REG_QWORDS == 0);
REG_T va0, va1, vb0, vb1;
for(size_t i = 0; i < qwords_len; i += REG_QWORDS) {
va0 = LOAD(&a[i]);
va1 = LOAD(&a[i + qwords_len]);
vb0 = LOAD(&b[i]);
vb1 = LOAD(&b[i + qwords_len]);
STORE(&alah[i], va0 ^ va1);
STORE(&blbh[i], vb0 ^ vb1);
}
}
void karatzuba_add2_avx512(OUT uint64_t *z,
IN const uint64_t *x,
IN const uint64_t *y,
IN const size_t qwords_len)
{
assert(qwords_len % REG_QWORDS == 0);
REG_T vx, vy;
for(size_t i = 0; i < qwords_len; i += REG_QWORDS) {
vx = LOAD(&x[i]);
vy = LOAD(&y[i]);
STORE(&z[i], vx ^ vy);
}
}
void karatzuba_add3_avx512(OUT uint64_t *c,
IN const uint64_t *mid,
IN const size_t qwords_len)
{
assert(qwords_len % REG_QWORDS == 0);
REG_T vr0, vr1, vr2, vr3, vt;
uint64_t *c0 = c;
uint64_t *c1 = &c[qwords_len];
uint64_t *c2 = &c[2 * qwords_len];
uint64_t *c3 = &c[3 * qwords_len];
for(size_t i = 0; i < qwords_len; i += REG_QWORDS) {
vr0 = LOAD(&c0[i]);
vr1 = LOAD(&c1[i]);
vr2 = LOAD(&c2[i]);
vr3 = LOAD(&c3[i]);
vt = LOAD(&mid[i]);
STORE(&c1[i], vt ^ vr0 ^ vr1);
STORE(&c2[i], vt ^ vr2 ^ vr3);
}
}
// c = a mod (x^r - 1)
void gf2x_red_avx512(OUT pad_r_t *c, IN const dbl_pad_r_t *a)
{
const uint64_t *a64 = (const uint64_t *)a;
uint64_t * c64 = (uint64_t *)c;
for(size_t i = 0; i < R_QWORDS; i += REG_QWORDS) {
REG_T vt0 = LOAD(&a64[i]);
REG_T vt1 = LOAD(&a64[i + R_QWORDS]);
REG_T vt2 = LOAD(&a64[i + R_QWORDS - 1]);
vt1 = SLLI_I64(vt1, LAST_R_QWORD_TRAIL);
vt2 = SRLI_I64(vt2, LAST_R_QWORD_LEAD);
vt0 ^= (vt1 | vt2);
STORE(&c64[i], vt0);
}
c64[R_QWORDS - 1] &= LAST_R_QWORD_MASK;
// Clean the secrets from the upper part of c
secure_clean((uint8_t *)&c64[R_QWORDS],
(R_PADDED_QWORDS - R_QWORDS) * sizeof(uint64_t));
}
#endif
typedef int dummy_typedef_to_avoid_empty_translation_unit_warning;
|