contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_portable.c


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108

/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
 * SPDX-License-Identifier: Apache-2.0" 
 * 
 * Written by Nir Drucker and Shay Gueron, 
 * AWS Cryptographic Algorithms Group. 
 * (ndrucker@amazon.com, gueron@amazon.com) 
 */ 
 
#include "gf2x.h" 
#include "utilities.h" 
 
#if !defined(USE_OPENSSL_GF2M) 
 
// The algorithm is based on the windowing method, for example as in: 
// Brent, R. P., Gaudry, P., Thomé, E., & Zimmermann, P. (2008, May), "Faster 
// multiplication in GF (2)[x]". In: International Algorithmic Number Theory 
// Symposium (pp. 153-166). Springer, Berlin, Heidelberg. In this implementation, 
// the last three bits are multiplied using a schoolbook multiplicaiton. 
void 
gf2x_mul_1x1(uint64_t *c, uint64_t a, uint64_t b) 
{ 
  uint64_t       h = 0, l = 0, u[8]; 
  const uint64_t w = 64; 
  const uint64_t s = 3; 
  // Multiplying 64 bits by 7 can results in an overflow of 3 bits. 
  // Therefore, these bits are masked out, and are treated in step 3. 
  const uint64_t b0 = b & 0x1fffffffffffffff; 
 
  // Step 1: Calculate a multiplication table with 8 entries. 
  u[0] = 0; 
  u[1] = b0; 
  u[2] = u[1] << 1; 
  u[3] = u[2] ^ b0; 
  u[4] = u[2] << 1; 
  u[5] = u[4] ^ b0; 
  u[6] = u[3] << 1; 
  u[7] = u[6] ^ b0; 
 
  // Step 2: Multiply two elements in parallel in poisitions i,i+s 
  l = u[a & 7] ^ (u[(a >> 3) & 7] << 3); 
  h = (u[(a >> 3) & 7] >> 61); 
  for(uint32_t i = (2 * s); i < w; i += (2 * s)) 
  { 
    uint64_t g1 = u[(a >> i) & 7]; 
    uint64_t g2 = u[(a >> (i + s)) & 7]; 
 
    l ^= (g1 << i) ^ (g2 << (i + s)); 
    h ^= (g1 >> (w - i)) ^ (g2 >> (w - (i + s))); 
  } 
 
  // Step 3: Multiply the last three bits. 
  for(uint8_t i = 61; i < 64; i++) 
  { 
    uint64_t mask = (-((b >> i) & 1)); 
    l ^= ((a << i) & mask); 
    h ^= ((a >> (w - i)) & mask); 
  } 
 
  c[0] = l; 
  c[1] = h; 
} 
 
void 
karatzuba_add1(OUT const uint64_t *res, 
               IN const uint64_t *a, 
               IN const uint64_t *b, 
               IN const uint64_t  n_half, 
               IN uint64_t *alah) 
{ 
  for(uint32_t j = 0; j < n_half; j++) 
  { 
    alah[j + 0 * n_half] = a[j] ^ a[n_half + j]; 
    alah[j + 1 * n_half] = b[j] ^ b[n_half + j]; 
    alah[j + 2 * n_half] = res[n_half + j] ^ res[2 * n_half + j]; 
  } 
} 
 
void 
karatzuba_add2(OUT uint64_t *res1, 
               OUT uint64_t *res2, 
               IN const uint64_t *res, 
               IN const uint64_t *tmp, 
               IN const uint64_t  n_half) 
{ 
  for(uint32_t j = 0; j < n_half; j++) 
  { 
    res1[j] ^= res[j] ^ tmp[j]; 
    res2[j] ^= res2[n_half + j] ^ tmp[j]; 
  } 
} 
 
void 
red(uint64_t *a) 
{ 
  for(uint32_t i = 0; i < R_QW; i++) 
  { 
    const uint64_t temp0 = a[R_QW + i - 1]; 
    const uint64_t temp1 = a[R_QW + i]; 
    a[i] ^= (temp0 >> LAST_R_QW_LEAD) | (temp1 << LAST_R_QW_TRAIL); 
  } 
 
  a[R_QW - 1] &= LAST_R_QW_MASK; 
 
  // Clean the secrets from the upper half of a. 
  secure_clean((uint8_t *)&a[R_QW], sizeof(uint64_t) * R_QW); 
} 
 
#endif