author     orivej <orivej@yandex-team.ru>                2022-02-10 16:44:49 +0300
committer  Daniil Cherednik <dcherednik@yandex-team.ru>  2022-02-10 16:44:49 +0300
commit     718c552901d703c502ccbefdfc3c9028d608b947 (patch)
tree       46534a98bbefcd7b1f3faa5b52c138ab27db75b7 /contrib/restricted/aws/s2n/pq-crypto
parent     e9656aae26e0358d5378e5b63dcac5c8dbe0e4d0 (diff)
download   ydb-718c552901d703c502ccbefdfc3c9028d608b947.tar.gz
Restoring authorship annotation for <orivej@yandex-team.ru>. Commit 1 of 2.
Diffstat (limited to 'contrib/restricted/aws/s2n/pq-crypto')
126 files changed, 17149 insertions, 17149 deletions
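The hunks below remove and re-add identical source lines; per the commit message, only the per-line authorship annotation changes. For orientation, here is a minimal usage sketch of the AES-CTR PRF interface restored in bike_r1/aes_ctr_prf.h. It is illustrative only and not part of the commit; the function name and the 64-byte request size are arbitrary.

#include "aes_ctr_prf.h"

/* Illustrative sketch (not part of the commit): draw pseudo-random bytes
 * from a seeded AES256-CTR PRF using the interface shown below. */
ret_t
example_draw_bytes(OUT uint8_t out[64], IN const seed_t *seed)
{
  /* The state is wiped and its key schedule freed when it leaves scope. */
  DEFER_CLEANUP(aes_ctr_prf_state_t prf = {0}, aes_ctr_prf_state_cleanup);

  /* Bound the number of AES invocations, as the KEM code does. */
  GUARD(init_aes_ctr_prf_state(&prf, MAX_AES_INVOKATION, seed));

  /* Serves the request from the internal 16-byte buffer and refills it
   * with fresh AES-CTR blocks as needed. */
  GUARD(aes_ctr_prf(out, &prf, 64));

  return SUCCESS;
}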
diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.c index 26c99bc80d..90b2f10824 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.c @@ -1,105 +1,105 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "aes_ctr_prf.h" -#include "utilities.h" -#include <string.h> - -ret_t -init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, - IN const uint32_t max_invokations, - IN const seed_t *seed) -{ - if(0 == max_invokations) - { - BIKE_ERROR(E_AES_CTR_PRF_INIT_FAIL); - } - - // Set the key schedule (from seed). - // Make sure the size matches the AES256 key size - DEFER_CLEANUP(aes256_key_t key, aes256_key_cleanup); - - bike_static_assert(sizeof(*seed) == sizeof(key.raw), seed_size_equals_ky_size); - memcpy(key.raw, seed->raw, sizeof(key.raw)); - - GUARD(aes256_key_expansion(&s->ks_ptr, &key)); - - // Initialize buffer and counter - s->ctr.u.qw[0] = 0; - s->ctr.u.qw[1] = 0; - s->buffer.u.qw[0] = 0; - s->buffer.u.qw[1] = 0; - - s->pos = AES256_BLOCK_SIZE; - s->rem_invokations = max_invokations; - - SEDMSG(" Init aes_prf_ctr state:\n"); - SEDMSG(" s.pos = %d\n", s->pos); - SEDMSG(" s.rem_invokations = %u\n", s->rem_invokations); - SEDMSG(" s.ctr = 0x\n"); - - return SUCCESS; -} - -_INLINE_ ret_t -perform_aes(OUT uint8_t *ct, IN OUT aes_ctr_prf_state_t *s) -{ - // Ensure that the CTR is big enough - bike_static_assert( - ((sizeof(s->ctr.u.qw[0]) == 8) && (BIT(33) >= MAX_AES_INVOKATION)), - ctr_size_is_too_small); - - if(0 == s->rem_invokations) - { - BIKE_ERROR(E_AES_OVER_USED); - } - - GUARD(aes256_enc(ct, s->ctr.u.bytes, &s->ks_ptr)); - - s->ctr.u.qw[0]++; - s->rem_invokations--; - - return SUCCESS; -} - -ret_t -aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN const uint32_t len) -{ - // When Len is smaller than whats left in the buffer - // No need in additional AES - if((len + s->pos) <= AES256_BLOCK_SIZE) - { - memcpy(a, &s->buffer.u.bytes[s->pos], len); - s->pos += len; - - return SUCCESS; - } - - // If s.pos != AES256_BLOCK_SIZE then copy whats left in the buffer - // Else copy zero bytes - uint32_t idx = AES256_BLOCK_SIZE - s->pos; - memcpy(a, &s->buffer.u.bytes[s->pos], idx); - - // Init s.pos - s->pos = 0; - - // Copy full AES blocks - while((len - idx) >= AES256_BLOCK_SIZE) - { - GUARD(perform_aes(&a[idx], s)); - idx += AES256_BLOCK_SIZE; - } - - GUARD(perform_aes(s->buffer.u.bytes, s)); - - // Copy the tail - s->pos = len - idx; - memcpy(&a[idx], s->buffer.u.bytes, s->pos); - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "aes_ctr_prf.h" +#include "utilities.h" +#include <string.h> + +ret_t +init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, + IN const uint32_t max_invokations, + IN const seed_t *seed) +{ + if(0 == max_invokations) + { + BIKE_ERROR(E_AES_CTR_PRF_INIT_FAIL); + } + + // Set the key schedule (from seed). 
+ // Make sure the size matches the AES256 key size + DEFER_CLEANUP(aes256_key_t key, aes256_key_cleanup); + + bike_static_assert(sizeof(*seed) == sizeof(key.raw), seed_size_equals_ky_size); + memcpy(key.raw, seed->raw, sizeof(key.raw)); + + GUARD(aes256_key_expansion(&s->ks_ptr, &key)); + + // Initialize buffer and counter + s->ctr.u.qw[0] = 0; + s->ctr.u.qw[1] = 0; + s->buffer.u.qw[0] = 0; + s->buffer.u.qw[1] = 0; + + s->pos = AES256_BLOCK_SIZE; + s->rem_invokations = max_invokations; + + SEDMSG(" Init aes_prf_ctr state:\n"); + SEDMSG(" s.pos = %d\n", s->pos); + SEDMSG(" s.rem_invokations = %u\n", s->rem_invokations); + SEDMSG(" s.ctr = 0x\n"); + + return SUCCESS; +} + +_INLINE_ ret_t +perform_aes(OUT uint8_t *ct, IN OUT aes_ctr_prf_state_t *s) +{ + // Ensure that the CTR is big enough + bike_static_assert( + ((sizeof(s->ctr.u.qw[0]) == 8) && (BIT(33) >= MAX_AES_INVOKATION)), + ctr_size_is_too_small); + + if(0 == s->rem_invokations) + { + BIKE_ERROR(E_AES_OVER_USED); + } + + GUARD(aes256_enc(ct, s->ctr.u.bytes, &s->ks_ptr)); + + s->ctr.u.qw[0]++; + s->rem_invokations--; + + return SUCCESS; +} + +ret_t +aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN const uint32_t len) +{ + // When Len is smaller than whats left in the buffer + // No need in additional AES + if((len + s->pos) <= AES256_BLOCK_SIZE) + { + memcpy(a, &s->buffer.u.bytes[s->pos], len); + s->pos += len; + + return SUCCESS; + } + + // If s.pos != AES256_BLOCK_SIZE then copy whats left in the buffer + // Else copy zero bytes + uint32_t idx = AES256_BLOCK_SIZE - s->pos; + memcpy(a, &s->buffer.u.bytes[s->pos], idx); + + // Init s.pos + s->pos = 0; + + // Copy full AES blocks + while((len - idx) >= AES256_BLOCK_SIZE) + { + GUARD(perform_aes(&a[idx], s)); + idx += AES256_BLOCK_SIZE; + } + + GUARD(perform_aes(s->buffer.u.bytes, s)); + + // Copy the tail + s->pos = len - idx; + memcpy(&a[idx], s->buffer.u.bytes, s->pos); + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.h index ac17d4ddd5..bfcdeebd4a 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_ctr_prf.h @@ -1,49 +1,49 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "aes_wrap.h" - -////////////////////////////// -// Types -///////////////////////////// - -typedef struct aes_ctr_prf_state_s -{ - uint128_t ctr; - uint128_t buffer; - aes256_ks_t ks_ptr; - uint32_t rem_invokations; - uint8_t pos; -} aes_ctr_prf_state_t; - -////////////////////////////// -// Methods -///////////////////////////// - -ret_t -init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, - IN uint32_t max_invokations, - IN const seed_t *seed); - -ret_t -aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN uint32_t len); - -_INLINE_ void -finalize_aes_ctr_prf(IN OUT aes_ctr_prf_state_t *s) -{ - aes256_free_ks(&s->ks_ptr); - secure_clean((uint8_t *)s, sizeof(*s)); -} - -_INLINE_ void -aes_ctr_prf_state_cleanup(IN OUT aes_ctr_prf_state_t *s) -{ - finalize_aes_ctr_prf(s); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "aes_wrap.h" + +////////////////////////////// +// Types +///////////////////////////// + +typedef struct aes_ctr_prf_state_s +{ + uint128_t ctr; + uint128_t buffer; + aes256_ks_t ks_ptr; + uint32_t rem_invokations; + uint8_t pos; +} aes_ctr_prf_state_t; + +////////////////////////////// +// Methods +///////////////////////////// + +ret_t +init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, + IN uint32_t max_invokations, + IN const seed_t *seed); + +ret_t +aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN uint32_t len); + +_INLINE_ void +finalize_aes_ctr_prf(IN OUT aes_ctr_prf_state_t *s) +{ + aes256_free_ks(&s->ks_ptr); + secure_clean((uint8_t *)s, sizeof(*s)); +} + +_INLINE_ void +aes_ctr_prf_state_cleanup(IN OUT aes_ctr_prf_state_t *s) +{ + finalize_aes_ctr_prf(s); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_wrap.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_wrap.h index 26f439ac5e..4d34a1c229 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_wrap.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/aes_wrap.h @@ -1,70 +1,70 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) - */ - -#pragma once - -#include "cleanup.h" -#include <openssl/evp.h> - -#define MAX_AES_INVOKATION (MASK(32)) - -#define AES256_KEY_SIZE (32U) -#define AES256_KEY_BITS (AES256_KEY_SIZE * 8) -#define AES256_BLOCK_SIZE (16U) -#define AES256_ROUNDS (14U) - -typedef ALIGN(16) struct aes256_key_s -{ - uint8_t raw[AES256_KEY_SIZE]; -} aes256_key_t; - -_INLINE_ void -aes256_key_cleanup(aes256_key_t *o) -{ - secure_clean(o->raw, sizeof(*o)); -} - -// Using OpenSSL structures -typedef EVP_CIPHER_CTX *aes256_ks_t; - -_INLINE_ ret_t -aes256_key_expansion(OUT aes256_ks_t *ks, IN const aes256_key_t *key) -{ - *ks = EVP_CIPHER_CTX_new(); - if(*ks == NULL) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - if(0 == EVP_EncryptInit_ex(*ks, EVP_aes_256_ecb(), NULL, key->raw, NULL)) - { - EVP_CIPHER_CTX_free(*ks); - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - EVP_CIPHER_CTX_set_padding(*ks, 0); - - return SUCCESS; -} - -_INLINE_ ret_t -aes256_enc(OUT uint8_t *ct, IN const uint8_t *pt, IN const aes256_ks_t *ks) -{ - int outlen = 0; - if(0 == EVP_EncryptUpdate(*ks, ct, &outlen, pt, AES256_BLOCK_SIZE)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - return SUCCESS; -} - -_INLINE_ void -aes256_free_ks(OUT aes256_ks_t *ks) -{ - EVP_CIPHER_CTX_free(*ks); - *ks = NULL; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) + */ + +#pragma once + +#include "cleanup.h" +#include <openssl/evp.h> + +#define MAX_AES_INVOKATION (MASK(32)) + +#define AES256_KEY_SIZE (32U) +#define AES256_KEY_BITS (AES256_KEY_SIZE * 8) +#define AES256_BLOCK_SIZE (16U) +#define AES256_ROUNDS (14U) + +typedef ALIGN(16) struct aes256_key_s +{ + uint8_t raw[AES256_KEY_SIZE]; +} aes256_key_t; + +_INLINE_ void +aes256_key_cleanup(aes256_key_t *o) +{ + secure_clean(o->raw, sizeof(*o)); +} + +// Using OpenSSL structures +typedef EVP_CIPHER_CTX *aes256_ks_t; + +_INLINE_ ret_t +aes256_key_expansion(OUT aes256_ks_t *ks, IN const aes256_key_t *key) +{ + *ks = EVP_CIPHER_CTX_new(); + if(*ks == NULL) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + if(0 == EVP_EncryptInit_ex(*ks, EVP_aes_256_ecb(), NULL, key->raw, NULL)) + { + EVP_CIPHER_CTX_free(*ks); + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + EVP_CIPHER_CTX_set_padding(*ks, 0); + + return SUCCESS; +} + +_INLINE_ ret_t +aes256_enc(OUT uint8_t *ct, IN const uint8_t *pt, IN const aes256_ks_t *ks) +{ + int outlen = 0; + if(0 == EVP_EncryptUpdate(*ks, ct, &outlen, pt, AES256_BLOCK_SIZE)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + return SUCCESS; +} + +_INLINE_ void +aes256_free_ks(OUT aes256_ks_t *ks) +{ + EVP_CIPHER_CTX_free(*ks); + *ks = NULL; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_defs.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_defs.h index c64740635e..7e9183f7e0 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_defs.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_defs.h @@ -1,109 +1,109 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "defs.h" - -#define INDCPA -#define LEVEL 1 - -//////////////////////////////////////////// -// BIKE Parameters -/////////////////////////////////////////// -#define N0 2 - -#ifndef LEVEL -# define LEVEL 1 -#endif - -#if(LEVEL == 3) -# ifdef INDCPA -# define R_BITS 19853 -# else -# define R_BITS 24821 -# endif -# define DV 103 -# define T1 199 - -# define THRESHOLD_COEFF0 15.932 -# define THRESHOLD_COEFF1 0.0052936 - -// The gfm code is optimized to a block size in this case: -# define BLOCK_SIZE 32768 -#elif(LEVEL == 1) -// 64-bits of post-quantum security parameters (BIKE paper): -# ifdef INDCPA -# define R_BITS 10163 -# else -# define R_BITS 11779 -# endif -# define DV 71 -# define T1 134 - -# define THRESHOLD_COEFF0 13.530 -# define THRESHOLD_COEFF1 0.0069721 - -// The gfm code is optimized to a block size in this case: -# define BLOCK_SIZE (16384) -#else -# error "Bad level, choose one of 1/3" -#endif - -#ifdef INDCPA -# define NUM_OF_SEEDS 2 -#else -# define NUM_OF_SEEDS 3 -#endif - -// Round the size to the nearest byte. -// SIZE suffix, is the number of bytes (uint8_t). 
-#define N_BITS (R_BITS * N0) -#define R_SIZE DIVIDE_AND_CEIL(R_BITS, 8) -#define R_QW DIVIDE_AND_CEIL(R_BITS, 8 * QW_SIZE) -#define R_YMM DIVIDE_AND_CEIL(R_BITS, 8 * YMM_SIZE) -#define R_ZMM DIVIDE_AND_CEIL(R_BITS, 8 * ZMM_SIZE) - -#define N_SIZE DIVIDE_AND_CEIL(N_BITS, 8) - -#define R_BLOCKS DIVIDE_AND_CEIL(R_BITS, BLOCK_SIZE) -#define R_PADDED (R_BLOCKS * BLOCK_SIZE) -#define R_PADDED_SIZE (R_PADDED / 8) -#define R_PADDED_QW (R_PADDED / 64) - -#define N_BLOCKS DIVIDE_AND_CEIL(N_BITS, BLOCK_SIZE) -#define N_PADDED (N_BLOCKS * BLOCK_SIZE) -#define N_PADDED_SIZE (N_PADDED / 8) -#define N_PADDED_QW (N_PADDED / 64) - -#define R_DDQWORDS_BITS (DIVIDE_AND_CEIL(R_BITS, ALL_YMM_SIZE) * ALL_YMM_SIZE) -bike_static_assert((R_BITS % ALL_YMM_SIZE != 0), rbits_512_err); - -#define N_DDQWORDS_BITS (R_DDQWORDS_BITS + R_BITS) -bike_static_assert((N_BITS % ALL_YMM_SIZE != 0), nbits_512_err); - -#define LAST_R_QW_LEAD (R_BITS & MASK(6)) -#define LAST_R_QW_TRAIL (64 - LAST_R_QW_LEAD) -#define LAST_R_QW_MASK MASK(LAST_R_QW_LEAD) - -#define LAST_R_BYTE_LEAD (R_BITS & MASK(3)) -#define LAST_R_BYTE_TRAIL (8 - LAST_R_BYTE_LEAD) -#define LAST_R_BYTE_MASK MASK(LAST_R_BYTE_LEAD) - -// BIKE auxiliary functions parameters: -#define ELL_K_BITS 256 -#define ELL_K_SIZE (ELL_K_BITS / 8) - -//////////////////////////////// -// Parameters for the BG decoder. -//////////////////////////////// -#define BGF_DECODER -#define DELTA 3 -#define SLICES (LOG2_MSB(DV) + 1) - -#define BGF_DECODER +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "defs.h" + +#define INDCPA +#define LEVEL 1 + +//////////////////////////////////////////// +// BIKE Parameters +/////////////////////////////////////////// +#define N0 2 + +#ifndef LEVEL +# define LEVEL 1 +#endif + +#if(LEVEL == 3) +# ifdef INDCPA +# define R_BITS 19853 +# else +# define R_BITS 24821 +# endif +# define DV 103 +# define T1 199 + +# define THRESHOLD_COEFF0 15.932 +# define THRESHOLD_COEFF1 0.0052936 + +// The gfm code is optimized to a block size in this case: +# define BLOCK_SIZE 32768 +#elif(LEVEL == 1) +// 64-bits of post-quantum security parameters (BIKE paper): +# ifdef INDCPA +# define R_BITS 10163 +# else +# define R_BITS 11779 +# endif +# define DV 71 +# define T1 134 + +# define THRESHOLD_COEFF0 13.530 +# define THRESHOLD_COEFF1 0.0069721 + +// The gfm code is optimized to a block size in this case: +# define BLOCK_SIZE (16384) +#else +# error "Bad level, choose one of 1/3" +#endif + +#ifdef INDCPA +# define NUM_OF_SEEDS 2 +#else +# define NUM_OF_SEEDS 3 +#endif + +// Round the size to the nearest byte. +// SIZE suffix, is the number of bytes (uint8_t). 
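// Illustrative worked example (not part of the original header), assuming
// LEVEL == 1 with INDCPA, i.e. R_BITS = 10163 and BLOCK_SIZE = 16384:
//   R_SIZE   = DIVIDE_AND_CEIL(10163, 8)     = 1271 bytes
//   N_BITS   = 10163 * 2                     = 20326
//   N_SIZE   = DIVIDE_AND_CEIL(20326, 8)     = 2541 bytes
//   R_BLOCKS = DIVIDE_AND_CEIL(10163, 16384) = 1
//   R_PADDED = 16384  -> R_PADDED_SIZE = 2048, R_PADDED_QW = 256
//   N_BLOCKS = 2, N_PADDED = 32768 -> N_PADDED_SIZE = 4096, N_PADDED_QW = 512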
+#define N_BITS (R_BITS * N0) +#define R_SIZE DIVIDE_AND_CEIL(R_BITS, 8) +#define R_QW DIVIDE_AND_CEIL(R_BITS, 8 * QW_SIZE) +#define R_YMM DIVIDE_AND_CEIL(R_BITS, 8 * YMM_SIZE) +#define R_ZMM DIVIDE_AND_CEIL(R_BITS, 8 * ZMM_SIZE) + +#define N_SIZE DIVIDE_AND_CEIL(N_BITS, 8) + +#define R_BLOCKS DIVIDE_AND_CEIL(R_BITS, BLOCK_SIZE) +#define R_PADDED (R_BLOCKS * BLOCK_SIZE) +#define R_PADDED_SIZE (R_PADDED / 8) +#define R_PADDED_QW (R_PADDED / 64) + +#define N_BLOCKS DIVIDE_AND_CEIL(N_BITS, BLOCK_SIZE) +#define N_PADDED (N_BLOCKS * BLOCK_SIZE) +#define N_PADDED_SIZE (N_PADDED / 8) +#define N_PADDED_QW (N_PADDED / 64) + +#define R_DDQWORDS_BITS (DIVIDE_AND_CEIL(R_BITS, ALL_YMM_SIZE) * ALL_YMM_SIZE) +bike_static_assert((R_BITS % ALL_YMM_SIZE != 0), rbits_512_err); + +#define N_DDQWORDS_BITS (R_DDQWORDS_BITS + R_BITS) +bike_static_assert((N_BITS % ALL_YMM_SIZE != 0), nbits_512_err); + +#define LAST_R_QW_LEAD (R_BITS & MASK(6)) +#define LAST_R_QW_TRAIL (64 - LAST_R_QW_LEAD) +#define LAST_R_QW_MASK MASK(LAST_R_QW_LEAD) + +#define LAST_R_BYTE_LEAD (R_BITS & MASK(3)) +#define LAST_R_BYTE_TRAIL (8 - LAST_R_BYTE_LEAD) +#define LAST_R_BYTE_MASK MASK(LAST_R_BYTE_LEAD) + +// BIKE auxiliary functions parameters: +#define ELL_K_BITS 256 +#define ELL_K_SIZE (ELL_K_BITS / 8) + +//////////////////////////////// +// Parameters for the BG decoder. +//////////////////////////////// +#define BGF_DECODER +#define DELTA 3 +#define SLICES (LOG2_MSB(DV) + 1) + +#define BGF_DECODER diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_r1_kem.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_r1_kem.c index cb566d7435..5267f4ab63 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_r1_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/bike_r1_kem.c @@ -1,299 +1,299 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include <string.h> - -#include "decode.h" -#include "gf2x.h" -#include "parallel_hash.h" -#include "sampling.h" -#include "tls/s2n_kem.h" - -_INLINE_ void -split_e(OUT split_e_t *splitted_e, IN const e_t *e) -{ - // Copy lower bytes (e0) - memcpy(splitted_e->val[0].raw, e->raw, R_SIZE); - - // Now load second value - for(uint32_t i = R_SIZE; i < N_SIZE; ++i) - { - splitted_e->val[1].raw[i - R_SIZE] = - ((e->raw[i] << LAST_R_BYTE_TRAIL) | (e->raw[i - 1] >> LAST_R_BYTE_LEAD)); - } - - // Fix corner case - if(N_SIZE < (2ULL * R_SIZE)) - { - splitted_e->val[1].raw[R_SIZE - 1] = (e->raw[N_SIZE - 1] >> LAST_R_BYTE_LEAD); - } - - // Fix last value - splitted_e->val[0].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - splitted_e->val[1].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; -} - -_INLINE_ void -merge_e(OUT e_t *e, IN const split_e_t *splitted_e) -{ - memcpy(e->raw, splitted_e->val[0].raw, R_SIZE); - - e->raw[R_SIZE - 1] = ((splitted_e->val[1].raw[0] << LAST_R_BYTE_LEAD) | - (e->raw[R_SIZE - 1] & LAST_R_BYTE_MASK)); - - // Now load second value - for(uint32_t i = 1; i < R_SIZE; ++i) - { - e->raw[R_SIZE + i - 1] = - ((splitted_e->val[1].raw[i] << LAST_R_BYTE_LEAD) | - (splitted_e->val[1].raw[i - 1] >> LAST_R_BYTE_TRAIL)); - } - - // Mask last byte - if(N_SIZE == (2ULL * R_SIZE)) - { - e->raw[N_SIZE - 1] = - (splitted_e->val[1].raw[R_SIZE - 1] >> LAST_R_BYTE_TRAIL); - } -} - -_INLINE_ ret_t -encrypt(OUT ct_t *ct, - IN const pk_t *pk, - IN const seed_t *seed, - IN const split_e_t *splitted_e) -{ - DEFER_CLEANUP(padded_r_t m = {0}, padded_r_cleanup); - DEFER_CLEANUP(dbl_pad_ct_t p_ct, dbl_pad_ct_cleanup); - - // Pad the public key - pad_pk_t p_pk = {0}; - p_pk[0].val = pk->val[0]; - p_pk[1].val = pk->val[1]; - - DMSG(" Sampling m.\n"); - GUARD(sample_uniform_r_bits(&m.val, seed, NO_RESTRICTION)); - - DMSG(" Calculating the ciphertext.\n"); - - GUARD(gf2x_mod_mul((uint64_t *)&p_ct[0], (uint64_t *)&m, (uint64_t *)&p_pk[0])); - GUARD(gf2x_mod_mul((uint64_t *)&p_ct[1], (uint64_t *)&m, (uint64_t *)&p_pk[1])); - - DMSG(" Addding Error to the ciphertext.\n"); - - GUARD( - gf2x_add(p_ct[0].val.raw, p_ct[0].val.raw, splitted_e->val[0].raw, R_SIZE)); - GUARD( - gf2x_add(p_ct[1].val.raw, p_ct[1].val.raw, splitted_e->val[1].raw, R_SIZE)); - - // Copy the data outside - ct->val[0] = p_ct[0].val; - ct->val[1] = p_ct[1].val; - - print("m: ", (uint64_t *)m.val.raw, R_BITS); - print("c0: ", (uint64_t *)p_ct[0].val.raw, R_BITS); - print("c1: ", (uint64_t *)p_ct[1].val.raw, R_BITS); - - return SUCCESS; -} - -_INLINE_ ret_t -calc_pk(OUT pk_t *pk, IN const seed_t *g_seed, IN const pad_sk_t p_sk) -{ - // PK is dbl padded because modmul require some scratch space for the - // multiplication result - dbl_pad_pk_t p_pk = {0}; - - // Intialized padding to zero - DEFER_CLEANUP(padded_r_t g = {0}, padded_r_cleanup); - - GUARD(sample_uniform_r_bits(&g.val, g_seed, MUST_BE_ODD)); - - // Calculate (g0, g1) = (g*h1, g*h0) - GUARD(gf2x_mod_mul((uint64_t *)&p_pk[0], (const uint64_t *)&g, - (const uint64_t *)&p_sk[1])); - GUARD(gf2x_mod_mul((uint64_t *)&p_pk[1], (const uint64_t *)&g, - (const uint64_t *)&p_sk[0])); - - // Copy the data to the output parameters. 
- pk->val[0] = p_pk[0].val; - pk->val[1] = p_pk[1].val; - - print("g: ", (uint64_t *)g.val.raw, R_BITS); - print("g0: ", (uint64_t *)&p_pk[0], R_BITS); - print("g1: ", (uint64_t *)&p_pk[1], R_BITS); - - return SUCCESS; -} - -// Generate the Shared Secret (K(e)) -_INLINE_ void -get_ss(OUT ss_t *out, IN const e_t *e) -{ - DMSG(" Enter get_ss.\n"); - - // Calculate the hash - DEFER_CLEANUP(sha_hash_t hash = {0}, sha_hash_cleanup); - parallel_hash(&hash, e->raw, sizeof(*e)); - - // Truncate the final hash into K by copying only the LSBs - memcpy(out->raw, hash.u.raw, sizeof(*out)); - - secure_clean(hash.u.raw, sizeof(hash)); - DMSG(" Exit get_ss.\n"); -} - -//////////////////////////////////////////////////////////////// -// The three APIs below (keygeneration, encapsulate, decapsulate) are defined by -// NIST: In addition there are two KAT versions of this API as defined. -//////////////////////////////////////////////////////////////// -int -BIKE1_L1_R1_crypto_kem_keypair(OUT unsigned char *pk, OUT unsigned char *sk) -{ - // Convert to this implementation types - pk_t *l_pk = (pk_t *)pk; - - DEFER_CLEANUP(ALIGN(8) sk_t l_sk = {0}, sk_cleanup); - - // For DRBG and AES_PRF - DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); - DEFER_CLEANUP(aes_ctr_prf_state_t h_prf_state = {0}, aes_ctr_prf_state_cleanup); - - // Padded for internal use only (the padded data is not released). - DEFER_CLEANUP(pad_sk_t p_sk = {0}, pad_sk_cleanup); - - // Get the entropy seeds. - get_seeds(&seeds); - - DMSG(" Enter crypto_kem_keypair.\n"); - DMSG(" Calculating the secret key.\n"); - - // h0 and h1 use the same context - GUARD(init_aes_ctr_prf_state(&h_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); - - GUARD(generate_sparse_rep((uint64_t *)&p_sk[0], l_sk.wlist[0].val, DV, R_BITS, - sizeof(p_sk[0]), &h_prf_state)); - // Copy data - l_sk.bin[0] = p_sk[0].val; - - GUARD(generate_sparse_rep((uint64_t *)&p_sk[1], l_sk.wlist[1].val, DV, R_BITS, - sizeof(p_sk[1]), &h_prf_state)); - - // Copy data - l_sk.bin[1] = p_sk[1].val; - - DMSG(" Calculating the public key.\n"); - - GUARD(calc_pk(l_pk, &seeds.seed[1], p_sk)); - - memcpy(sk, &l_sk, sizeof(l_sk)); - - print("h0: ", (uint64_t *)&l_sk.bin[0], R_BITS); - print("h1: ", (uint64_t *)&l_sk.bin[1], R_BITS); - print("h0c:", (uint64_t *)&l_sk.wlist[0], SIZEOF_BITS(compressed_idx_dv_t)); - print("h1c:", (uint64_t *)&l_sk.wlist[1], SIZEOF_BITS(compressed_idx_dv_t)); - DMSG(" Exit crypto_kem_keypair.\n"); - - return SUCCESS; -} - -// Encapsulate - pk is the public key, -// ct is a key encapsulation message (ciphertext), -// ss is the shared secret. -int -BIKE1_L1_R1_crypto_kem_enc(OUT unsigned char * ct, - OUT unsigned char * ss, - IN const unsigned char *pk) -{ - DMSG(" Enter crypto_kem_enc.\n"); - - // Convert to this implementation types - const pk_t *l_pk = (const pk_t *)pk; - ct_t * l_ct = (ct_t *)ct; - ss_t * l_ss = (ss_t *)ss; - DEFER_CLEANUP(padded_e_t e = {0}, padded_e_cleanup); - - // For NIST DRBG_CTR - DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); - DEFER_CLEANUP(aes_ctr_prf_state_t e_prf_state = {0}, aes_ctr_prf_state_cleanup); - - // Get the entrophy seeds - get_seeds(&seeds); - - // Random data generator - // Using first seed - GUARD(init_aes_ctr_prf_state(&e_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); - - DMSG(" Generating error.\n"); - ALIGN(8) compressed_idx_t_t dummy; - GUARD(generate_sparse_rep((uint64_t *)&e, dummy.val, T1, N_BITS, sizeof(e), - &e_prf_state)); - - print("e: ", (uint64_t *)&e.val, sizeof(e) * 8); - - // Split e into e0 and e1. 
Initialization is done in split_e - DEFER_CLEANUP(split_e_t splitted_e, split_e_cleanup); - split_e(&splitted_e, &e.val); - - print("e0: ", (uint64_t *)splitted_e.val[0].raw, R_BITS); - print("e1: ", (uint64_t *)splitted_e.val[1].raw, R_BITS); - - // Computing ct = enc(pk, e) - // Using second seed - DMSG(" Encrypting.\n"); - GUARD(encrypt(l_ct, l_pk, &seeds.seed[1], &splitted_e)); - - DMSG(" Generating shared secret.\n"); - get_ss(l_ss, &e.val); - - print("ss: ", (uint64_t *)l_ss->raw, SIZEOF_BITS(*l_ss)); - DMSG(" Exit crypto_kem_enc.\n"); - return SUCCESS; -} - -// Decapsulate - ct is a key encapsulation message (ciphertext), -// sk is the private key, -// ss is the shared secret -int -BIKE1_L1_R1_crypto_kem_dec(OUT unsigned char * ss, - IN const unsigned char *ct, - IN const unsigned char *sk) -{ - DMSG(" Enter crypto_kem_dec.\n"); - - // Convert to this implementation types - const ct_t *l_ct = (const ct_t *)ct; - ss_t * l_ss = (ss_t *)ss; - - DEFER_CLEANUP(ALIGN(8) sk_t l_sk, sk_cleanup); - memcpy(&l_sk, sk, sizeof(l_sk)); - - // Force zero initialization - DEFER_CLEANUP(syndrome_t syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(split_e_t e, split_e_cleanup); - DEFER_CLEANUP(e_t merged_e = {0}, e_cleanup); - - DMSG(" Computing s.\n"); - GUARD(compute_syndrome(&syndrome, l_ct, &l_sk)); - - DMSG(" Decoding.\n"); - GUARD(decode(&e, &syndrome, l_ct, &l_sk)); - - // Check if the error weight equals T1 - if(T1 != r_bits_vector_weight(&e.val[0]) + r_bits_vector_weight(&e.val[1])) - { - MSG(" Error weight is not t\n"); - BIKE_ERROR(E_ERROR_WEIGHT_IS_NOT_T); - } - - merge_e(&merged_e, &e); - get_ss(l_ss, &merged_e); - - DMSG(" Exit crypto_kem_dec.\n"); - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include <string.h> + +#include "decode.h" +#include "gf2x.h" +#include "parallel_hash.h" +#include "sampling.h" +#include "tls/s2n_kem.h" + +_INLINE_ void +split_e(OUT split_e_t *splitted_e, IN const e_t *e) +{ + // Copy lower bytes (e0) + memcpy(splitted_e->val[0].raw, e->raw, R_SIZE); + + // Now load second value + for(uint32_t i = R_SIZE; i < N_SIZE; ++i) + { + splitted_e->val[1].raw[i - R_SIZE] = + ((e->raw[i] << LAST_R_BYTE_TRAIL) | (e->raw[i - 1] >> LAST_R_BYTE_LEAD)); + } + + // Fix corner case + if(N_SIZE < (2ULL * R_SIZE)) + { + splitted_e->val[1].raw[R_SIZE - 1] = (e->raw[N_SIZE - 1] >> LAST_R_BYTE_LEAD); + } + + // Fix last value + splitted_e->val[0].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + splitted_e->val[1].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; +} + +_INLINE_ void +merge_e(OUT e_t *e, IN const split_e_t *splitted_e) +{ + memcpy(e->raw, splitted_e->val[0].raw, R_SIZE); + + e->raw[R_SIZE - 1] = ((splitted_e->val[1].raw[0] << LAST_R_BYTE_LEAD) | + (e->raw[R_SIZE - 1] & LAST_R_BYTE_MASK)); + + // Now load second value + for(uint32_t i = 1; i < R_SIZE; ++i) + { + e->raw[R_SIZE + i - 1] = + ((splitted_e->val[1].raw[i] << LAST_R_BYTE_LEAD) | + (splitted_e->val[1].raw[i - 1] >> LAST_R_BYTE_TRAIL)); + } + + // Mask last byte + if(N_SIZE == (2ULL * R_SIZE)) + { + e->raw[N_SIZE - 1] = + (splitted_e->val[1].raw[R_SIZE - 1] >> LAST_R_BYTE_TRAIL); + } +} + +_INLINE_ ret_t +encrypt(OUT ct_t *ct, + IN const pk_t *pk, + IN const seed_t *seed, + IN const split_e_t *splitted_e) +{ + DEFER_CLEANUP(padded_r_t m = {0}, padded_r_cleanup); + DEFER_CLEANUP(dbl_pad_ct_t p_ct, dbl_pad_ct_cleanup); + + // Pad the public key + pad_pk_t p_pk = {0}; + p_pk[0].val = pk->val[0]; + p_pk[1].val = pk->val[1]; + + DMSG(" Sampling m.\n"); + GUARD(sample_uniform_r_bits(&m.val, seed, NO_RESTRICTION)); + + DMSG(" Calculating the ciphertext.\n"); + + GUARD(gf2x_mod_mul((uint64_t *)&p_ct[0], (uint64_t *)&m, (uint64_t *)&p_pk[0])); + GUARD(gf2x_mod_mul((uint64_t *)&p_ct[1], (uint64_t *)&m, (uint64_t *)&p_pk[1])); + + DMSG(" Addding Error to the ciphertext.\n"); + + GUARD( + gf2x_add(p_ct[0].val.raw, p_ct[0].val.raw, splitted_e->val[0].raw, R_SIZE)); + GUARD( + gf2x_add(p_ct[1].val.raw, p_ct[1].val.raw, splitted_e->val[1].raw, R_SIZE)); + + // Copy the data outside + ct->val[0] = p_ct[0].val; + ct->val[1] = p_ct[1].val; + + print("m: ", (uint64_t *)m.val.raw, R_BITS); + print("c0: ", (uint64_t *)p_ct[0].val.raw, R_BITS); + print("c1: ", (uint64_t *)p_ct[1].val.raw, R_BITS); + + return SUCCESS; +} + +_INLINE_ ret_t +calc_pk(OUT pk_t *pk, IN const seed_t *g_seed, IN const pad_sk_t p_sk) +{ + // PK is dbl padded because modmul require some scratch space for the + // multiplication result + dbl_pad_pk_t p_pk = {0}; + + // Intialized padding to zero + DEFER_CLEANUP(padded_r_t g = {0}, padded_r_cleanup); + + GUARD(sample_uniform_r_bits(&g.val, g_seed, MUST_BE_ODD)); + + // Calculate (g0, g1) = (g*h1, g*h0) + GUARD(gf2x_mod_mul((uint64_t *)&p_pk[0], (const uint64_t *)&g, + (const uint64_t *)&p_sk[1])); + GUARD(gf2x_mod_mul((uint64_t *)&p_pk[1], (const uint64_t *)&g, + (const uint64_t *)&p_sk[0])); + + // Copy the data to the output parameters. 
+ pk->val[0] = p_pk[0].val; + pk->val[1] = p_pk[1].val; + + print("g: ", (uint64_t *)g.val.raw, R_BITS); + print("g0: ", (uint64_t *)&p_pk[0], R_BITS); + print("g1: ", (uint64_t *)&p_pk[1], R_BITS); + + return SUCCESS; +} + +// Generate the Shared Secret (K(e)) +_INLINE_ void +get_ss(OUT ss_t *out, IN const e_t *e) +{ + DMSG(" Enter get_ss.\n"); + + // Calculate the hash + DEFER_CLEANUP(sha_hash_t hash = {0}, sha_hash_cleanup); + parallel_hash(&hash, e->raw, sizeof(*e)); + + // Truncate the final hash into K by copying only the LSBs + memcpy(out->raw, hash.u.raw, sizeof(*out)); + + secure_clean(hash.u.raw, sizeof(hash)); + DMSG(" Exit get_ss.\n"); +} + +//////////////////////////////////////////////////////////////// +// The three APIs below (keygeneration, encapsulate, decapsulate) are defined by +// NIST: In addition there are two KAT versions of this API as defined. +//////////////////////////////////////////////////////////////// +int +BIKE1_L1_R1_crypto_kem_keypair(OUT unsigned char *pk, OUT unsigned char *sk) +{ + // Convert to this implementation types + pk_t *l_pk = (pk_t *)pk; + + DEFER_CLEANUP(ALIGN(8) sk_t l_sk = {0}, sk_cleanup); + + // For DRBG and AES_PRF + DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); + DEFER_CLEANUP(aes_ctr_prf_state_t h_prf_state = {0}, aes_ctr_prf_state_cleanup); + + // Padded for internal use only (the padded data is not released). + DEFER_CLEANUP(pad_sk_t p_sk = {0}, pad_sk_cleanup); + + // Get the entropy seeds. + get_seeds(&seeds); + + DMSG(" Enter crypto_kem_keypair.\n"); + DMSG(" Calculating the secret key.\n"); + + // h0 and h1 use the same context + GUARD(init_aes_ctr_prf_state(&h_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); + + GUARD(generate_sparse_rep((uint64_t *)&p_sk[0], l_sk.wlist[0].val, DV, R_BITS, + sizeof(p_sk[0]), &h_prf_state)); + // Copy data + l_sk.bin[0] = p_sk[0].val; + + GUARD(generate_sparse_rep((uint64_t *)&p_sk[1], l_sk.wlist[1].val, DV, R_BITS, + sizeof(p_sk[1]), &h_prf_state)); + + // Copy data + l_sk.bin[1] = p_sk[1].val; + + DMSG(" Calculating the public key.\n"); + + GUARD(calc_pk(l_pk, &seeds.seed[1], p_sk)); + + memcpy(sk, &l_sk, sizeof(l_sk)); + + print("h0: ", (uint64_t *)&l_sk.bin[0], R_BITS); + print("h1: ", (uint64_t *)&l_sk.bin[1], R_BITS); + print("h0c:", (uint64_t *)&l_sk.wlist[0], SIZEOF_BITS(compressed_idx_dv_t)); + print("h1c:", (uint64_t *)&l_sk.wlist[1], SIZEOF_BITS(compressed_idx_dv_t)); + DMSG(" Exit crypto_kem_keypair.\n"); + + return SUCCESS; +} + +// Encapsulate - pk is the public key, +// ct is a key encapsulation message (ciphertext), +// ss is the shared secret. +int +BIKE1_L1_R1_crypto_kem_enc(OUT unsigned char * ct, + OUT unsigned char * ss, + IN const unsigned char *pk) +{ + DMSG(" Enter crypto_kem_enc.\n"); + + // Convert to this implementation types + const pk_t *l_pk = (const pk_t *)pk; + ct_t * l_ct = (ct_t *)ct; + ss_t * l_ss = (ss_t *)ss; + DEFER_CLEANUP(padded_e_t e = {0}, padded_e_cleanup); + + // For NIST DRBG_CTR + DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); + DEFER_CLEANUP(aes_ctr_prf_state_t e_prf_state = {0}, aes_ctr_prf_state_cleanup); + + // Get the entrophy seeds + get_seeds(&seeds); + + // Random data generator + // Using first seed + GUARD(init_aes_ctr_prf_state(&e_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); + + DMSG(" Generating error.\n"); + ALIGN(8) compressed_idx_t_t dummy; + GUARD(generate_sparse_rep((uint64_t *)&e, dummy.val, T1, N_BITS, sizeof(e), + &e_prf_state)); + + print("e: ", (uint64_t *)&e.val, sizeof(e) * 8); + + // Split e into e0 and e1. 
Initialization is done in split_e + DEFER_CLEANUP(split_e_t splitted_e, split_e_cleanup); + split_e(&splitted_e, &e.val); + + print("e0: ", (uint64_t *)splitted_e.val[0].raw, R_BITS); + print("e1: ", (uint64_t *)splitted_e.val[1].raw, R_BITS); + + // Computing ct = enc(pk, e) + // Using second seed + DMSG(" Encrypting.\n"); + GUARD(encrypt(l_ct, l_pk, &seeds.seed[1], &splitted_e)); + + DMSG(" Generating shared secret.\n"); + get_ss(l_ss, &e.val); + + print("ss: ", (uint64_t *)l_ss->raw, SIZEOF_BITS(*l_ss)); + DMSG(" Exit crypto_kem_enc.\n"); + return SUCCESS; +} + +// Decapsulate - ct is a key encapsulation message (ciphertext), +// sk is the private key, +// ss is the shared secret +int +BIKE1_L1_R1_crypto_kem_dec(OUT unsigned char * ss, + IN const unsigned char *ct, + IN const unsigned char *sk) +{ + DMSG(" Enter crypto_kem_dec.\n"); + + // Convert to this implementation types + const ct_t *l_ct = (const ct_t *)ct; + ss_t * l_ss = (ss_t *)ss; + + DEFER_CLEANUP(ALIGN(8) sk_t l_sk, sk_cleanup); + memcpy(&l_sk, sk, sizeof(l_sk)); + + // Force zero initialization + DEFER_CLEANUP(syndrome_t syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(split_e_t e, split_e_cleanup); + DEFER_CLEANUP(e_t merged_e = {0}, e_cleanup); + + DMSG(" Computing s.\n"); + GUARD(compute_syndrome(&syndrome, l_ct, &l_sk)); + + DMSG(" Decoding.\n"); + GUARD(decode(&e, &syndrome, l_ct, &l_sk)); + + // Check if the error weight equals T1 + if(T1 != r_bits_vector_weight(&e.val[0]) + r_bits_vector_weight(&e.val[1])) + { + MSG(" Error weight is not t\n"); + BIKE_ERROR(E_ERROR_WEIGHT_IS_NOT_T); + } + + merge_e(&merged_e, &e); + get_ss(l_ss, &merged_e); + + DMSG(" Exit crypto_kem_dec.\n"); + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/cleanup.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/cleanup.h index 6bacfaa45a..67205216d3 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/cleanup.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/cleanup.h @@ -1,131 +1,131 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once -#include "types.h" -#include "utils/s2n_safety.h" - -_INLINE_ void -secure_clean(OUT uint8_t *p, IN const uint32_t len) -{ -#ifdef _WIN32 - SecureZeroMemory(p, len); -#else - typedef void *(*memset_t)(void *, int, size_t); - static volatile memset_t memset_func = memset; - memset_func(p, 0, len); -#endif -} - -_INLINE_ void -r_cleanup(IN OUT r_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -e_cleanup(IN OUT e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -padded_r_cleanup(IN OUT padded_r_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -padded_e_cleanup(IN OUT padded_e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -split_e_cleanup(IN OUT split_e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -sk_cleanup(IN OUT sk_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -pad_sk_cleanup(IN OUT pad_sk_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -pad_ct_cleanup(IN OUT pad_ct_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -dbl_pad_ct_cleanup(IN OUT dbl_pad_ct_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -seed_cleanup(IN OUT seed_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -syndrome_cleanup(IN OUT syndrome_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -dbl_pad_syndrome_cleanup(IN OUT dbl_pad_syndrome_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -compressed_idx_t_cleanup(IN OUT compressed_idx_t_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -compressed_idx_dv_ar_cleanup(IN OUT compressed_idx_dv_ar_t *o) -{ - for(int i = 0; i < N0; i++) - { - secure_clean((uint8_t *)&(*o)[i], sizeof((*o)[0])); - } -} - -_INLINE_ void -generic_param_n_cleanup(IN OUT generic_param_n_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -seeds_cleanup(IN OUT seeds_t *o) -{ - for(int i = 0; i < NUM_OF_SEEDS; i++) - { - seed_cleanup(&(o->seed[i])); - } -} - -_INLINE_ void -upc_cleanup(IN OUT upc_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once +#include "types.h" +#include "utils/s2n_safety.h" + +_INLINE_ void +secure_clean(OUT uint8_t *p, IN const uint32_t len) +{ +#ifdef _WIN32 + SecureZeroMemory(p, len); +#else + typedef void *(*memset_t)(void *, int, size_t); + static volatile memset_t memset_func = memset; + memset_func(p, 0, len); +#endif +} + +_INLINE_ void +r_cleanup(IN OUT r_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +e_cleanup(IN OUT e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +padded_r_cleanup(IN OUT padded_r_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +padded_e_cleanup(IN OUT padded_e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +split_e_cleanup(IN OUT split_e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +sk_cleanup(IN OUT sk_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +pad_sk_cleanup(IN OUT pad_sk_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +pad_ct_cleanup(IN OUT pad_ct_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +dbl_pad_ct_cleanup(IN OUT dbl_pad_ct_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +seed_cleanup(IN OUT seed_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +syndrome_cleanup(IN OUT syndrome_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +dbl_pad_syndrome_cleanup(IN OUT dbl_pad_syndrome_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +compressed_idx_t_cleanup(IN OUT compressed_idx_t_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +compressed_idx_dv_ar_cleanup(IN OUT compressed_idx_dv_ar_t *o) +{ + for(int i = 0; i < N0; i++) + { + secure_clean((uint8_t *)&(*o)[i], sizeof((*o)[0])); + } +} + +_INLINE_ void +generic_param_n_cleanup(IN OUT generic_param_n_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +seeds_cleanup(IN OUT seeds_t *o) +{ + for(int i = 0; i < NUM_OF_SEEDS; i++) + { + seed_cleanup(&(o->seed[i])); + } +} + +_INLINE_ void +upc_cleanup(IN OUT upc_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/converts_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/converts_portable.c index d76900b771..06e311601f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/converts_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/converts_portable.c @@ -1,62 +1,62 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - * The license is detailed in the file LICENSE.md, and applies to this file. - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "decode.h" -#include "utilities.h" - -// Convert a sequence of uint8_t elements which fully uses all 8-bits of -// an uint8_t element to a sequence of uint8_t which uses just a single -// bit per byte (either 0 or 1). -EXTERNC void -convert_to_redundant_rep(OUT uint8_t *out, - IN const uint8_t *in, - IN const uint64_t len) -{ - uint8_t tmp; - for(uint32_t i = 0; i < (len / 8); i++) - { - tmp = in[i]; - for(uint8_t j = 0; j < 8; j++) - { - out[8 * i + j] |= (tmp & 0x1); - tmp >>= 1; - } - } - - // Convert the reminder - tmp = in[len / 8]; - for(uint32_t j = 8 * (len / 8); j < len; j++) - { - out[j] |= (tmp & 0x1); - tmp >>= 1; - } -} - -EXTERNC uint64_t -count_ones(IN const uint8_t *in, IN const uint32_t len) -{ - uint64_t acc = 0; - for(uint32_t i = 0; i < len; i++) - { - acc += __builtin_popcount(in[i]); - } - - return acc; -} +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * The license is detailed in the file LICENSE.md, and applies to this file. + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "decode.h" +#include "utilities.h" + +// Convert a sequence of uint8_t elements which fully uses all 8-bits of +// an uint8_t element to a sequence of uint8_t which uses just a single +// bit per byte (either 0 or 1). +EXTERNC void +convert_to_redundant_rep(OUT uint8_t *out, + IN const uint8_t *in, + IN const uint64_t len) +{ + uint8_t tmp; + for(uint32_t i = 0; i < (len / 8); i++) + { + tmp = in[i]; + for(uint8_t j = 0; j < 8; j++) + { + out[8 * i + j] |= (tmp & 0x1); + tmp >>= 1; + } + } + + // Convert the reminder + tmp = in[len / 8]; + for(uint32_t j = 8 * (len / 8); j < len; j++) + { + out[j] |= (tmp & 0x1); + tmp >>= 1; + } +} + +EXTERNC uint64_t +count_ones(IN const uint8_t *in, IN const uint32_t len) +{ + uint64_t acc = 0; + for(uint32_t i = 0; i < len; i++) + { + acc += __builtin_popcount(in[i]); + } + + return acc; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.c index 404c6377da..ee37e7d82a 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.c @@ -1,365 +1,365 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) - * - * [1] The optimizations are based on the description developed in the paper: - * Drucker, Nir, and Shay Gueron. 2019. “A Toolbox for Software Optimization - * of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering, - * January, 1–17. https://doi.org/10.1007/s13389-018-00200-4. - * - * [2] The decoder algorithm is the Black-Gray decoder in - * the early submission of CAKE (due to N. 
Sandrier and R Misoczki). - * - * [3] The analysis for the constant time implementation is given in - * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. - * “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.” - * Cryptology EPrint Archive, 2019. https://eprint.iacr.org/2019/1289. - * - * [4] it was adapted to BGF in: - * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. - * “QC-MDPC decoders with several shades of gray.” - * Cryptology EPrint Archive, 2019. To be published. - * - * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography. - * In: Gier-lichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware - * and Embedded Systems– CHES 2016. pp. 280–300. Springer Berlin Heidelberg, - * Berlin, Heidelberg (2016) - * - * [6] The rotate512_small funciton is a derivative of the code described in: - * Guimarães, Antonio, Diego F Aranha, and Edson Borin. 2019. - * “Optimized Implementation of QC-MDPC Code-Based Cryptography.” - * Concurrency and Computation: Practice and Experience 31 (18): - * e5089. https://doi.org/10.1002/cpe.5089. - */ - -#include "decode.h" -#include "gf2x.h" -#include "utilities.h" -#include <string.h> - -// Decoding (bit-flipping) parameter -#ifdef BG_DECODER -# if(LEVEL == 1) -# define MAX_IT 3 -# elif(LEVEL == 3) -# define MAX_IT 4 -# elif(LEVEL == 5) -# define MAX_IT 7 -# else -# error "Level can only be 1/3/5" -# endif -#elif defined(BGF_DECODER) -# if(LEVEL == 1) -# define MAX_IT 5 -# elif(LEVEL == 3) -# define MAX_IT 6 -# elif(LEVEL == 5) -# define MAX_IT 7 -# else -# error "Level can only be 1/3/5" -# endif -#endif - -// Duplicates the first R_BITS of the syndrome three times -// |------------------------------------------| -// | Third copy | Second copy | first R_BITS | -// |------------------------------------------| -// This is required by the rotate functions. 
-_INLINE_ void -dup(IN OUT syndrome_t *s) -{ - s->qw[R_QW - 1] = - (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK); - - for(size_t i = 0; i < (2 * R_QW) - 1; i++) - { - s->qw[R_QW + i] = - (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD); - } -} - -ret_t -compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk) -{ - // gf2x_mod_mul requires the values to be 64bit padded and extra (dbl) space - // for the results - DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup); - DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup); - pad_sk[0].val = sk->bin[0]; - pad_sk[1].val = sk->bin[1]; - - DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup); - pad_ct[0].val = ct->val[0]; - pad_ct[1].val = ct->val[1]; - - // Compute s = c0*h0 + c1*h1: - GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0], - (uint64_t *)&pad_sk[0])); - GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1], - (uint64_t *)&pad_sk[1])); - - GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE)); - - memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE); - dup(syndrome); - - return SUCCESS; -} - -_INLINE_ ret_t -recompute_syndrome(OUT syndrome_t *syndrome, - IN const ct_t *ct, - IN const sk_t *sk, - IN const split_e_t *splitted_e) -{ - ct_t tmp_ct = *ct; - - // Adapt the ciphertext - GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw, - R_SIZE)); - GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw, - R_SIZE)); - - // Recompute the syndrome - GUARD(compute_syndrome(syndrome, &tmp_ct, sk)); - - return SUCCESS; -} - -_INLINE_ uint8_t -get_threshold(IN const syndrome_t *s) -{ - bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough); - - const uint32_t syndrome_weight = r_bits_vector_weight((const r_t *)s->qw); - - // The equations below are defined in BIKE's specification: - // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf - // Page 20 Section 2.4.2 - const uint8_t threshold = - THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight); - - DMSG(" Thresold: %d\n", threshold); - return threshold; -} - -// Use half-adder as described in [5]. 
-_INLINE_ void -bit_sliced_adder(OUT upc_t *upc, - IN OUT syndrome_t *rotated_syndrome, - IN const size_t num_of_slices) -{ - // From cache-memory perspective this loop should be the outside loop - for(size_t j = 0; j < num_of_slices; j++) - { - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]); - upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i]; - rotated_syndrome->qw[i] = carry; - } - } -} - -_INLINE_ void -bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val) -{ - // Borrow - uint64_t br[R_QW] = {0}; - - for(size_t j = 0; j < SLICES; j++) - { - - const uint64_t lsb_mask = 0 - (val & 0x1); - val >>= 1; - - // Perform a - b with c as the input/output carry - // br = 0 0 0 0 1 1 1 1 - // a = 0 0 1 1 0 0 1 1 - // b = 0 1 0 1 0 1 0 1 - // ------------------- - // o = 0 1 1 0 0 1 1 1 - // c = 0 1 0 0 1 1 0 1 - // - // o = a^b^c - // _ __ _ _ _ _ _ - // br = abc + abc + abc + abc = abc + ((a+b))c - - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t a = upc->slice[j].u.qw[i]; - const uint64_t b = lsb_mask; - const uint64_t tmp = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i])); - upc->slice[j].u.qw[i] = a ^ b ^ br[i]; - br[i] = tmp; - } - } -} - -// Calculate the Unsatisfied Parity Checks (UPCs) and update the errors -// vector (e) accordingy. In addition, update the black and gray errors vector -// with the relevant values. -_INLINE_ void -find_err1(OUT split_e_t *e, - OUT split_e_t *black_e, - OUT split_e_t *gray_e, - IN const syndrome_t * syndrome, - IN const compressed_idx_dv_ar_t wlist, - IN const uint8_t threshold) -{ - // This function uses the bit-slice-adder methodology of [5]: - DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(upc_t upc, upc_cleanup); - - for(uint32_t i = 0; i < N0; i++) - { - // UPC must start from zero at every iteration - memset(&upc, 0, sizeof(upc)); - - // 1) Right-rotate the syndrome for every secret key set bit index - // Then slice-add it to the UPC array. - for(size_t j = 0; j < DV; j++) - { - rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); - bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); - } - - // 2) Subtract the threshold from the UPC counters - bit_slice_full_subtract(&upc, threshold); - - // 3) Update the errors and the black errors vectors. - // The last slice of the UPC array holds the MSB of the accumulated values - // minus the threshold. Every zero bit indicates a potential error bit. - // The errors values are stored in the black array and xored with the - // errors Of the previous iteration. - const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); - for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - black_e->val[i].raw[j] = sum_msb; - e->val[i].raw[j] ^= sum_msb; - } - - // Ensure that the padding bits (upper bits of the last byte) are zero so - // they will not be included in the multiplication and in the hash function. - e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - - // 4) Calculate the gray error array by adding "DELTA" to the UPC array. - // For that we reuse the rotated_syndrome variable setting it to all "1". - for(size_t l = 0; l < DELTA; l++) - { - memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE); - bit_sliced_adder(&upc, &rotated_syndrome, SLICES); - } - - // 5) Update the gray list with the relevant bits that are not - // set in the black list. 
- for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb; - } - } -} - -// Recalculate the UPCs and update the errors vector (e) according to it -// and to the black/gray vectors. -_INLINE_ void -find_err2(OUT split_e_t *e, - IN split_e_t *pos_e, - IN const syndrome_t * syndrome, - IN const compressed_idx_dv_ar_t wlist, - IN const uint8_t threshold) -{ - DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(upc_t upc, upc_cleanup); - - for(uint32_t i = 0; i < N0; i++) - { - // UPC must start from zero at every iteration - memset(&upc, 0, sizeof(upc)); - - // 1) Right-rotate the syndrome for every secret key set bit index - // Then slice-add it to the UPC array. - for(size_t j = 0; j < DV; j++) - { - rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); - bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); - } - - // 2) Subtract the threshold from the UPC counters - bit_slice_full_subtract(&upc, threshold); - - // 3) Update the errors vector. - // The last slice of the UPC array holds the MSB of the accumulated values - // minus the threshold. Every zero bit indicates a potential error bit. - const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); - for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb); - } - - // Ensure that the padding bits (upper bits of the last byte) are zero so - // they will not be included in the multiplication and in the hash function. - e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - } -} - -ret_t -decode(OUT split_e_t *e, - IN const syndrome_t *original_s, - IN const ct_t *ct, - IN const sk_t *sk) -{ - split_e_t black_e = {0}; - split_e_t gray_e = {0}; - syndrome_t s; - - // Reset (init) the error because it is xored in the find_err funcitons. - memset(e, 0, sizeof(*e)); - s = *original_s; - dup(&s); - - for(uint32_t iter = 0; iter < MAX_IT; iter++) - { - const uint8_t threshold = get_threshold(&s); - - DMSG(" Iteration: %d\n", iter); - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold); - GUARD(recompute_syndrome(&s, ct, sk, e)); -#ifdef BGF_DECODER - if(iter >= 1) - { - continue; - } -#endif - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1); - GUARD(recompute_syndrome(&s, ct, sk, e)); - - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1); - GUARD(recompute_syndrome(&s, ct, sk, e)); - } - - if(r_bits_vector_weight((r_t *)s.qw) > 0) - { - BIKE_ERROR(E_DECODING_FAILURE); - } - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) + * + * [1] The optimizations are based on the description developed in the paper: + * Drucker, Nir, and Shay Gueron. 
2019. “A Toolbox for Software Optimization + * of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering, + * January, 1–17. https://doi.org/10.1007/s13389-018-00200-4. + * + * [2] The decoder algorithm is the Black-Gray decoder in + * the early submission of CAKE (due to N. Sandrier and R Misoczki). + * + * [3] The analysis for the constant time implementation is given in + * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. + * “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.” + * Cryptology EPrint Archive, 2019. https://eprint.iacr.org/2019/1289. + * + * [4] it was adapted to BGF in: + * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. + * “QC-MDPC decoders with several shades of gray.” + * Cryptology EPrint Archive, 2019. To be published. + * + * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography. + * In: Gier-lichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware + * and Embedded Systems– CHES 2016. pp. 280–300. Springer Berlin Heidelberg, + * Berlin, Heidelberg (2016) + * + * [6] The rotate512_small funciton is a derivative of the code described in: + * Guimarães, Antonio, Diego F Aranha, and Edson Borin. 2019. + * “Optimized Implementation of QC-MDPC Code-Based Cryptography.” + * Concurrency and Computation: Practice and Experience 31 (18): + * e5089. https://doi.org/10.1002/cpe.5089. + */ + +#include "decode.h" +#include "gf2x.h" +#include "utilities.h" +#include <string.h> + +// Decoding (bit-flipping) parameter +#ifdef BG_DECODER +# if(LEVEL == 1) +# define MAX_IT 3 +# elif(LEVEL == 3) +# define MAX_IT 4 +# elif(LEVEL == 5) +# define MAX_IT 7 +# else +# error "Level can only be 1/3/5" +# endif +#elif defined(BGF_DECODER) +# if(LEVEL == 1) +# define MAX_IT 5 +# elif(LEVEL == 3) +# define MAX_IT 6 +# elif(LEVEL == 5) +# define MAX_IT 7 +# else +# error "Level can only be 1/3/5" +# endif +#endif + +// Duplicates the first R_BITS of the syndrome three times +// |------------------------------------------| +// | Third copy | Second copy | first R_BITS | +// |------------------------------------------| +// This is required by the rotate functions. 
+_INLINE_ void +dup(IN OUT syndrome_t *s) +{ + s->qw[R_QW - 1] = + (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK); + + for(size_t i = 0; i < (2 * R_QW) - 1; i++) + { + s->qw[R_QW + i] = + (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD); + } +} + +ret_t +compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk) +{ + // gf2x_mod_mul requires the values to be 64bit padded and extra (dbl) space + // for the results + DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup); + DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup); + pad_sk[0].val = sk->bin[0]; + pad_sk[1].val = sk->bin[1]; + + DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup); + pad_ct[0].val = ct->val[0]; + pad_ct[1].val = ct->val[1]; + + // Compute s = c0*h0 + c1*h1: + GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0], + (uint64_t *)&pad_sk[0])); + GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1], + (uint64_t *)&pad_sk[1])); + + GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE)); + + memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE); + dup(syndrome); + + return SUCCESS; +} + +_INLINE_ ret_t +recompute_syndrome(OUT syndrome_t *syndrome, + IN const ct_t *ct, + IN const sk_t *sk, + IN const split_e_t *splitted_e) +{ + ct_t tmp_ct = *ct; + + // Adapt the ciphertext + GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw, + R_SIZE)); + GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw, + R_SIZE)); + + // Recompute the syndrome + GUARD(compute_syndrome(syndrome, &tmp_ct, sk)); + + return SUCCESS; +} + +_INLINE_ uint8_t +get_threshold(IN const syndrome_t *s) +{ + bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough); + + const uint32_t syndrome_weight = r_bits_vector_weight((const r_t *)s->qw); + + // The equations below are defined in BIKE's specification: + // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf + // Page 20 Section 2.4.2 + const uint8_t threshold = + THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight); + + DMSG(" Thresold: %d\n", threshold); + return threshold; +} + +// Use half-adder as described in [5]. 
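/*
 * Editorial sketch (not part of this commit): the bit-sliced half-adder idea
 * in a toy form. Each of the 64 bit positions of the words is an independent
 * counter whose binary digits are spread across the slices: slice 0 holds the
 * least-significant bit of every counter, slice 1 the next bit, and so on.
 * Adding one incoming 0/1 per position is then a ripple of half-adders
 * (sum = a ^ b, carry = a & b) with no data-dependent branches, which is the
 * same structure bit_sliced_adder() below uses (it also caps the number of
 * slices it touches via LOG2_MSB). TOY_SLICES/TOY_QW are illustrative sizes.
 */
#include <stdint.h>

#define TOY_SLICES 4
#define TOY_QW     2

static void toy_bit_sliced_add(uint64_t counters[TOY_SLICES][TOY_QW],
                               uint64_t incoming[TOY_QW])
{
  for(int j = 0; j < TOY_SLICES; j++)
  {
    for(int i = 0; i < TOY_QW; i++)
    {
      const uint64_t carry = counters[j][i] & incoming[i]; /* half-adder carry */
      counters[j][i] ^= incoming[i];                       /* half-adder sum   */
      incoming[i] = carry;        /* ripple the carry into the next slice      */
    }
  }
}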
+_INLINE_ void +bit_sliced_adder(OUT upc_t *upc, + IN OUT syndrome_t *rotated_syndrome, + IN const size_t num_of_slices) +{ + // From cache-memory perspective this loop should be the outside loop + for(size_t j = 0; j < num_of_slices; j++) + { + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]); + upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i]; + rotated_syndrome->qw[i] = carry; + } + } +} + +_INLINE_ void +bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val) +{ + // Borrow + uint64_t br[R_QW] = {0}; + + for(size_t j = 0; j < SLICES; j++) + { + + const uint64_t lsb_mask = 0 - (val & 0x1); + val >>= 1; + + // Perform a - b with c as the input/output carry + // br = 0 0 0 0 1 1 1 1 + // a = 0 0 1 1 0 0 1 1 + // b = 0 1 0 1 0 1 0 1 + // ------------------- + // o = 0 1 1 0 0 1 1 1 + // c = 0 1 0 0 1 1 0 1 + // + // o = a^b^c + // _ __ _ _ _ _ _ + // br = abc + abc + abc + abc = abc + ((a+b))c + + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t a = upc->slice[j].u.qw[i]; + const uint64_t b = lsb_mask; + const uint64_t tmp = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i])); + upc->slice[j].u.qw[i] = a ^ b ^ br[i]; + br[i] = tmp; + } + } +} + +// Calculate the Unsatisfied Parity Checks (UPCs) and update the errors +// vector (e) accordingy. In addition, update the black and gray errors vector +// with the relevant values. +_INLINE_ void +find_err1(OUT split_e_t *e, + OUT split_e_t *black_e, + OUT split_e_t *gray_e, + IN const syndrome_t * syndrome, + IN const compressed_idx_dv_ar_t wlist, + IN const uint8_t threshold) +{ + // This function uses the bit-slice-adder methodology of [5]: + DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(upc_t upc, upc_cleanup); + + for(uint32_t i = 0; i < N0; i++) + { + // UPC must start from zero at every iteration + memset(&upc, 0, sizeof(upc)); + + // 1) Right-rotate the syndrome for every secret key set bit index + // Then slice-add it to the UPC array. + for(size_t j = 0; j < DV; j++) + { + rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); + bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); + } + + // 2) Subtract the threshold from the UPC counters + bit_slice_full_subtract(&upc, threshold); + + // 3) Update the errors and the black errors vectors. + // The last slice of the UPC array holds the MSB of the accumulated values + // minus the threshold. Every zero bit indicates a potential error bit. + // The errors values are stored in the black array and xored with the + // errors Of the previous iteration. + const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); + for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + black_e->val[i].raw[j] = sum_msb; + e->val[i].raw[j] ^= sum_msb; + } + + // Ensure that the padding bits (upper bits of the last byte) are zero so + // they will not be included in the multiplication and in the hash function. + e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + + // 4) Calculate the gray error array by adding "DELTA" to the UPC array. + // For that we reuse the rotated_syndrome variable setting it to all "1". + for(size_t l = 0; l < DELTA; l++) + { + memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE); + bit_sliced_adder(&upc, &rotated_syndrome, SLICES); + } + + // 5) Update the gray list with the relevant bits that are not + // set in the black list. 
+ for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb; + } + } +} + +// Recalculate the UPCs and update the errors vector (e) according to it +// and to the black/gray vectors. +_INLINE_ void +find_err2(OUT split_e_t *e, + IN split_e_t *pos_e, + IN const syndrome_t * syndrome, + IN const compressed_idx_dv_ar_t wlist, + IN const uint8_t threshold) +{ + DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(upc_t upc, upc_cleanup); + + for(uint32_t i = 0; i < N0; i++) + { + // UPC must start from zero at every iteration + memset(&upc, 0, sizeof(upc)); + + // 1) Right-rotate the syndrome for every secret key set bit index + // Then slice-add it to the UPC array. + for(size_t j = 0; j < DV; j++) + { + rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); + bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); + } + + // 2) Subtract the threshold from the UPC counters + bit_slice_full_subtract(&upc, threshold); + + // 3) Update the errors vector. + // The last slice of the UPC array holds the MSB of the accumulated values + // minus the threshold. Every zero bit indicates a potential error bit. + const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); + for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb); + } + + // Ensure that the padding bits (upper bits of the last byte) are zero so + // they will not be included in the multiplication and in the hash function. + e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + } +} + +ret_t +decode(OUT split_e_t *e, + IN const syndrome_t *original_s, + IN const ct_t *ct, + IN const sk_t *sk) +{ + split_e_t black_e = {0}; + split_e_t gray_e = {0}; + syndrome_t s; + + // Reset (init) the error because it is xored in the find_err funcitons. + memset(e, 0, sizeof(*e)); + s = *original_s; + dup(&s); + + for(uint32_t iter = 0; iter < MAX_IT; iter++) + { + const uint8_t threshold = get_threshold(&s); + + DMSG(" Iteration: %d\n", iter); + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold); + GUARD(recompute_syndrome(&s, ct, sk, e)); +#ifdef BGF_DECODER + if(iter >= 1) + { + continue; + } +#endif + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1); + GUARD(recompute_syndrome(&s, ct, sk, e)); + + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1); + GUARD(recompute_syndrome(&s, ct, sk, e)); + } + + if(r_bits_vector_weight((r_t *)s.qw) > 0) + { + BIKE_ERROR(E_DECODING_FAILURE); + } + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.h index d8809fd829..db7cf8ec1b 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/decode.h @@ -1,28 +1,28 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -ret_t -compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk); - -// e should be zeroed before calling the decoder. -ret_t -decode(OUT split_e_t *e, - IN const syndrome_t *s, - IN const ct_t *ct, - IN const sk_t *sk); - -// Rotate right the first R_BITS of a syndrome. -// Assumption: the syndrome contains three R_BITS duplications. -// The output syndrome contains only one R_BITS rotation, the other -// (2 * R_BITS) bits are undefined. -void -rotate_right(OUT syndrome_t *out, IN const syndrome_t *in, IN uint32_t bitscount); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +ret_t +compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk); + +// e should be zeroed before calling the decoder. +ret_t +decode(OUT split_e_t *e, + IN const syndrome_t *s, + IN const ct_t *ct, + IN const sk_t *sk); + +// Rotate right the first R_BITS of a syndrome. +// Assumption: the syndrome contains three R_BITS duplications. +// The output syndrome contains only one R_BITS rotation, the other +// (2 * R_BITS) bits are undefined. +void +rotate_right(OUT syndrome_t *out, IN const syndrome_t *in, IN uint32_t bitscount); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/defs.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/defs.h index 1a0fa46c45..7bec886527 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/defs.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/defs.h @@ -1,144 +1,144 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -//////////////////////////////////////////// -// Basic defs -/////////////////////////////////////////// -#define FUNC_PREFIX BIKE1_L1_R1 -#include "functions_renaming.h" - -#ifdef __cplusplus -# define EXTERNC extern "C" -#else -# define EXTERNC -#endif - -// For code clarity. -#define IN -#define OUT - -#define ALIGN(n) __attribute__((aligned(n))) -#define BIKE_UNUSED(x) (void)(x) -#define BIKE_UNUSED_ATT __attribute__((unused)) - -#define _INLINE_ static inline - -// In asm the symbols '==' and '?' are not allowed therefore if using -// divide_and_ceil in asm files we must ensure with static_assert its validity -#if(__cplusplus >= 201103L) || defined(static_assert) -# define bike_static_assert(COND, MSG) static_assert(COND, "MSG") -#else -# define bike_static_assert(COND, MSG) \ - typedef char static_assertion_##MSG[(COND) ? 
1 : -1] BIKE_UNUSED_ATT -#endif - -// Divide by the divider and round up to next integer -#define DIVIDE_AND_CEIL(x, divider) (((x) + (divider)) / (divider)) - -#define BIT(len) (1ULL << (len)) - -#define MASK(len) (BIT(len) - 1) -#define SIZEOF_BITS(b) (sizeof(b) * 8) - -#define QW_SIZE 0x8 -#define XMM_SIZE 0x10 -#define YMM_SIZE 0x20 -#define ZMM_SIZE 0x40 - -#define ALL_YMM_SIZE (16 * YMM_SIZE) -#define ALL_ZMM_SIZE (32 * ZMM_SIZE) - -// Copied from (Kaz answer) -// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 -#define UPTOPOW2_0(v) ((v)-1) -#define UPTOPOW2_1(v) (UPTOPOW2_0(v) | (UPTOPOW2_0(v) >> 1)) -#define UPTOPOW2_2(v) (UPTOPOW2_1(v) | (UPTOPOW2_1(v) >> 2)) -#define UPTOPOW2_3(v) (UPTOPOW2_2(v) | (UPTOPOW2_2(v) >> 4)) -#define UPTOPOW2_4(v) (UPTOPOW2_3(v) | (UPTOPOW2_3(v) >> 8)) -#define UPTOPOW2_5(v) (UPTOPOW2_4(v) | (UPTOPOW2_4(v) >> 16)) - -#define UPTOPOW2(v) (UPTOPOW2_5(v) + 1) - -// Works only for 0 < v < 512 -#define LOG2_MSB(v) \ - ((v) == 0 \ - ? 0 \ - : ((v) < 2 \ - ? 1 \ - : ((v) < 4 \ - ? 2 \ - : ((v) < 8 \ - ? 3 \ - : ((v) < 16 \ - ? 4 \ - : ((v) < 32 \ - ? 5 \ - : ((v) < 64 ? 6 \ - : ((v) < 128 \ - ? 7 \ - : ((v) < 256 \ - ? 8 \ - : 9))))))))) - -//////////////////////////////////////////// -// Debug -/////////////////////////////////////////// - -#ifndef VERBOSE -# define VERBOSE 0 -#endif - -#include <stdio.h> - -#if(VERBOSE == 4) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) MSG(__VA_ARGS__) -# define SEDMSG(...) MSG(__VA_ARGS__) -#elif(VERBOSE == 3) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) MSG(__VA_ARGS__) -# define SEDMSG(...) -#elif(VERBOSE == 2) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) -# define SEDMSG(...) -#elif(VERBOSE == 1) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) -# define EDMSG(...) -# define SEDMSG(...) -#else -# define MSG(...) -# define DMSG(...) -# define EDMSG(...) -# define SEDMSG(...) -#endif - -//////////////////////////////////////////// -// Printing -/////////////////////////////////////////// -//#define PRINT_IN_BE -//#define NO_SPACE -//#define NO_NEWLINE +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +//////////////////////////////////////////// +// Basic defs +/////////////////////////////////////////// +#define FUNC_PREFIX BIKE1_L1_R1 +#include "functions_renaming.h" + +#ifdef __cplusplus +# define EXTERNC extern "C" +#else +# define EXTERNC +#endif + +// For code clarity. +#define IN +#define OUT + +#define ALIGN(n) __attribute__((aligned(n))) +#define BIKE_UNUSED(x) (void)(x) +#define BIKE_UNUSED_ATT __attribute__((unused)) + +#define _INLINE_ static inline + +// In asm the symbols '==' and '?' are not allowed therefore if using +// divide_and_ceil in asm files we must ensure with static_assert its validity +#if(__cplusplus >= 201103L) || defined(static_assert) +# define bike_static_assert(COND, MSG) static_assert(COND, "MSG") +#else +# define bike_static_assert(COND, MSG) \ + typedef char static_assertion_##MSG[(COND) ? 
1 : -1] BIKE_UNUSED_ATT +#endif + +// Divide by the divider and round up to next integer +#define DIVIDE_AND_CEIL(x, divider) (((x) + (divider)) / (divider)) + +#define BIT(len) (1ULL << (len)) + +#define MASK(len) (BIT(len) - 1) +#define SIZEOF_BITS(b) (sizeof(b) * 8) + +#define QW_SIZE 0x8 +#define XMM_SIZE 0x10 +#define YMM_SIZE 0x20 +#define ZMM_SIZE 0x40 + +#define ALL_YMM_SIZE (16 * YMM_SIZE) +#define ALL_ZMM_SIZE (32 * ZMM_SIZE) + +// Copied from (Kaz answer) +// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 +#define UPTOPOW2_0(v) ((v)-1) +#define UPTOPOW2_1(v) (UPTOPOW2_0(v) | (UPTOPOW2_0(v) >> 1)) +#define UPTOPOW2_2(v) (UPTOPOW2_1(v) | (UPTOPOW2_1(v) >> 2)) +#define UPTOPOW2_3(v) (UPTOPOW2_2(v) | (UPTOPOW2_2(v) >> 4)) +#define UPTOPOW2_4(v) (UPTOPOW2_3(v) | (UPTOPOW2_3(v) >> 8)) +#define UPTOPOW2_5(v) (UPTOPOW2_4(v) | (UPTOPOW2_4(v) >> 16)) + +#define UPTOPOW2(v) (UPTOPOW2_5(v) + 1) + +// Works only for 0 < v < 512 +#define LOG2_MSB(v) \ + ((v) == 0 \ + ? 0 \ + : ((v) < 2 \ + ? 1 \ + : ((v) < 4 \ + ? 2 \ + : ((v) < 8 \ + ? 3 \ + : ((v) < 16 \ + ? 4 \ + : ((v) < 32 \ + ? 5 \ + : ((v) < 64 ? 6 \ + : ((v) < 128 \ + ? 7 \ + : ((v) < 256 \ + ? 8 \ + : 9))))))))) + +//////////////////////////////////////////// +// Debug +/////////////////////////////////////////// + +#ifndef VERBOSE +# define VERBOSE 0 +#endif + +#include <stdio.h> + +#if(VERBOSE == 4) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) MSG(__VA_ARGS__) +# define SEDMSG(...) MSG(__VA_ARGS__) +#elif(VERBOSE == 3) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) MSG(__VA_ARGS__) +# define SEDMSG(...) +#elif(VERBOSE == 2) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) +# define SEDMSG(...) +#elif(VERBOSE == 1) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) +# define EDMSG(...) +# define SEDMSG(...) +#else +# define MSG(...) +# define DMSG(...) +# define EDMSG(...) +# define SEDMSG(...) +#endif + +//////////////////////////////////////////// +// Printing +/////////////////////////////////////////// +//#define PRINT_IN_BE +//#define NO_SPACE +//#define NO_NEWLINE diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.c index b048fc06a2..0d8e5b25ce 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.c @@ -1,11 +1,11 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "error.h" - -__thread _bike_err_t bike_errno; +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "error.h" + +__thread _bike_err_t bike_errno; diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.h index eac4e2daee..19d0bb1d9b 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/error.h @@ -1,36 +1,36 @@ -/* Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "defs.h" - -#define SUCCESS 0 -#define FAIL (-1) - -#define ret_t int - -enum _bike_err -{ - E_ERROR_WEIGHT_IS_NOT_T = 1, - E_DECODING_FAILURE = 2, - E_AES_CTR_PRF_INIT_FAIL = 3, - E_AES_OVER_USED = 4, - EXTERNAL_LIB_ERROR_OPENSSL = 5, - E_FAIL_TO_GET_SEED = 6 -}; - -typedef enum _bike_err _bike_err_t; - -extern __thread _bike_err_t bike_errno; -#define BIKE_ERROR(x) \ - do \ - { \ - bike_errno = (x); \ - return FAIL; \ - } while(0) +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "defs.h" + +#define SUCCESS 0 +#define FAIL (-1) + +#define ret_t int + +enum _bike_err +{ + E_ERROR_WEIGHT_IS_NOT_T = 1, + E_DECODING_FAILURE = 2, + E_AES_CTR_PRF_INIT_FAIL = 3, + E_AES_OVER_USED = 4, + EXTERNAL_LIB_ERROR_OPENSSL = 5, + E_FAIL_TO_GET_SEED = 6 +}; + +typedef enum _bike_err _bike_err_t; + +extern __thread _bike_err_t bike_errno; +#define BIKE_ERROR(x) \ + do \ + { \ + bike_errno = (x); \ + return FAIL; \ + } while(0) diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/functions_renaming.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/functions_renaming.h index f11aa90e14..09c8385803 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/functions_renaming.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/functions_renaming.h @@ -1,60 +1,60 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - * The license is detailed in the file LICENSE.md, and applies to this file. - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#ifndef __FUNCTIONS_RENAMING_H_INCLUDED__ -#define __FUNCTIONS_RENAMING_H_INCLUDED__ - -#define PASTER(x, y) x##_##y -#define EVALUATOR(x, y) PASTER(x, y) -#define RENAME_FUNC_NAME(fname) EVALUATOR(FUNC_PREFIX, fname) - -#define keypair RENAME_FUNC_NAME(keypair) -#define decaps RENAME_FUNC_NAME(decaps) -#define encaps RENAME_FUNC_NAME(encaps) - -#define aes_ctr_prf RENAME_FUNC_NAME(aes_ctr_prf) -#define sample_uniform_r_bits_with_fixed_prf_context \ - RENAME_FUNC_NAME(sample_uniform_r_bits_with_fixed_prf_context) -#define init_aes_ctr_prf_state RENAME_FUNC_NAME(init_aes_ctr_prf_state) -#define generate_sparse_rep RENAME_FUNC_NAME(generate_sparse_rep) -#define parallel_hash RENAME_FUNC_NAME(parallel_hash) -#define decode RENAME_FUNC_NAME(decode) -#define print_BE RENAME_FUNC_NAME(print_BE) -#define print_LE RENAME_FUNC_NAME(print_LE) -#define gf2x_mod_mul RENAME_FUNC_NAME(gf2x_mod_mul) -#define secure_set_bits RENAME_FUNC_NAME(secure_set_bits) -#define sha RENAME_FUNC_NAME(sha) -#define count_ones RENAME_FUNC_NAME(count_ones) -#define sha_mb RENAME_FUNC_NAME(sha_mb) -#define split_e RENAME_FUNC_NAME(split_e) -#define compute_syndrome RENAME_FUNC_NAME(compute_syndrome) -#define bike_errno RENAME_FUNC_NAME(bike_errno) -#define cyclic_product RENAME_FUNC_NAME(cyclic_product) -#define ossl_add RENAME_FUNC_NAME(ossl_add) -#define karatzuba_add1 RENAME_FUNC_NAME(karatzuba_add1) -#define karatzuba_add2 RENAME_FUNC_NAME(karatzuba_add2) -#define gf2x_add RENAME_FUNC_NAME(gf2x_add) -#define gf2_muladd_4x4 RENAME_FUNC_NAME(gf2_muladd_4x4) -#define red RENAME_FUNC_NAME(red) -#define gf2x_mul_1x1 RENAME_FUNC_NAME(gf2x_mul_1x1) -#define rotate_right RENAME_FUNC_NAME(rotate_right) -#define r_bits_vector_weight RENAME_FUNC_NAME(r_bits_vector_weight) - -#endif //__FUNCTIONS_RENAMING_H_INCLUDED__ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * The license is detailed in the file LICENSE.md, and applies to this file. + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#ifndef __FUNCTIONS_RENAMING_H_INCLUDED__ +#define __FUNCTIONS_RENAMING_H_INCLUDED__ + +#define PASTER(x, y) x##_##y +#define EVALUATOR(x, y) PASTER(x, y) +#define RENAME_FUNC_NAME(fname) EVALUATOR(FUNC_PREFIX, fname) + +#define keypair RENAME_FUNC_NAME(keypair) +#define decaps RENAME_FUNC_NAME(decaps) +#define encaps RENAME_FUNC_NAME(encaps) + +#define aes_ctr_prf RENAME_FUNC_NAME(aes_ctr_prf) +#define sample_uniform_r_bits_with_fixed_prf_context \ + RENAME_FUNC_NAME(sample_uniform_r_bits_with_fixed_prf_context) +#define init_aes_ctr_prf_state RENAME_FUNC_NAME(init_aes_ctr_prf_state) +#define generate_sparse_rep RENAME_FUNC_NAME(generate_sparse_rep) +#define parallel_hash RENAME_FUNC_NAME(parallel_hash) +#define decode RENAME_FUNC_NAME(decode) +#define print_BE RENAME_FUNC_NAME(print_BE) +#define print_LE RENAME_FUNC_NAME(print_LE) +#define gf2x_mod_mul RENAME_FUNC_NAME(gf2x_mod_mul) +#define secure_set_bits RENAME_FUNC_NAME(secure_set_bits) +#define sha RENAME_FUNC_NAME(sha) +#define count_ones RENAME_FUNC_NAME(count_ones) +#define sha_mb RENAME_FUNC_NAME(sha_mb) +#define split_e RENAME_FUNC_NAME(split_e) +#define compute_syndrome RENAME_FUNC_NAME(compute_syndrome) +#define bike_errno RENAME_FUNC_NAME(bike_errno) +#define cyclic_product RENAME_FUNC_NAME(cyclic_product) +#define ossl_add RENAME_FUNC_NAME(ossl_add) +#define karatzuba_add1 RENAME_FUNC_NAME(karatzuba_add1) +#define karatzuba_add2 RENAME_FUNC_NAME(karatzuba_add2) +#define gf2x_add RENAME_FUNC_NAME(gf2x_add) +#define gf2_muladd_4x4 RENAME_FUNC_NAME(gf2_muladd_4x4) +#define red RENAME_FUNC_NAME(red) +#define gf2x_mul_1x1 RENAME_FUNC_NAME(gf2x_mul_1x1) +#define rotate_right RENAME_FUNC_NAME(rotate_right) +#define r_bits_vector_weight RENAME_FUNC_NAME(r_bits_vector_weight) + +#endif //__FUNCTIONS_RENAMING_H_INCLUDED__ diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x.h index 2de0050ff6..7fb1695058 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x.h @@ -1,55 +1,55 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -#ifdef USE_OPENSSL -# include "openssl_utils.h" -#endif - -#ifdef USE_OPENSSL_GF2M -// res = a*b mod (x^r - 1) -// Note: the caller must allocate twice the size of res. -_INLINE_ ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) -{ - return cyclic_product((uint8_t *)res, (const uint8_t *)a, (const uint8_t *)b); -} - -// A wrapper for other gf2x_add implementations. -_INLINE_ ret_t -gf2x_add(OUT uint8_t *res, - IN const uint8_t *a, - IN const uint8_t *b, - IN const uint64_t size) -{ - BIKE_UNUSED(size); - return ossl_add((uint8_t *)res, a, b); -} -#else // USE_OPENSSL_GF2M - -_INLINE_ ret_t -gf2x_add(OUT uint8_t *res, - IN const uint8_t *a, - IN const uint8_t *b, - IN const uint64_t bytelen) -{ - for(uint64_t i = 0; i < bytelen; i++) - { - res[i] = a[i] ^ b[i]; - } - return SUCCESS; -} - -// res = a*b mod (x^r - 1) -// the caller must allocate twice the size of res! -ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b); -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +#ifdef USE_OPENSSL +# include "openssl_utils.h" +#endif + +#ifdef USE_OPENSSL_GF2M +// res = a*b mod (x^r - 1) +// Note: the caller must allocate twice the size of res. +_INLINE_ ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) +{ + return cyclic_product((uint8_t *)res, (const uint8_t *)a, (const uint8_t *)b); +} + +// A wrapper for other gf2x_add implementations. +_INLINE_ ret_t +gf2x_add(OUT uint8_t *res, + IN const uint8_t *a, + IN const uint8_t *b, + IN const uint64_t size) +{ + BIKE_UNUSED(size); + return ossl_add((uint8_t *)res, a, b); +} +#else // USE_OPENSSL_GF2M + +_INLINE_ ret_t +gf2x_add(OUT uint8_t *res, + IN const uint8_t *a, + IN const uint8_t *b, + IN const uint64_t bytelen) +{ + for(uint64_t i = 0; i < bytelen; i++) + { + res[i] = a[i] ^ b[i]; + } + return SUCCESS; +} + +// res = a*b mod (x^r - 1) +// the caller must allocate twice the size of res! +ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b); +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_internal.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_internal.h index 74fc5b9932..779e7f9727 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_internal.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_internal.h @@ -1,32 +1,32 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -EXTERNC void -karatzuba_add1(OUT uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN uint64_t n_half, - IN uint64_t *alah); - -EXTERNC void -karatzuba_add2(OUT uint64_t *res1, - OUT uint64_t *res2, - IN const uint64_t *res, - IN const uint64_t *tmp, - IN uint64_t n_half); - -EXTERNC void -red(uint64_t *res); - -void - -gf2x_mul_1x1(OUT uint64_t *res, IN uint64_t a, IN uint64_t b); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +EXTERNC void +karatzuba_add1(OUT uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN uint64_t n_half, + IN uint64_t *alah); + +EXTERNC void +karatzuba_add2(OUT uint64_t *res1, + OUT uint64_t *res2, + IN const uint64_t *res, + IN const uint64_t *tmp, + IN uint64_t n_half); + +EXTERNC void +red(uint64_t *res); + +void + +gf2x_mul_1x1(OUT uint64_t *res, IN uint64_t a, IN uint64_t b); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_mul.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_mul.c index 84a79589db..81e55a3366 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_mul.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_mul.c @@ -1,97 +1,97 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "cleanup.h" -#include "gf2x.h" -#include "gf2x_internal.h" -#include <stdlib.h> -#include <string.h> - -#ifndef USE_OPENSSL_GF2M - -// All the temporary data (which might hold secrets) -// is stored on a secure buffer, so that it can be easily cleaned up later. -// The secure buffer required is: 3n/2 (alah|blbh|tmp) in a recursive way. -// 3n/2 + 3n/4 + 3n/8 = 3(n/2 + n/4 + n/8) < 3n -# define SECURE_BUFFER_SIZE (3 * R_PADDED_SIZE) - -// Calculate number of uint64_t values needed to store SECURE_BUFFER_SIZE bytes. Rounding up to the next whole integer. -# define SECURE_BUFFER_SIZE_64_BIT ((SECURE_BUFFER_SIZE / sizeof(uint64_t)) + ((SECURE_BUFFER_SIZE % sizeof(uint64_t)) != 0)) - -// This functions assumes that n is even. -_INLINE_ void -karatzuba(OUT uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN const uint64_t n, - uint64_t * secure_buf) -{ - if(1 == n) - { - gf2x_mul_1x1(res, a[0], b[0]); - return; - } - - const uint64_t half_n = n >> 1; - - // Define pointers for the middle of each parameter - // sepearting a=a_low and a_high (same for ba nd res) - const uint64_t *a_high = a + half_n; - const uint64_t *b_high = b + half_n; - - // Divide res into 4 parts res3|res2|res1|res in size n/2 - uint64_t *res1 = res + half_n; - uint64_t *res2 = res1 + half_n; - - // All three parameters below are allocated on the secure buffer - // All of them are in size half n - uint64_t *alah = secure_buf; - uint64_t *blbh = alah + half_n; - uint64_t *tmp = blbh + half_n; - - // Place the secure buffer ptr in the first free location, - // so the recursive function can use it. - secure_buf = tmp + half_n; - - // Calculate Z0 and store the result in res(low) - karatzuba(res, a, b, half_n, secure_buf); - - // Calculate Z2 and store the result in res(high) - karatzuba(res2, a_high, b_high, half_n, secure_buf); - - // Accomulate the results. - karatzuba_add1(res, a, b, half_n, alah); - - // (a_low + a_high)(b_low + b_high) --> res1 - karatzuba(res1, alah, blbh, half_n, secure_buf); - - karatzuba_add2(res1, res2, res, tmp, half_n); -} - -ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) -{ - bike_static_assert((R_PADDED_QW % 2 == 0), karatzuba_n_is_odd); - - ALIGN(sizeof(uint64_t)) uint64_t secure_buffer[SECURE_BUFFER_SIZE_64_BIT]; - - /* make sure we have the correct size allocation. */ - bike_static_assert(sizeof(secure_buffer) % sizeof(uint64_t) == 0, - secure_buffer_not_eligable_for_uint64_t); - - karatzuba(res, a, b, R_PADDED_QW, (uint64_t *)secure_buffer); - - // This function implicitly assumes that the size of res is 2*R_PADDED_QW. - red(res); - - secure_clean((uint8_t*)secure_buffer, sizeof(secure_buffer)); - - return SUCCESS; -} - -#endif // USE_OPENSSL_GF2M +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "cleanup.h" +#include "gf2x.h" +#include "gf2x_internal.h" +#include <stdlib.h> +#include <string.h> + +#ifndef USE_OPENSSL_GF2M + +// All the temporary data (which might hold secrets) +// is stored on a secure buffer, so that it can be easily cleaned up later. +// The secure buffer required is: 3n/2 (alah|blbh|tmp) in a recursive way. 
+// 3n/2 + 3n/4 + 3n/8 = 3(n/2 + n/4 + n/8) < 3n +# define SECURE_BUFFER_SIZE (3 * R_PADDED_SIZE) + +// Calculate number of uint64_t values needed to store SECURE_BUFFER_SIZE bytes. Rounding up to the next whole integer. +# define SECURE_BUFFER_SIZE_64_BIT ((SECURE_BUFFER_SIZE / sizeof(uint64_t)) + ((SECURE_BUFFER_SIZE % sizeof(uint64_t)) != 0)) + +// This functions assumes that n is even. +_INLINE_ void +karatzuba(OUT uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN const uint64_t n, + uint64_t * secure_buf) +{ + if(1 == n) + { + gf2x_mul_1x1(res, a[0], b[0]); + return; + } + + const uint64_t half_n = n >> 1; + + // Define pointers for the middle of each parameter + // sepearting a=a_low and a_high (same for ba nd res) + const uint64_t *a_high = a + half_n; + const uint64_t *b_high = b + half_n; + + // Divide res into 4 parts res3|res2|res1|res in size n/2 + uint64_t *res1 = res + half_n; + uint64_t *res2 = res1 + half_n; + + // All three parameters below are allocated on the secure buffer + // All of them are in size half n + uint64_t *alah = secure_buf; + uint64_t *blbh = alah + half_n; + uint64_t *tmp = blbh + half_n; + + // Place the secure buffer ptr in the first free location, + // so the recursive function can use it. + secure_buf = tmp + half_n; + + // Calculate Z0 and store the result in res(low) + karatzuba(res, a, b, half_n, secure_buf); + + // Calculate Z2 and store the result in res(high) + karatzuba(res2, a_high, b_high, half_n, secure_buf); + + // Accomulate the results. + karatzuba_add1(res, a, b, half_n, alah); + + // (a_low + a_high)(b_low + b_high) --> res1 + karatzuba(res1, alah, blbh, half_n, secure_buf); + + karatzuba_add2(res1, res2, res, tmp, half_n); +} + +ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) +{ + bike_static_assert((R_PADDED_QW % 2 == 0), karatzuba_n_is_odd); + + ALIGN(sizeof(uint64_t)) uint64_t secure_buffer[SECURE_BUFFER_SIZE_64_BIT]; + + /* make sure we have the correct size allocation. */ + bike_static_assert(sizeof(secure_buffer) % sizeof(uint64_t) == 0, + secure_buffer_not_eligable_for_uint64_t); + + karatzuba(res, a, b, R_PADDED_QW, (uint64_t *)secure_buffer); + + // This function implicitly assumes that the size of res is 2*R_PADDED_QW. + red(res); + + secure_clean((uint8_t*)secure_buffer, sizeof(secure_buffer)); + + return SUCCESS; +} + +#endif // USE_OPENSSL_GF2M diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_portable.c index 1816da6e77..f59361f192 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/gf2x_portable.c @@ -1,108 +1,108 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "gf2x.h" -#include "utilities.h" - -#if !defined(USE_OPENSSL_GF2M) - -// The algorithm is based on the windowing method, for example as in: -// Brent, R. P., Gaudry, P., Thomé, E., & Zimmermann, P. (2008, May), "Faster -// multiplication in GF (2)[x]". In: International Algorithmic Number Theory -// Symposium (pp. 153-166). Springer, Berlin, Heidelberg. In this implementation, -// the last three bits are multiplied using a schoolbook multiplicaiton. 
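/*
 * Editorial sketch (not part of the diff): the same 64x64 -> 128-bit carry-less
 * product that gf2x_mul_1x1() computes with a 3-bit window, written as a plain
 * schoolbook shift-and-xor loop over GF(2)[x]. It is slower than the windowed
 * version but makes the semantics explicit: every set bit i of b contributes
 * (a << i), XOR-accumulated into the 128-bit result. The mask keeps the loop
 * free of data-dependent branches; it is a pedagogical sketch, not a drop-in
 * replacement for the implementation in this file.
 */
#include <stdint.h>

static void toy_clmul_64(uint64_t c[2], uint64_t a, uint64_t b)
{
  uint64_t lo = 0, hi = 0;
  for(unsigned i = 0; i < 64; i++)
  {
    const uint64_t mask = 0 - ((b >> i) & 1); /* all-ones iff bit i of b is set */
    lo ^= (a << i) & mask;
    hi ^= (i ? (a >> (64 - i)) : 0) & mask;   /* bits of a*x^i above position 63 */
  }
  c[0] = lo;
  c[1] = hi;
}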
-void -gf2x_mul_1x1(uint64_t *c, uint64_t a, uint64_t b) -{ - uint64_t h = 0, l = 0, u[8]; - const uint64_t w = 64; - const uint64_t s = 3; - // Multiplying 64 bits by 7 can results in an overflow of 3 bits. - // Therefore, these bits are masked out, and are treated in step 3. - const uint64_t b0 = b & 0x1fffffffffffffff; - - // Step 1: Calculate a multiplication table with 8 entries. - u[0] = 0; - u[1] = b0; - u[2] = u[1] << 1; - u[3] = u[2] ^ b0; - u[4] = u[2] << 1; - u[5] = u[4] ^ b0; - u[6] = u[3] << 1; - u[7] = u[6] ^ b0; - - // Step 2: Multiply two elements in parallel in poisitions i,i+s - l = u[a & 7] ^ (u[(a >> 3) & 7] << 3); - h = (u[(a >> 3) & 7] >> 61); - for(uint32_t i = (2 * s); i < w; i += (2 * s)) - { - uint64_t g1 = u[(a >> i) & 7]; - uint64_t g2 = u[(a >> (i + s)) & 7]; - - l ^= (g1 << i) ^ (g2 << (i + s)); - h ^= (g1 >> (w - i)) ^ (g2 >> (w - (i + s))); - } - - // Step 3: Multiply the last three bits. - for(uint8_t i = 61; i < 64; i++) - { - uint64_t mask = (-((b >> i) & 1)); - l ^= ((a << i) & mask); - h ^= ((a >> (w - i)) & mask); - } - - c[0] = l; - c[1] = h; -} - -void -karatzuba_add1(OUT const uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN const uint64_t n_half, - IN uint64_t *alah) -{ - for(uint32_t j = 0; j < n_half; j++) - { - alah[j + 0 * n_half] = a[j] ^ a[n_half + j]; - alah[j + 1 * n_half] = b[j] ^ b[n_half + j]; - alah[j + 2 * n_half] = res[n_half + j] ^ res[2 * n_half + j]; - } -} - -void -karatzuba_add2(OUT uint64_t *res1, - OUT uint64_t *res2, - IN const uint64_t *res, - IN const uint64_t *tmp, - IN const uint64_t n_half) -{ - for(uint32_t j = 0; j < n_half; j++) - { - res1[j] ^= res[j] ^ tmp[j]; - res2[j] ^= res2[n_half + j] ^ tmp[j]; - } -} - -void -red(uint64_t *a) -{ - for(uint32_t i = 0; i < R_QW; i++) - { - const uint64_t temp0 = a[R_QW + i - 1]; - const uint64_t temp1 = a[R_QW + i]; - a[i] ^= (temp0 >> LAST_R_QW_LEAD) | (temp1 << LAST_R_QW_TRAIL); - } - - a[R_QW - 1] &= LAST_R_QW_MASK; - - // Clean the secrets from the upper half of a. - secure_clean((uint8_t *)&a[R_QW], sizeof(uint64_t) * R_QW); -} - -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "gf2x.h" +#include "utilities.h" + +#if !defined(USE_OPENSSL_GF2M) + +// The algorithm is based on the windowing method, for example as in: +// Brent, R. P., Gaudry, P., Thomé, E., & Zimmermann, P. (2008, May), "Faster +// multiplication in GF (2)[x]". In: International Algorithmic Number Theory +// Symposium (pp. 153-166). Springer, Berlin, Heidelberg. In this implementation, +// the last three bits are multiplied using a schoolbook multiplicaiton. +void +gf2x_mul_1x1(uint64_t *c, uint64_t a, uint64_t b) +{ + uint64_t h = 0, l = 0, u[8]; + const uint64_t w = 64; + const uint64_t s = 3; + // Multiplying 64 bits by 7 can results in an overflow of 3 bits. + // Therefore, these bits are masked out, and are treated in step 3. + const uint64_t b0 = b & 0x1fffffffffffffff; + + // Step 1: Calculate a multiplication table with 8 entries. 
+ u[0] = 0; + u[1] = b0; + u[2] = u[1] << 1; + u[3] = u[2] ^ b0; + u[4] = u[2] << 1; + u[5] = u[4] ^ b0; + u[6] = u[3] << 1; + u[7] = u[6] ^ b0; + + // Step 2: Multiply two elements in parallel in poisitions i,i+s + l = u[a & 7] ^ (u[(a >> 3) & 7] << 3); + h = (u[(a >> 3) & 7] >> 61); + for(uint32_t i = (2 * s); i < w; i += (2 * s)) + { + uint64_t g1 = u[(a >> i) & 7]; + uint64_t g2 = u[(a >> (i + s)) & 7]; + + l ^= (g1 << i) ^ (g2 << (i + s)); + h ^= (g1 >> (w - i)) ^ (g2 >> (w - (i + s))); + } + + // Step 3: Multiply the last three bits. + for(uint8_t i = 61; i < 64; i++) + { + uint64_t mask = (-((b >> i) & 1)); + l ^= ((a << i) & mask); + h ^= ((a >> (w - i)) & mask); + } + + c[0] = l; + c[1] = h; +} + +void +karatzuba_add1(OUT const uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN const uint64_t n_half, + IN uint64_t *alah) +{ + for(uint32_t j = 0; j < n_half; j++) + { + alah[j + 0 * n_half] = a[j] ^ a[n_half + j]; + alah[j + 1 * n_half] = b[j] ^ b[n_half + j]; + alah[j + 2 * n_half] = res[n_half + j] ^ res[2 * n_half + j]; + } +} + +void +karatzuba_add2(OUT uint64_t *res1, + OUT uint64_t *res2, + IN const uint64_t *res, + IN const uint64_t *tmp, + IN const uint64_t n_half) +{ + for(uint32_t j = 0; j < n_half; j++) + { + res1[j] ^= res[j] ^ tmp[j]; + res2[j] ^= res2[n_half + j] ^ tmp[j]; + } +} + +void +red(uint64_t *a) +{ + for(uint32_t i = 0; i < R_QW; i++) + { + const uint64_t temp0 = a[R_QW + i - 1]; + const uint64_t temp1 = a[R_QW + i]; + a[i] ^= (temp0 >> LAST_R_QW_LEAD) | (temp1 << LAST_R_QW_TRAIL); + } + + a[R_QW - 1] &= LAST_R_QW_MASK; + + // Clean the secrets from the upper half of a. + secure_clean((uint8_t *)&a[R_QW], sizeof(uint64_t) * R_QW); +} + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.c index 09e0af3fde..a2a97c4651 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.c @@ -1,187 +1,187 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "openssl_utils.h" -#include "utilities.h" -#include <assert.h> -#include <openssl/bn.h> -#include <string.h> - -#ifdef USE_OPENSSL_GF2M - -# define MAX_OPENSSL_INV_TRIALS 1000 - -_INLINE_ void -BN_CTX_cleanup(BN_CTX *ctx) -{ - if(ctx) - { - BN_CTX_end(ctx); - BN_CTX_free(ctx); - } -} - -DEFINE_POINTER_CLEANUP_FUNC(BN_CTX *, BN_CTX_cleanup); - -// Loading (big) numbers into OpenSSL should use Big Endian representation. -// Therefore, the bytes ordering of the number should be reversed. -_INLINE_ void -reverse_endian(OUT uint8_t *res, IN const uint8_t *in, IN const uint32_t n) -{ - uint32_t i; - - for(i = 0; i < (n / 2); i++) - { - uint64_t tmp = in[i]; - res[i] = in[n - 1 - i]; - res[n - 1 - i] = tmp; - } - - // If the number of blocks is odd, swap also the middle block. 
- if(n % 2) - { - res[i] = in[i]; - } -} - -_INLINE_ ret_t -ossl_bn2bin(OUT uint8_t *out, IN const BIGNUM *in, IN const uint32_t size) -{ - assert(size <= N_SIZE); - uint8_t be_tmp[N_SIZE] = {0}; - - memset(out, 0, size); - - if(BN_bn2bin(in, be_tmp) == -1) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - reverse_endian(out, be_tmp, BN_num_bytes(in)); - - return SUCCESS; -} - -_INLINE_ ret_t -ossl_bin2bn(IN BIGNUM *out, OUT const uint8_t *in, IN const uint32_t size) -{ - assert(size <= N_SIZE); - uint8_t be_tmp[N_SIZE] = {0}; - - reverse_endian(be_tmp, in, size); - - if(BN_bin2bn(be_tmp, size, out) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - return SUCCESS; -} - -ret_t -ossl_add(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]) -{ - DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); - BIGNUM *r = NULL; - BIGNUM *a = NULL; - BIGNUM *b = NULL; - - if(NULL == bn_ctx) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - BN_CTX_start(bn_ctx); - - r = BN_CTX_get(bn_ctx); - a = BN_CTX_get(bn_ctx); - b = BN_CTX_get(bn_ctx); - - if((NULL == r) || (NULL == a) || (NULL == b)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); - GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); - - if(BN_GF2m_add(r, a, b) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); - - return SUCCESS; -} - -// Perform a cyclic product by using OpenSSL. -_INLINE_ ret_t -ossl_cyclic_product(OUT BIGNUM *r, - IN const BIGNUM *a, - IN const BIGNUM *b, - BN_CTX * bn_ctx) -{ - BIGNUM *m = BN_CTX_get(bn_ctx); - if(NULL == m) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - // m = x^PARAM_R - 1 - if((BN_set_bit(m, R_BITS) == 0) || (BN_set_bit(m, 0) == 0)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - // r = a*b mod m - if(BN_GF2m_mod_mul(r, a, b, m, bn_ctx) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - return SUCCESS; -} - -// Perform a cyclic product by using OpenSSL. -ret_t -cyclic_product(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]) -{ - DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); - BIGNUM *r = NULL; - BIGNUM *a = NULL; - BIGNUM *b = NULL; - - if(NULL == bn_ctx) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - BN_CTX_start(bn_ctx); - - r = BN_CTX_get(bn_ctx); - a = BN_CTX_get(bn_ctx); - b = BN_CTX_get(bn_ctx); - - if((NULL == r) || (NULL == a) || (NULL == b)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); - GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); - GUARD(ossl_cyclic_product(r, a, b, bn_ctx)); - GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); - - return SUCCESS; -} - -#endif // USE_OPENSSL_GF2M +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "openssl_utils.h" +#include "utilities.h" +#include <assert.h> +#include <openssl/bn.h> +#include <string.h> + +#ifdef USE_OPENSSL_GF2M + +# define MAX_OPENSSL_INV_TRIALS 1000 + +_INLINE_ void +BN_CTX_cleanup(BN_CTX *ctx) +{ + if(ctx) + { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } +} + +DEFINE_POINTER_CLEANUP_FUNC(BN_CTX *, BN_CTX_cleanup); + +// Loading (big) numbers into OpenSSL should use Big Endian representation. +// Therefore, the bytes ordering of the number should be reversed. 
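/*
 * Editorial sketch (not part of the commit): a minimal example of the byte
 * reversal this comment describes. BN_bin2bn() interprets its input as
 * big-endian, so a little-endian buffer is reversed into a temporary first,
 * which is the situation reverse_endian()/ossl_bin2bn() below handle. The
 * fixed 64-byte bound is illustrative only.
 */
#include <stddef.h>
#include <openssl/bn.h>

static BIGNUM *toy_le_to_bn(const unsigned char *le, size_t n)
{
  unsigned char be[64];
  if(n > sizeof(be))
  {
    return NULL;
  }
  for(size_t i = 0; i < n; i++)
  {
    be[i] = le[n - 1 - i]; /* reverse byte order: little-endian -> big-endian */
  }
  return BN_bin2bn(be, (int)n, NULL); /* caller must BN_free() the result */
}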
+_INLINE_ void +reverse_endian(OUT uint8_t *res, IN const uint8_t *in, IN const uint32_t n) +{ + uint32_t i; + + for(i = 0; i < (n / 2); i++) + { + uint64_t tmp = in[i]; + res[i] = in[n - 1 - i]; + res[n - 1 - i] = tmp; + } + + // If the number of blocks is odd, swap also the middle block. + if(n % 2) + { + res[i] = in[i]; + } +} + +_INLINE_ ret_t +ossl_bn2bin(OUT uint8_t *out, IN const BIGNUM *in, IN const uint32_t size) +{ + assert(size <= N_SIZE); + uint8_t be_tmp[N_SIZE] = {0}; + + memset(out, 0, size); + + if(BN_bn2bin(in, be_tmp) == -1) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + reverse_endian(out, be_tmp, BN_num_bytes(in)); + + return SUCCESS; +} + +_INLINE_ ret_t +ossl_bin2bn(IN BIGNUM *out, OUT const uint8_t *in, IN const uint32_t size) +{ + assert(size <= N_SIZE); + uint8_t be_tmp[N_SIZE] = {0}; + + reverse_endian(be_tmp, in, size); + + if(BN_bin2bn(be_tmp, size, out) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + return SUCCESS; +} + +ret_t +ossl_add(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]) +{ + DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); + BIGNUM *r = NULL; + BIGNUM *a = NULL; + BIGNUM *b = NULL; + + if(NULL == bn_ctx) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + BN_CTX_start(bn_ctx); + + r = BN_CTX_get(bn_ctx); + a = BN_CTX_get(bn_ctx); + b = BN_CTX_get(bn_ctx); + + if((NULL == r) || (NULL == a) || (NULL == b)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); + GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); + + if(BN_GF2m_add(r, a, b) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); + + return SUCCESS; +} + +// Perform a cyclic product by using OpenSSL. +_INLINE_ ret_t +ossl_cyclic_product(OUT BIGNUM *r, + IN const BIGNUM *a, + IN const BIGNUM *b, + BN_CTX * bn_ctx) +{ + BIGNUM *m = BN_CTX_get(bn_ctx); + if(NULL == m) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + // m = x^PARAM_R - 1 + if((BN_set_bit(m, R_BITS) == 0) || (BN_set_bit(m, 0) == 0)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + // r = a*b mod m + if(BN_GF2m_mod_mul(r, a, b, m, bn_ctx) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + return SUCCESS; +} + +// Perform a cyclic product by using OpenSSL. +ret_t +cyclic_product(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]) +{ + DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); + BIGNUM *r = NULL; + BIGNUM *a = NULL; + BIGNUM *b = NULL; + + if(NULL == bn_ctx) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + BN_CTX_start(bn_ctx); + + r = BN_CTX_get(bn_ctx); + a = BN_CTX_get(bn_ctx); + b = BN_CTX_get(bn_ctx); + + if((NULL == r) || (NULL == a) || (NULL == b)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); + GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); + GUARD(ossl_cyclic_product(r, a, b, bn_ctx)); + GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); + + return SUCCESS; +} + +#endif // USE_OPENSSL_GF2M diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.h index 59438b6d70..4f1c55bd94 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/openssl_utils.h @@ -1,33 +1,33 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -#ifdef USE_OPENSSL -# include <openssl/bn.h> -# ifndef OPENSSL_NO_EC2M -# define USE_OPENSSL_GF2M 1 -# endif -#endif - -#ifdef USE_OPENSSL_GF2M - -ret_t -ossl_add(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]); - -// Perform cyclic product by using OpenSSL -ret_t -cyclic_product(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]); - -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +#ifdef USE_OPENSSL +# include <openssl/bn.h> +# ifndef OPENSSL_NO_EC2M +# define USE_OPENSSL_GF2M 1 +# endif +#endif + +#ifdef USE_OPENSSL_GF2M + +ret_t +ossl_add(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]); + +// Perform cyclic product by using OpenSSL +ret_t +cyclic_product(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.c index bde3752e0b..7099f3e415 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.c @@ -1,111 +1,111 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - * The license is detailed in the file LICENSE.md, and applies to this file. - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "parallel_hash.h" -#include "utilities.h" -#include <assert.h> -#include <string.h> - -#define MAX_REM_LEN (MAX_MB_SLICES * HASH_BLOCK_SIZE) - -// We must ensure that the compiler does not add padding between x and y, because -// the entire structore goes into the hash function. -#pragma pack(push, 1) - -// The struct below is a concatination of eight slices and Y. -typedef struct yx_s -{ - sha_hash_t x[MAX_MB_SLICES]; - // We define MAX_REM_LEN rather than lrem, in order to be consistent with the - // standard of C. - uint8_t y[MAX_REM_LEN]; -} yx_t; - -#pragma pack(pop) - -_INLINE_ uint64_t -compute_slice_len(IN const uint64_t la) -{ - assert((la / MAX_MB_SLICES) >= SLICE_REM); - - // alpha is the number of full blocks - const uint64_t alpha = (((la / MAX_MB_SLICES) - SLICE_REM) / HASH_BLOCK_SIZE); - return ((alpha * HASH_BLOCK_SIZE) + SLICE_REM); -} - -// This function assumes that m is of N_BITS length. 
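/*
 * Editorial sketch (not part of the commit): the j-lanes idea behind
 * parallel_hash() in miniature. The message is cut into a fixed number of
 * equal slices plus a tail; each slice is hashed independently (that part can
 * run in parallel / multi-buffer), and the final digest is the hash of the
 * concatenated slice digests followed by the tail. toy_hash is a placeholder
 * for any fixed-output hash and is NOT the sha()/sha_mb() interface used by
 * this code; the real code also keeps slice lengths block-aligned, which this
 * toy ignores.
 */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

#define TOY_SLICES      4
#define TOY_DIGEST_SIZE 32

typedef void (*toy_hash_fn)(uint8_t out[TOY_DIGEST_SIZE],
                            const uint8_t *msg, size_t len);

static void toy_parallel_hash(uint8_t out[TOY_DIGEST_SIZE],
                              const uint8_t *m, size_t len,
                              toy_hash_fn toy_hash)
{
  const size_t slice_len = len / TOY_SLICES;             /* bytes per lane      */
  const size_t tail_len  = len - (slice_len * TOY_SLICES); /* < TOY_SLICES bytes */

  uint8_t yx[(TOY_SLICES * TOY_DIGEST_SIZE) + TOY_SLICES]; /* digests || tail   */

  for(size_t i = 0; i < TOY_SLICES; i++) /* independent lanes, parallelizable  */
  {
    toy_hash(&yx[i * TOY_DIGEST_SIZE], &m[i * slice_len], slice_len);
  }

  memcpy(&yx[TOY_SLICES * TOY_DIGEST_SIZE], &m[TOY_SLICES * slice_len], tail_len);

  /* Final digest over the concatenated lane digests plus the tail. */
  toy_hash(out, yx, (TOY_SLICES * TOY_DIGEST_SIZE) + tail_len);
}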
-void -parallel_hash(OUT sha_hash_t *out_hash, IN const uint8_t *m, IN const uint32_t la) -{ - DMSG(" Enter parallel_hash.\n"); - - // Calculating how many bytes will go to "parallel" hashing - // and how many will remind as a tail for later on - const uint32_t ls = compute_slice_len(la); - const uint32_t lrem = (uint32_t)(la - (ls * MAX_MB_SLICES)); - yx_t yx = {0}; - -#ifdef WIN32 - DMSG(" Len=%u splits into %I64u logical streams (A1..A8) of length %u " - "bytes. ", - la, MAX_MB_SLICES, ls); - DMSG("Append the logically remaining buffer (Y) of %u - %I64u*%u = %u " - "bytes\n\n", - la, MAX_MB_SLICES, ls, lrem); -#else - DMSG(" Len=%u splits into %llu logical streams (A1..A8) of length %u " - "bytes. ", - la, MAX_MB_SLICES, ls); - DMSG("Append the logically remaining buffer (Y) of %u - %llu*%u = %u " - "bytes\n\n", - la, MAX_MB_SLICES, ls, lrem); -#endif - - print(" The (original) buffer is:\n ", (const uint64_t *)m, la * 8); - DMSG("\n"); - EDMSG(" The 8 SHA digests:\n"); - - // Use optimized API for 4 blocks. - const uint64_t partial_len = (NUM_OF_BLOCKS_IN_MB * ls); - sha_mb(&yx.x[0], m, partial_len, NUM_OF_BLOCKS_IN_MB); -#if NUM_OF_BLOCKS_IN_MB != MAX_MB_SLICES - sha_mb(&yx.x[NUM_OF_BLOCKS_IN_MB], &m[partial_len], partial_len, - NUM_OF_BLOCKS_IN_MB); -#endif - - for(uint32_t i = 0; i < MAX_MB_SLICES; i++) - { - print("X[i]:", (uint64_t *)&yx.x[i], SIZEOF_BITS(yx.x[i])); - } - - // Copy the reminder (Y) - memcpy(yx.y, &m[MAX_MB_SLICES * ls], lrem); - - // Compute the final hash (on YX). We explicitly use lrem instead of - // sizeof(yx.y) because yx.y is padded with zeros. - sha(out_hash, sizeof(yx.x) + lrem, (uint8_t *)&yx); - - print("\nY: ", (uint64_t *)yx.y, lrem * 8); - - // yx might contain secrets - secure_clean((uint8_t *)&yx, sizeof(yx)); - - DMSG(" Exit parallel_hash.\n"); -} +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * The license is detailed in the file LICENSE.md, and applies to this file. + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "parallel_hash.h" +#include "utilities.h" +#include <assert.h> +#include <string.h> + +#define MAX_REM_LEN (MAX_MB_SLICES * HASH_BLOCK_SIZE) + +// We must ensure that the compiler does not add padding between x and y, because +// the entire structore goes into the hash function. +#pragma pack(push, 1) + +// The struct below is a concatination of eight slices and Y. +typedef struct yx_s +{ + sha_hash_t x[MAX_MB_SLICES]; + // We define MAX_REM_LEN rather than lrem, in order to be consistent with the + // standard of C. 
+ uint8_t y[MAX_REM_LEN]; +} yx_t; + +#pragma pack(pop) + +_INLINE_ uint64_t +compute_slice_len(IN const uint64_t la) +{ + assert((la / MAX_MB_SLICES) >= SLICE_REM); + + // alpha is the number of full blocks + const uint64_t alpha = (((la / MAX_MB_SLICES) - SLICE_REM) / HASH_BLOCK_SIZE); + return ((alpha * HASH_BLOCK_SIZE) + SLICE_REM); +} + +// This function assumes that m is of N_BITS length. +void +parallel_hash(OUT sha_hash_t *out_hash, IN const uint8_t *m, IN const uint32_t la) +{ + DMSG(" Enter parallel_hash.\n"); + + // Calculating how many bytes will go to "parallel" hashing + // and how many will remind as a tail for later on + const uint32_t ls = compute_slice_len(la); + const uint32_t lrem = (uint32_t)(la - (ls * MAX_MB_SLICES)); + yx_t yx = {0}; + +#ifdef WIN32 + DMSG(" Len=%u splits into %I64u logical streams (A1..A8) of length %u " + "bytes. ", + la, MAX_MB_SLICES, ls); + DMSG("Append the logically remaining buffer (Y) of %u - %I64u*%u = %u " + "bytes\n\n", + la, MAX_MB_SLICES, ls, lrem); +#else + DMSG(" Len=%u splits into %llu logical streams (A1..A8) of length %u " + "bytes. ", + la, MAX_MB_SLICES, ls); + DMSG("Append the logically remaining buffer (Y) of %u - %llu*%u = %u " + "bytes\n\n", + la, MAX_MB_SLICES, ls, lrem); +#endif + + print(" The (original) buffer is:\n ", (const uint64_t *)m, la * 8); + DMSG("\n"); + EDMSG(" The 8 SHA digests:\n"); + + // Use optimized API for 4 blocks. + const uint64_t partial_len = (NUM_OF_BLOCKS_IN_MB * ls); + sha_mb(&yx.x[0], m, partial_len, NUM_OF_BLOCKS_IN_MB); +#if NUM_OF_BLOCKS_IN_MB != MAX_MB_SLICES + sha_mb(&yx.x[NUM_OF_BLOCKS_IN_MB], &m[partial_len], partial_len, + NUM_OF_BLOCKS_IN_MB); +#endif + + for(uint32_t i = 0; i < MAX_MB_SLICES; i++) + { + print("X[i]:", (uint64_t *)&yx.x[i], SIZEOF_BITS(yx.x[i])); + } + + // Copy the reminder (Y) + memcpy(yx.y, &m[MAX_MB_SLICES * ls], lrem); + + // Compute the final hash (on YX). We explicitly use lrem instead of + // sizeof(yx.y) because yx.y is padded with zeros. + sha(out_hash, sizeof(yx.x) + lrem, (uint8_t *)&yx); + + print("\nY: ", (uint64_t *)yx.y, lrem * 8); + + // yx might contain secrets + secure_clean((uint8_t *)&yx, sizeof(yx)); + + DMSG(" Exit parallel_hash.\n"); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.h index 11e95cf89d..397c5e3ab9 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/parallel_hash.h @@ -1,42 +1,42 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - * The license is detailed in the file LICENSE.md, and applies to this file. - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "sha.h" - -// The parallel_hash algorithm uses the technique described in -// 1) S. Gueron, V. Krasnov. Simultaneous Hashing of Multiple Messages. 
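For orientation on the parallel_hash hunk above: a message of la bytes is cut into MAX_MB_SLICES streams of ls = compute_slice_len(la) bytes plus a tail of lrem bytes, the streams are hashed independently, and the concatenation of their digests with the tail is hashed once more. A minimal sketch of just that length arithmetic, reusing the constants that sha.h (further down in this diff) defines and an arbitrary example length:

#include <stdint.h>
#include <stdio.h>
#include <assert.h>

#define MAX_MB_SLICES   8ULL
#define HASH_BLOCK_SIZE 128ULL
#define SLICE_REM       111ULL

static uint64_t compute_slice_len(uint64_t la)
{
  assert((la / MAX_MB_SLICES) >= SLICE_REM);
  /* alpha = number of full hash blocks per slice */
  const uint64_t alpha = ((la / MAX_MB_SLICES) - SLICE_REM) / HASH_BLOCK_SIZE;
  return (alpha * HASH_BLOCK_SIZE) + SLICE_REM;
}

int main(void)
{
  const uint64_t la   = 3000;                       /* arbitrary message length in bytes  */
  const uint64_t ls   = compute_slice_len(la);      /* bytes hashed per logical stream    */
  const uint64_t lrem = la - (ls * MAX_MB_SLICES);  /* tail Y appended after the digests  */
  printf("la=%llu ls=%llu lrem=%llu\n",
         (unsigned long long)la, (unsigned long long)ls, (unsigned long long)lrem);
  return 0;
}

Because the digests X and the tail Y are hashed as one contiguous struct, yx_t is declared packed so that no compiler padding leaks into the final hash input.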
-// Journal of Information Security 3:319-325 (2012). -// 2) S. Gueron. A j-Lanes Tree Hashing Mode and j-Lanes SHA-256. -// Journal of Information Security 4:7-11 (2013). -// See also: -// 3) S. Gueron. Parallelized Hashing via j-Lanes and j-Pointers Tree Modes, -// with Applications to SHA-256. -// Journal of Information Security 5:91-113 (2014). -// -// It is designed to convert the serial hashing to a parallelizeable process. -// -// This function assumes that m is of N_BITS length and that -// ((la / MAX_MB_SLICES) >= SLICE_REM) -void -parallel_hash(OUT sha_hash_t *out_hash, - IN const uint8_t *m, - IN const uint32_t la); +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * The license is detailed in the file LICENSE.md, and applies to this file. + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "sha.h" + +// The parallel_hash algorithm uses the technique described in +// 1) S. Gueron, V. Krasnov. Simultaneous Hashing of Multiple Messages. +// Journal of Information Security 3:319-325 (2012). +// 2) S. Gueron. A j-Lanes Tree Hashing Mode and j-Lanes SHA-256. +// Journal of Information Security 4:7-11 (2013). +// See also: +// 3) S. Gueron. Parallelized Hashing via j-Lanes and j-Pointers Tree Modes, +// with Applications to SHA-256. +// Journal of Information Security 5:91-113 (2014). +// +// It is designed to convert the serial hashing to a parallelizeable process. +// +// This function assumes that m is of N_BITS length and that +// ((la / MAX_MB_SLICES) >= SLICE_REM) +void +parallel_hash(OUT sha_hash_t *out_hash, + IN const uint8_t *m, + IN const uint32_t la); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.c index 3686338fad..1efde4ddd1 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.c @@ -1,118 +1,118 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "sampling.h" -#include <assert.h> -#include <string.h> - -_INLINE_ ret_t -get_rand_mod_len(OUT uint32_t * rand_pos, - IN const uint32_t len, - IN OUT aes_ctr_prf_state_t *prf_state) -{ - const uint64_t mask = MASK(bit_scan_reverse(len)); - - do - { - // Generate 128bit of random numbers - GUARD(aes_ctr_prf((uint8_t *)rand_pos, prf_state, sizeof(*rand_pos))); - - // Mask only relevant bits - (*rand_pos) &= mask; - - // Break if a number smaller than len is found - if((*rand_pos) < len) - { - break; - } - - } while(1); - - return SUCCESS; -} - -_INLINE_ void -make_odd_weight(IN OUT r_t *r) -{ - if(((r_bits_vector_weight(r) % 2) == 1)) - { - // Already odd - return; - } - - r->raw[0] ^= 1; -} - -// IN: must_be_odd - 1 true, 0 not -ret_t -sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, - IN OUT - aes_ctr_prf_state_t *prf_state, - IN const must_be_odd_t must_be_odd) -{ - // Generate random data - GUARD(aes_ctr_prf(r->raw, prf_state, R_SIZE)); - - // Mask upper bits of the MSByte - r->raw[R_SIZE - 1] &= MASK(R_BITS + 8 - (R_SIZE * 8)); - - if(must_be_odd == MUST_BE_ODD) - { - make_odd_weight(r); - } - - return SUCCESS; -} - -_INLINE_ int -is_new(IN const idx_t wlist[], IN const uint32_t ctr) -{ - for(uint32_t i = 0; i < ctr; i++) - { - if(wlist[i] == wlist[ctr]) - { - return 0; - } - } - - return 1; -} - -// Assumption 1) paddded_len % 64 = 0! -// Assumption 2) a is a len bits array. It is padded to be a padded_len -// bytes array. The padded area may be modified and should -// be ignored outside the function scope. -ret_t -generate_sparse_rep(OUT uint64_t * a, - OUT idx_t wlist[], - IN const uint32_t weight, - IN const uint32_t len, - IN const uint32_t padded_len, - IN OUT aes_ctr_prf_state_t *prf_state) -{ - assert(padded_len % 64 == 0); - // Bits comparison - assert((padded_len * 8) >= len); - - uint64_t ctr = 0; - - // Generate weight rand numbers - do - { - GUARD(get_rand_mod_len(&wlist[ctr], len, prf_state)); - ctr += is_new(wlist, ctr); - } while(ctr < weight); - - // Initialize to zero - memset(a, 0, (len + 7) >> 3); - - // Assign values to "a" - secure_set_bits(a, wlist, padded_len, weight); - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "sampling.h" +#include <assert.h> +#include <string.h> + +_INLINE_ ret_t +get_rand_mod_len(OUT uint32_t * rand_pos, + IN const uint32_t len, + IN OUT aes_ctr_prf_state_t *prf_state) +{ + const uint64_t mask = MASK(bit_scan_reverse(len)); + + do + { + // Generate 128bit of random numbers + GUARD(aes_ctr_prf((uint8_t *)rand_pos, prf_state, sizeof(*rand_pos))); + + // Mask only relevant bits + (*rand_pos) &= mask; + + // Break if a number smaller than len is found + if((*rand_pos) < len) + { + break; + } + + } while(1); + + return SUCCESS; +} + +_INLINE_ void +make_odd_weight(IN OUT r_t *r) +{ + if(((r_bits_vector_weight(r) % 2) == 1)) + { + // Already odd + return; + } + + r->raw[0] ^= 1; +} + +// IN: must_be_odd - 1 true, 0 not +ret_t +sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, + IN OUT + aes_ctr_prf_state_t *prf_state, + IN const must_be_odd_t must_be_odd) +{ + // Generate random data + GUARD(aes_ctr_prf(r->raw, prf_state, R_SIZE)); + + // Mask upper bits of the MSByte + r->raw[R_SIZE - 1] &= MASK(R_BITS + 8 - (R_SIZE * 8)); + + if(must_be_odd == MUST_BE_ODD) + { + make_odd_weight(r); + } + + return SUCCESS; +} + +_INLINE_ int +is_new(IN const idx_t wlist[], IN const uint32_t ctr) +{ + for(uint32_t i = 0; i < ctr; i++) + { + if(wlist[i] == wlist[ctr]) + { + return 0; + } + } + + return 1; +} + +// Assumption 1) paddded_len % 64 = 0! +// Assumption 2) a is a len bits array. It is padded to be a padded_len +// bytes array. The padded area may be modified and should +// be ignored outside the function scope. +ret_t +generate_sparse_rep(OUT uint64_t * a, + OUT idx_t wlist[], + IN const uint32_t weight, + IN const uint32_t len, + IN const uint32_t padded_len, + IN OUT aes_ctr_prf_state_t *prf_state) +{ + assert(padded_len % 64 == 0); + // Bits comparison + assert((padded_len * 8) >= len); + + uint64_t ctr = 0; + + // Generate weight rand numbers + do + { + GUARD(get_rand_mod_len(&wlist[ctr], len, prf_state)); + ctr += is_new(wlist, ctr); + } while(ctr < weight); + + // Initialize to zero + memset(a, 0, (len + 7) >> 3); + + // Assign values to "a" + secure_set_bits(a, wlist, padded_len, weight); + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.h index 1ffd56f34a..8d6caa6d7c 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling.h @@ -1,78 +1,78 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
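Regarding get_rand_mod_len above: it draws a uniform index in [0, len) by masking a pseudorandom word down to bit_scan_reverse(len) bits and rejecting any value that is still >= len, so no modulo bias is introduced. A self-contained sketch of the same rejection loop, with rand() standing in for the AES-CTR PRF purely for illustration:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Number of bits needed to represent val (what the BIKE code calls bit_scan_reverse) */
static uint8_t bit_scan_reverse(uint64_t val)
{
  uint8_t index = 0;
  while(val != 0) { val >>= 1; index++; }
  return index;
}

/* Uniform value in [0, len) by masking, then rejecting out-of-range draws */
static uint32_t rand_mod_len(uint32_t len)
{
  const uint32_t mask = (uint32_t)((1ULL << bit_scan_reverse(len)) - 1);
  uint32_t r;
  do {
    r = (uint32_t)rand() & mask;   /* stand-in randomness; BIKE uses aes_ctr_prf here */
  } while(r >= len);
  return r;
}

int main(void)
{
  for(int i = 0; i < 8; i++) { printf("%u ", rand_mod_len(11779)); }  /* 11779 = R_BITS at level 1 */
  printf("\n");
  return 0;
}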
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "aes_ctr_prf.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_result.h" -#include "utilities.h" - -typedef enum -{ - NO_RESTRICTION = 0, - MUST_BE_ODD = 1 -} must_be_odd_t; - -_INLINE_ ret_t -get_seeds(OUT seeds_t *seeds) -{ - if(s2n_result_is_ok(s2n_get_random_bytes(seeds->seed[0].raw, sizeof(seeds_t)))) - { - return SUCCESS; - } - else - { - BIKE_ERROR(E_FAIL_TO_GET_SEED); - } -} - -// Return's an array of r pseudorandom bits -// No restrictions exist for the top or bottom bits - -// in case an odd number is required then set must_be_odd=1 -// Uses the provided prf context -ret_t -sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, - IN OUT - aes_ctr_prf_state_t *prf_state, - IN must_be_odd_t must_be_odd); - -// Return's an array of r pseudorandom bits -// No restrictions exist for the top or bottom bits - -// in case an odd number is required then set must_be_odd=1 -_INLINE_ ret_t -sample_uniform_r_bits(OUT r_t *r, - IN const seed_t * seed, - IN const must_be_odd_t must_be_odd) -{ - // For the seedexpander - DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, aes_ctr_prf_state_cleanup); - - GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, seed)); - - GUARD(sample_uniform_r_bits_with_fixed_prf_context(r, &prf_state, must_be_odd)); - - return SUCCESS; -} - -// Generate a pseudorandom r of length len with a set weight -// Using the pseudorandom ctx supplied -// Outputs also a compressed (not ordered) list of indices -ret_t -generate_sparse_rep(OUT uint64_t *a, - OUT idx_t wlist[], - IN uint32_t weight, - IN uint32_t len, - IN uint32_t padded_len, - IN OUT aes_ctr_prf_state_t *prf_state); - -EXTERNC void -secure_set_bits(IN OUT uint64_t *a, - IN const idx_t wlist[], - IN uint32_t a_len, - IN uint32_t weight); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "aes_ctr_prf.h" +#include "pq-crypto/s2n_pq_random.h" +#include "utils/s2n_result.h" +#include "utilities.h" + +typedef enum +{ + NO_RESTRICTION = 0, + MUST_BE_ODD = 1 +} must_be_odd_t; + +_INLINE_ ret_t +get_seeds(OUT seeds_t *seeds) +{ + if(s2n_result_is_ok(s2n_get_random_bytes(seeds->seed[0].raw, sizeof(seeds_t)))) + { + return SUCCESS; + } + else + { + BIKE_ERROR(E_FAIL_TO_GET_SEED); + } +} + +// Return's an array of r pseudorandom bits +// No restrictions exist for the top or bottom bits - +// in case an odd number is required then set must_be_odd=1 +// Uses the provided prf context +ret_t +sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, + IN OUT + aes_ctr_prf_state_t *prf_state, + IN must_be_odd_t must_be_odd); + +// Return's an array of r pseudorandom bits +// No restrictions exist for the top or bottom bits - +// in case an odd number is required then set must_be_odd=1 +_INLINE_ ret_t +sample_uniform_r_bits(OUT r_t *r, + IN const seed_t * seed, + IN const must_be_odd_t must_be_odd) +{ + // For the seedexpander + DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, aes_ctr_prf_state_cleanup); + + GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, seed)); + + GUARD(sample_uniform_r_bits_with_fixed_prf_context(r, &prf_state, must_be_odd)); + + return SUCCESS; +} + +// Generate a pseudorandom r of length len with a set weight +// Using the pseudorandom ctx supplied +// Outputs also a compressed (not ordered) list of indices +ret_t +generate_sparse_rep(OUT uint64_t *a, + OUT idx_t wlist[], + IN uint32_t weight, + IN uint32_t len, + IN uint32_t padded_len, + IN OUT aes_ctr_prf_state_t *prf_state); + +EXTERNC void +secure_set_bits(IN OUT uint64_t *a, + IN const idx_t wlist[], + IN uint32_t a_len, + IN uint32_t weight); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling_portable.c index 1ae7a6f247..e41e6b5cf2 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sampling_portable.c @@ -1,48 +1,48 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "sampling.h" -#include <assert.h> - -#define MAX_WEIGHT (T1 > DV ? T1 : DV) - -// This implementation assumes that the wlist contains fake list -void -secure_set_bits(IN OUT uint64_t * a, - IN const idx_t wlist[], - IN const uint32_t a_len_bytes, - IN const uint32_t weight) -{ - assert(a_len_bytes % 8 == 0); - - // Set arrays to the maximum possible for the stack protector - assert(weight <= MAX_WEIGHT); - uint64_t qw_pos[MAX_WEIGHT]; - uint64_t bit_pos[MAX_WEIGHT]; - - // 1. Identify the QW position of each value and the bit position inside this - // QW. - for(uint32_t j = 0; j < weight; j++) - { - qw_pos[j] = wlist[j] >> 6; - bit_pos[j] = BIT(wlist[j] & 0x3f); - } - - // 2. Fill each QW in a constant time. - for(uint32_t qw = 0; qw < (a_len_bytes / 8); qw++) - { - uint64_t tmp = 0; - for(uint32_t j = 0; j < weight; j++) - { - uint64_t mask = (-1ULL) + (!secure_cmp32(qw_pos[j], qw)); - tmp |= (bit_pos[j] & mask); - } - // Set the bit in a masked way - a[qw] |= tmp; - } -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "sampling.h" +#include <assert.h> + +#define MAX_WEIGHT (T1 > DV ? T1 : DV) + +// This implementation assumes that the wlist contains fake list +void +secure_set_bits(IN OUT uint64_t * a, + IN const idx_t wlist[], + IN const uint32_t a_len_bytes, + IN const uint32_t weight) +{ + assert(a_len_bytes % 8 == 0); + + // Set arrays to the maximum possible for the stack protector + assert(weight <= MAX_WEIGHT); + uint64_t qw_pos[MAX_WEIGHT]; + uint64_t bit_pos[MAX_WEIGHT]; + + // 1. Identify the QW position of each value and the bit position inside this + // QW. + for(uint32_t j = 0; j < weight; j++) + { + qw_pos[j] = wlist[j] >> 6; + bit_pos[j] = BIT(wlist[j] & 0x3f); + } + + // 2. Fill each QW in a constant time. + for(uint32_t qw = 0; qw < (a_len_bytes / 8); qw++) + { + uint64_t tmp = 0; + for(uint32_t j = 0; j < weight; j++) + { + uint64_t mask = (-1ULL) + (!secure_cmp32(qw_pos[j], qw)); + tmp |= (bit_pos[j] & mask); + } + // Set the bit in a masked way + a[qw] |= tmp; + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/secure_decode_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/secure_decode_portable.c index 963c3257b7..dc4fbe01d8 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/secure_decode_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/secure_decode_portable.c @@ -1,66 +1,66 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "decode.h" -#include "utilities.h" - -#define R_QW_HALF_LOG2 UPTOPOW2(R_QW / 2) - -_INLINE_ void -rotr_big(OUT syndrome_t *out, IN const syndrome_t *in, IN size_t qw_num) -{ - // For preventing overflows (comparison in bytes) - bike_static_assert(sizeof(*out) > 8 * (R_QW + (2 * R_QW_HALF_LOG2)), - rotr_big_err); - - memcpy(out, in, sizeof(*in)); - - for(uint32_t idx = R_QW_HALF_LOG2; idx >= 1; idx >>= 1) - { - // Convert 32 bit mask to 64 bit mask - const uint64_t mask = ((uint32_t)secure_l32_mask(qw_num, idx) + 1U) - 1ULL; - qw_num = qw_num - (idx & mask); - - // Rotate R_QW quadwords and another idx quadwords needed by the next - // iteration - for(size_t i = 0; i < (R_QW + idx); i++) - { - out->qw[i] = (out->qw[i] & (~mask)) | (out->qw[i + idx] & mask); - } - } -} - -_INLINE_ void -rotr_small(OUT syndrome_t *out, IN const syndrome_t *in, IN const size_t bits) -{ - bike_static_assert(bits < 64, rotr_small_err); - bike_static_assert(sizeof(*out) > (8 * R_QW), rotr_small_qw_err); - - // Convert |bits| to 0/1 by using !!bits then create a mask of 0 or 0xffffffffff - // Use high_shift to avoid undefined behaviour when doing x << 64; - const uint64_t mask = (0 - (!!bits)); - const uint64_t high_shift = (64 - bits) & mask; - - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t low_part = in->qw[i] >> bits; - const uint64_t high_part = (in->qw[i + 1] << high_shift) & mask; - out->qw[i] = low_part | high_part; - } -} - -void -rotate_right(OUT syndrome_t *out, - IN const syndrome_t *in, - IN const uint32_t bitscount) -{ - // Rotate (64-bit) quad-words - rotr_big(out, in, (bitscount / 64)); - // Rotate bits (less than 64) - rotr_small(out, out, (bitscount % 64)); -} +/* Copyright Amazon.com, Inc. or its affiliates. 
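On secure_set_bits in the sampling_portable.c hunk above: each output quadword is touched exactly once, and each secret index contributes through an all-ones/all-zero mask, so the memory access pattern does not depend on wlist. A reduced standalone sketch of that masking pattern; it uses a plain == where the original relies on the assembly-backed secure_cmp32, so it shows the structure but is not itself hardened against branches the compiler might emit:

#include <stdint.h>
#include <stdio.h>

#define QW_COUNT 4   /* toy array of 4 * 64 = 256 bits */

/* Set bit positions wlist[0..weight) in a[] while visiting every quadword */
static void set_bits_masked(uint64_t a[QW_COUNT], const uint32_t wlist[], uint32_t weight)
{
  for(uint32_t qw = 0; qw < QW_COUNT; qw++)
  {
    uint64_t tmp = 0;
    for(uint32_t j = 0; j < weight; j++)
    {
      /* mask is all-ones only when this index lives in the current quadword */
      const uint64_t mask = (uint64_t)0 - (uint64_t)((wlist[j] >> 6) == qw);
      tmp |= ((uint64_t)1 << (wlist[j] & 0x3f)) & mask;
    }
    a[qw] |= tmp;   /* every quadword is written regardless of the secret indices */
  }
}

int main(void)
{
  uint64_t a[QW_COUNT] = {0};
  const uint32_t wlist[] = {3, 64, 200};
  set_bits_masked(a, wlist, 3);
  for(int i = 0; i < QW_COUNT; i++) { printf("a[%d] = 0x%016llx\n", i, (unsigned long long)a[i]); }
  return 0;
}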
All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "decode.h" +#include "utilities.h" + +#define R_QW_HALF_LOG2 UPTOPOW2(R_QW / 2) + +_INLINE_ void +rotr_big(OUT syndrome_t *out, IN const syndrome_t *in, IN size_t qw_num) +{ + // For preventing overflows (comparison in bytes) + bike_static_assert(sizeof(*out) > 8 * (R_QW + (2 * R_QW_HALF_LOG2)), + rotr_big_err); + + memcpy(out, in, sizeof(*in)); + + for(uint32_t idx = R_QW_HALF_LOG2; idx >= 1; idx >>= 1) + { + // Convert 32 bit mask to 64 bit mask + const uint64_t mask = ((uint32_t)secure_l32_mask(qw_num, idx) + 1U) - 1ULL; + qw_num = qw_num - (idx & mask); + + // Rotate R_QW quadwords and another idx quadwords needed by the next + // iteration + for(size_t i = 0; i < (R_QW + idx); i++) + { + out->qw[i] = (out->qw[i] & (~mask)) | (out->qw[i + idx] & mask); + } + } +} + +_INLINE_ void +rotr_small(OUT syndrome_t *out, IN const syndrome_t *in, IN const size_t bits) +{ + bike_static_assert(bits < 64, rotr_small_err); + bike_static_assert(sizeof(*out) > (8 * R_QW), rotr_small_qw_err); + + // Convert |bits| to 0/1 by using !!bits then create a mask of 0 or 0xffffffffff + // Use high_shift to avoid undefined behaviour when doing x << 64; + const uint64_t mask = (0 - (!!bits)); + const uint64_t high_shift = (64 - bits) & mask; + + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t low_part = in->qw[i] >> bits; + const uint64_t high_part = (in->qw[i + 1] << high_shift) & mask; + out->qw[i] = low_part | high_part; + } +} + +void +rotate_right(OUT syndrome_t *out, + IN const syndrome_t *in, + IN const uint32_t bitscount) +{ + // Rotate (64-bit) quad-words + rotr_big(out, in, (bitscount / 64)); + // Rotate bits (less than 64) + rotr_small(out, out, (bitscount % 64)); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sha.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sha.h index a1cc55e04a..7005a03322 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sha.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/sha.h @@ -1,61 +1,61 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
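On rotr_small in the secure_decode_portable.c hunk above: shifting a 64-bit word by 64 is undefined behaviour in C, so the function derives mask and high_shift such that the bits == 0 case collapses to a plain copy without any branch. A minimal standalone sketch of that trick on a toy array (the real code runs over the duplicated syndrome, which is why in[] may safely be read one quadword past the logical end):

#include <stdint.h>
#include <stdio.h>

#define QW 3

/* out[i] = right shift of in[] by bits, for bits in [0, 63]; in[] provides one extra
 * trailing quadword so the high part can always be fetched. */
static void rotr_small(uint64_t out[QW], const uint64_t in[QW + 1], size_t bits)
{
  const uint64_t mask = (uint64_t)0 - (uint64_t)(!!bits);  /* 0 when bits == 0, else all-ones */
  const uint64_t high_shift = (64 - bits) & mask;          /* avoids the undefined in[i+1] << 64 */

  for(size_t i = 0; i < QW; i++)
  {
    const uint64_t low_part  = in[i] >> bits;
    const uint64_t high_part = (in[i + 1] << high_shift) & mask;
    out[i] = low_part | high_part;
  }
}

int main(void)
{
  const uint64_t in[QW + 1] = {0x1111222233334444ULL, 0x5555666677778888ULL,
                               0x9999aaaabbbbccccULL, 0x1111222233334444ULL};
  uint64_t out[QW];
  rotr_small(out, in, 4);
  for(int i = 0; i < QW; i++) { printf("0x%016llx\n", (unsigned long long)out[i]); }
  return 0;
}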
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "cleanup.h" -#include "types.h" -#include "utilities.h" -#include <openssl/sha.h> - -#define SHA384_HASH_SIZE 48ULL -#define SHA384_HASH_QWORDS (SHA384_HASH_SIZE / 8) - -typedef struct sha384_hash_s -{ - union { - uint8_t raw[SHA384_HASH_SIZE]; - uint64_t qw[SHA384_HASH_QWORDS]; - } u; -} sha384_hash_t; -bike_static_assert(sizeof(sha384_hash_t) == SHA384_HASH_SIZE, sha384_hash_size); - -typedef sha384_hash_t sha_hash_t; - -_INLINE_ void -sha_hash_cleanup(IN OUT sha_hash_t *o) -{ - secure_clean(o->u.raw, sizeof(*o)); -} - -#define NUM_OF_BLOCKS_IN_MB 4ULL -#define SLICE_REM 111ULL -#define MAX_MB_SLICES 8ULL -#define HASH_BLOCK_SIZE 128ULL - -_INLINE_ int -sha(OUT sha_hash_t *hash_out, IN const uint32_t byte_len, IN const uint8_t *msg) -{ - SHA384(msg, byte_len, hash_out->u.raw); - return 1; -} - -_INLINE_ void -sha_mb(OUT sha_hash_t *hash_out, - IN const uint8_t *msg, - IN const uint32_t byte_len, - IN const uint32_t num) -{ - const uint32_t ls = (byte_len / NUM_OF_BLOCKS_IN_MB); - - // Hash each block (X[i]) - for(uint32_t i = 0; i < num; i++) - { - SHA384(&msg[i * ls], ls, hash_out[i].u.raw); - } -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "cleanup.h" +#include "types.h" +#include "utilities.h" +#include <openssl/sha.h> + +#define SHA384_HASH_SIZE 48ULL +#define SHA384_HASH_QWORDS (SHA384_HASH_SIZE / 8) + +typedef struct sha384_hash_s +{ + union { + uint8_t raw[SHA384_HASH_SIZE]; + uint64_t qw[SHA384_HASH_QWORDS]; + } u; +} sha384_hash_t; +bike_static_assert(sizeof(sha384_hash_t) == SHA384_HASH_SIZE, sha384_hash_size); + +typedef sha384_hash_t sha_hash_t; + +_INLINE_ void +sha_hash_cleanup(IN OUT sha_hash_t *o) +{ + secure_clean(o->u.raw, sizeof(*o)); +} + +#define NUM_OF_BLOCKS_IN_MB 4ULL +#define SLICE_REM 111ULL +#define MAX_MB_SLICES 8ULL +#define HASH_BLOCK_SIZE 128ULL + +_INLINE_ int +sha(OUT sha_hash_t *hash_out, IN const uint32_t byte_len, IN const uint8_t *msg) +{ + SHA384(msg, byte_len, hash_out->u.raw); + return 1; +} + +_INLINE_ void +sha_mb(OUT sha_hash_t *hash_out, + IN const uint8_t *msg, + IN const uint32_t byte_len, + IN const uint32_t num) +{ + const uint32_t ls = (byte_len / NUM_OF_BLOCKS_IN_MB); + + // Hash each block (X[i]) + for(uint32_t i = 0; i < num; i++) + { + SHA384(&msg[i * ls], ls, hash_out[i].u.raw); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/types.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/types.h index 044b7ee38e..647efdf811 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/types.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/types.h @@ -1,139 +1,139 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "bike_defs.h" -#include "error.h" -#include <stdint.h> - -typedef struct uint128_s -{ - union { - uint8_t bytes[16]; - uint32_t dw[4]; - uint64_t qw[2]; - } u; -} uint128_t; - -// Make sure no compiler optimizations. 
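A brief note on sha.h above: the hash wrappers are thin calls to OpenSSL's one-shot SHA384(), and sha_mb simply loops that call over equal-length slices. A hedged usage sketch of the one-shot call (build with -lcrypto; OpenSSL 3.x still ships SHA384, although it marks the function deprecated in favour of the EVP digest API):

#include <openssl/sha.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
  const char *msg = "parallel hash input";
  unsigned char digest[SHA384_DIGEST_LENGTH];   /* 48 bytes, matching SHA384_HASH_SIZE */

  SHA384((const unsigned char *)msg, strlen(msg), digest);

  for(int i = 0; i < SHA384_DIGEST_LENGTH; i++) { printf("%02x", digest[i]); }
  printf("\n");
  return 0;
}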
-#pragma pack(push, 1) - -typedef struct seed_s -{ - uint8_t raw[32]; -} seed_t; - -typedef struct seeds_s -{ - seed_t seed[NUM_OF_SEEDS]; -} seeds_t; - -typedef struct r_s -{ - uint8_t raw[R_SIZE]; -} r_t; - -typedef struct e_s -{ - uint8_t raw[N_SIZE]; -} e_t; - -typedef struct generic_param_n_s -{ - r_t val[N0]; -} generic_param_n_t; - -typedef generic_param_n_t ct_t; -typedef generic_param_n_t pk_t; -typedef generic_param_n_t split_e_t; - -typedef uint32_t idx_t; - -typedef struct compressed_idx_dv_s -{ - idx_t val[DV]; -} compressed_idx_dv_t; - -typedef compressed_idx_dv_t compressed_idx_dv_ar_t[N0]; - -typedef struct compressed_idx_t_t -{ - idx_t val[T1]; -} compressed_idx_t_t; - -// The secret key holds both representation for avoiding -// the compression in the decaps stage -typedef struct sk_s -{ - compressed_idx_dv_ar_t wlist; - r_t bin[N0]; -#ifndef INDCPA - r_t sigma0; - r_t sigma1; -#endif -} sk_t; - -// Pad e to the next Block -typedef ALIGN(8) struct padded_e_s -{ - e_t val; - uint8_t pad[N_PADDED_SIZE - N_SIZE]; -} padded_e_t; - -// Pad r to the next Block -typedef ALIGN(8) struct padded_r_s -{ - r_t val; - uint8_t pad[R_PADDED_SIZE - R_SIZE]; -} padded_r_t; - -typedef padded_r_t padded_param_n_t[N0]; -typedef padded_param_n_t pad_sk_t; -typedef padded_param_n_t pad_pk_t; -typedef padded_param_n_t pad_ct_t; - -// Need to allocate twice the room for the results -typedef ALIGN(8) struct dbl_padded_r_s -{ - r_t val; - uint8_t pad[(2 * R_PADDED_SIZE) - R_SIZE]; -} dbl_padded_r_t; - -typedef dbl_padded_r_t dbl_padded_param_n_t[N0]; -typedef dbl_padded_param_n_t dbl_pad_pk_t; -typedef dbl_padded_param_n_t dbl_pad_ct_t; -typedef dbl_padded_param_n_t dbl_pad_syndrome_t; - -typedef struct ss_s -{ - uint8_t raw[ELL_K_SIZE]; -} ss_t; - -// For optimization purposes -// 1- For a faster rotate we duplicate the syndrome (dup1/2) -// 2- We extend it to fit the boundary of DDQW -typedef ALIGN(64) struct syndrome_s -{ - uint64_t qw[3 * R_QW]; -} syndrome_t; - -typedef struct upc_slice_s -{ - union { - padded_r_t r; - uint64_t qw[sizeof(padded_r_t) / 8]; - } u; -} upc_slice_t; - -typedef struct upc_s -{ - upc_slice_t slice[SLICES]; -} upc_t; - -#pragma pack(pop) +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "bike_defs.h" +#include "error.h" +#include <stdint.h> + +typedef struct uint128_s +{ + union { + uint8_t bytes[16]; + uint32_t dw[4]; + uint64_t qw[2]; + } u; +} uint128_t; + +// Make sure no compiler optimizations. 
+#pragma pack(push, 1) + +typedef struct seed_s +{ + uint8_t raw[32]; +} seed_t; + +typedef struct seeds_s +{ + seed_t seed[NUM_OF_SEEDS]; +} seeds_t; + +typedef struct r_s +{ + uint8_t raw[R_SIZE]; +} r_t; + +typedef struct e_s +{ + uint8_t raw[N_SIZE]; +} e_t; + +typedef struct generic_param_n_s +{ + r_t val[N0]; +} generic_param_n_t; + +typedef generic_param_n_t ct_t; +typedef generic_param_n_t pk_t; +typedef generic_param_n_t split_e_t; + +typedef uint32_t idx_t; + +typedef struct compressed_idx_dv_s +{ + idx_t val[DV]; +} compressed_idx_dv_t; + +typedef compressed_idx_dv_t compressed_idx_dv_ar_t[N0]; + +typedef struct compressed_idx_t_t +{ + idx_t val[T1]; +} compressed_idx_t_t; + +// The secret key holds both representation for avoiding +// the compression in the decaps stage +typedef struct sk_s +{ + compressed_idx_dv_ar_t wlist; + r_t bin[N0]; +#ifndef INDCPA + r_t sigma0; + r_t sigma1; +#endif +} sk_t; + +// Pad e to the next Block +typedef ALIGN(8) struct padded_e_s +{ + e_t val; + uint8_t pad[N_PADDED_SIZE - N_SIZE]; +} padded_e_t; + +// Pad r to the next Block +typedef ALIGN(8) struct padded_r_s +{ + r_t val; + uint8_t pad[R_PADDED_SIZE - R_SIZE]; +} padded_r_t; + +typedef padded_r_t padded_param_n_t[N0]; +typedef padded_param_n_t pad_sk_t; +typedef padded_param_n_t pad_pk_t; +typedef padded_param_n_t pad_ct_t; + +// Need to allocate twice the room for the results +typedef ALIGN(8) struct dbl_padded_r_s +{ + r_t val; + uint8_t pad[(2 * R_PADDED_SIZE) - R_SIZE]; +} dbl_padded_r_t; + +typedef dbl_padded_r_t dbl_padded_param_n_t[N0]; +typedef dbl_padded_param_n_t dbl_pad_pk_t; +typedef dbl_padded_param_n_t dbl_pad_ct_t; +typedef dbl_padded_param_n_t dbl_pad_syndrome_t; + +typedef struct ss_s +{ + uint8_t raw[ELL_K_SIZE]; +} ss_t; + +// For optimization purposes +// 1- For a faster rotate we duplicate the syndrome (dup1/2) +// 2- We extend it to fit the boundary of DDQW +typedef ALIGN(64) struct syndrome_s +{ + uint64_t qw[3 * R_QW]; +} syndrome_t; + +typedef struct upc_slice_s +{ + union { + padded_r_t r; + uint64_t qw[sizeof(padded_r_t) / 8]; + } u; +} upc_slice_t; + +typedef struct upc_s +{ + upc_slice_t slice[SLICES]; +} upc_t; + +#pragma pack(pop) diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.c index 4f049af86a..baed622b78 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.c @@ -1,160 +1,160 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
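On types.h above: the serialized seed, key and ciphertext structures are wrapped in #pragma pack(push, 1) ... pop so that no compiler-inserted padding ends up inside data that is hashed or copied byte-for-byte. A tiny sketch of the effect; the concrete sizes printed are an assumption about a typical LP64 compiler, since padding is otherwise implementation-defined:

#include <stdint.h>
#include <stdio.h>

struct unpacked_s { uint8_t tag; uint64_t qw; };          /* usually padded up to 16 bytes */

#pragma pack(push, 1)
struct packed_s { uint8_t tag; uint64_t qw; };            /* exactly 9 bytes, no padding   */
#pragma pack(pop)

int main(void)
{
  printf("unpacked: %zu bytes, packed: %zu bytes\n",
         sizeof(struct unpacked_s), sizeof(struct packed_s));
  return 0;
}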
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "utilities.h" -#include <inttypes.h> - -#define BITS_IN_QW 64ULL -#define BITS_IN_BYTE 8ULL - -// Print a new line only if we prints in qw blocks -_INLINE_ void -print_newline(IN const uint64_t qw_pos) -{ -#ifndef NO_NEWLINE - if((qw_pos % 4) == 3) - { - printf("\n "); - } -#endif -} - -// This function is stitched for R_BITS vector -uint64_t -r_bits_vector_weight(IN const r_t *in) -{ - uint64_t acc = 0; - for(size_t i = 0; i < (R_SIZE - 1); i++) - { - acc += __builtin_popcount(in->raw[i]); - } - - acc += __builtin_popcount(in->raw[R_SIZE - 1] & LAST_R_BYTE_MASK); - return acc; -} - -// Prints a QW in LE/BE in win/linux format -_INLINE_ void -print_uint64(IN const uint64_t val) -{ -// If printing in BE is required swap the order of bytes -#ifdef PRINT_IN_BE - uint64_t tmp = bswap_64(val); -#else - uint64_t tmp = val; -#endif - - printf("%.16" PRIx64, tmp); - -#ifndef NO_SPACE - printf(" "); -#endif -} - -// Last block requires a special handling as we should zero mask all the bits -// above the desired number endien - 0 - BE, 1 - LE Return 1 if last block was -// printed else 0 -_INLINE_ uint8_t -print_last_block(IN const uint8_t *last_bytes, - IN const uint32_t bits_num, - IN const uint32_t endien) -{ - // Floor of bits/64 the reminder is in the next QW - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // How many bits to pad with zero - const uint32_t rem_bits = bits_num - (BITS_IN_QW * qw_num); - - // We read byte byte and not the whole QW to avoid reading bad memory address - const uint32_t bytes_num = ((rem_bits % 8) == 0) ? rem_bits / BITS_IN_BYTE - : 1 + rem_bits / BITS_IN_BYTE; - - // Must be signed for the LE loop - int i; - - if(0 == rem_bits) - { - return 0; - } - - // Mask unneeded bits - const uint8_t last_byte = (rem_bits % 8 == 0) - ? last_bytes[bytes_num - 1] - : last_bytes[bytes_num - 1] & MASK(rem_bits % 8); - // BE - if(0 == endien) - { - for(i = 0; (uint32_t)i < (bytes_num - 1); i++) - { - printf("%.2x", last_bytes[i]); - } - - printf("%.2x", last_byte); - - for(i++; (uint32_t)i < sizeof(uint64_t); i++) - { - printf("__"); - } - } - else - { - for(i = sizeof(uint64_t) - 1; (uint32_t)i >= bytes_num; i--) - { - printf("__"); - } - - printf("%.2x", last_byte); - - for(i--; i >= 0; i--) - { - printf("%.2x", last_bytes[i]); - } - } - -#ifndef NO_SPACE - printf(" "); -#endif - - return 1; -} - -void -print_LE(IN const uint64_t *in, IN const uint32_t bits_num) -{ - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // Print the MSB QW - uint32_t qw_pos = print_last_block((const uint8_t *)&in[qw_num], bits_num, 1); - - // Print each 8 bytes separated by space (if required) - for(int i = ((int)qw_num) - 1; i >= 0; i--, qw_pos++) - { - print_uint64(in[i]); - print_newline(qw_pos); - } - - printf("\n"); -} - -void -print_BE(IN const uint64_t *in, IN const uint32_t bits_num) -{ - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // Print each 16 numbers separatly - for(uint32_t i = 0; i < qw_num; ++i) - { - print_uint64(in[i]); - print_newline(i); - } - - // Print the MSB QW - print_last_block((const uint8_t *)&in[qw_num], bits_num, 0); - - printf("\n"); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "utilities.h" +#include <inttypes.h> + +#define BITS_IN_QW 64ULL +#define BITS_IN_BYTE 8ULL + +// Print a new line only if we prints in qw blocks +_INLINE_ void +print_newline(IN const uint64_t qw_pos) +{ +#ifndef NO_NEWLINE + if((qw_pos % 4) == 3) + { + printf("\n "); + } +#endif +} + +// This function is stitched for R_BITS vector +uint64_t +r_bits_vector_weight(IN const r_t *in) +{ + uint64_t acc = 0; + for(size_t i = 0; i < (R_SIZE - 1); i++) + { + acc += __builtin_popcount(in->raw[i]); + } + + acc += __builtin_popcount(in->raw[R_SIZE - 1] & LAST_R_BYTE_MASK); + return acc; +} + +// Prints a QW in LE/BE in win/linux format +_INLINE_ void +print_uint64(IN const uint64_t val) +{ +// If printing in BE is required swap the order of bytes +#ifdef PRINT_IN_BE + uint64_t tmp = bswap_64(val); +#else + uint64_t tmp = val; +#endif + + printf("%.16" PRIx64, tmp); + +#ifndef NO_SPACE + printf(" "); +#endif +} + +// Last block requires a special handling as we should zero mask all the bits +// above the desired number endien - 0 - BE, 1 - LE Return 1 if last block was +// printed else 0 +_INLINE_ uint8_t +print_last_block(IN const uint8_t *last_bytes, + IN const uint32_t bits_num, + IN const uint32_t endien) +{ + // Floor of bits/64 the reminder is in the next QW + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // How many bits to pad with zero + const uint32_t rem_bits = bits_num - (BITS_IN_QW * qw_num); + + // We read byte byte and not the whole QW to avoid reading bad memory address + const uint32_t bytes_num = ((rem_bits % 8) == 0) ? rem_bits / BITS_IN_BYTE + : 1 + rem_bits / BITS_IN_BYTE; + + // Must be signed for the LE loop + int i; + + if(0 == rem_bits) + { + return 0; + } + + // Mask unneeded bits + const uint8_t last_byte = (rem_bits % 8 == 0) + ? last_bytes[bytes_num - 1] + : last_bytes[bytes_num - 1] & MASK(rem_bits % 8); + // BE + if(0 == endien) + { + for(i = 0; (uint32_t)i < (bytes_num - 1); i++) + { + printf("%.2x", last_bytes[i]); + } + + printf("%.2x", last_byte); + + for(i++; (uint32_t)i < sizeof(uint64_t); i++) + { + printf("__"); + } + } + else + { + for(i = sizeof(uint64_t) - 1; (uint32_t)i >= bytes_num; i--) + { + printf("__"); + } + + printf("%.2x", last_byte); + + for(i--; i >= 0; i--) + { + printf("%.2x", last_bytes[i]); + } + } + +#ifndef NO_SPACE + printf(" "); +#endif + + return 1; +} + +void +print_LE(IN const uint64_t *in, IN const uint32_t bits_num) +{ + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // Print the MSB QW + uint32_t qw_pos = print_last_block((const uint8_t *)&in[qw_num], bits_num, 1); + + // Print each 8 bytes separated by space (if required) + for(int i = ((int)qw_num) - 1; i >= 0; i--, qw_pos++) + { + print_uint64(in[i]); + print_newline(qw_pos); + } + + printf("\n"); +} + +void +print_BE(IN const uint64_t *in, IN const uint32_t bits_num) +{ + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // Print each 16 numbers separatly + for(uint32_t i = 0; i < qw_num; ++i) + { + print_uint64(in[i]); + print_newline(i); + } + + // Print the MSB QW + print_last_block((const uint8_t *)&in[qw_num], bits_num, 0); + + printf("\n"); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.h index be8f4b9b10..bd2f163183 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r1/utilities.h @@ -1,158 +1,158 @@ -/* Copyright Amazon.com, Inc. or its affiliates. 
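On r_bits_vector_weight above: the Hamming weight of the R_BITS vector is accumulated with __builtin_popcount per byte, and the final, partially used byte is masked first so padding bits are never counted. A standalone sketch with toy sizes and a toy last-byte mask (both are assumptions, not the BIKE constants):

#include <stdint.h>
#include <stdio.h>

#define VEC_BYTES      4
#define LAST_BYTE_MASK 0x1f   /* pretend only 5 bits of the last byte belong to the vector */

static uint64_t vector_weight(const uint8_t *raw)
{
  uint64_t acc = 0;
  for(size_t i = 0; i < (VEC_BYTES - 1); i++) { acc += __builtin_popcount(raw[i]); }
  /* bits above the vector length must not be counted */
  acc += __builtin_popcount(raw[VEC_BYTES - 1] & LAST_BYTE_MASK);
  return acc;
}

int main(void)
{
  const uint8_t v[VEC_BYTES] = {0xff, 0x01, 0x00, 0xf0};   /* top nibble of last byte is padding */
  printf("weight = %llu\n", (unsigned long long)vector_weight(v));  /* 8 + 1 + 0 + 1 = 10 */
  return 0;
}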
All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "cleanup.h" - -#ifndef bswap_64 -# define bswap_64(x) __builtin_bswap64(x) -#endif - -// Printing values in Little Endian -void -print_LE(IN const uint64_t *in, IN uint32_t bits_num); - -// Printing values in Big Endian -void -print_BE(IN const uint64_t *in, IN uint32_t bits_num); - -// Printing number is required only in verbose level 2 or above -#if VERBOSE >= 2 -# ifdef PRINT_IN_BE -// Print in Big Endian -# define print(name, in, bits_num) \ - do \ - { \ - EDMSG(name); \ - print_BE(in, bits_num); \ - } while(0) -# else -// Print in Little Endian -# define print(name, in, bits_num) \ - do \ - { \ - EDMSG(name); \ - print_LE(in, bits_num); \ - } while(0) -# endif -#else -// No prints at all -# define print(name, in, bits_num) -#endif - -// Comparing value in a constant time manner -_INLINE_ uint32_t -secure_cmp(IN const uint8_t *a, IN const uint8_t *b, IN const uint32_t size) -{ - volatile uint8_t res = 0; - - for(uint32_t i = 0; i < size; ++i) - { - res |= (a[i] ^ b[i]); - } - - return (0 == res); -} - -uint64_t -r_bits_vector_weight(IN const r_t *in); - -// Constant time -_INLINE_ uint32_t -iszero(IN const uint8_t *s, IN const uint32_t len) -{ - volatile uint32_t res = 0; - for(uint64_t i = 0; i < len; i++) - { - res |= s[i]; - } - return (0 == res); -} - -// BSR returns ceil(log2(val)) -_INLINE_ uint8_t -bit_scan_reverse(uint64_t val) -{ - // index is always smaller than 64 - uint8_t index = 0; - - while(val != 0) - { - val >>= 1; - index++; - } - - return index; -} - -// Return 1 if equal 0 otherwise -_INLINE_ uint32_t -secure_cmp32(IN const uint32_t v1, IN const uint32_t v2) -{ -#if defined(__aarch64__) - uint32_t res; - __asm__ __volatile__("cmp %w1, %w2; \n " - "cset %w0, EQ; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - :); - return res; -#elif defined(__x86_64__) || defined(__i386__) - uint32_t res; - __asm__ __volatile__("xor %%edx, %%edx; \n" - "cmp %1, %2; \n " - "sete %%dl; \n" - "mov %%edx, %0; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - : "rdx"); - return res; -#else - // Insecure comparison: The main purpose of secure_cmp32 is to avoid - // branches and thus to prevent potential side channel attacks. To do that - // we normally leverage some CPU special instructions such as "sete" - // (for __x86_64__) and "cset" (for __aarch64__). When dealing with general - // CPU architectures, the interpretation of the line below is left for the - // compiler, which may lead to an insecure branch. - return (v1 == v2 ? 1 : 0); -#endif -} - -// Return 0 if v1 < v2, (-1) otherwise -_INLINE_ uint32_t -secure_l32_mask(IN const uint32_t v1, IN const uint32_t v2) -{ -#if defined(__aarch64__) - uint32_t res; - __asm__ __volatile__("cmp %w2, %w1; \n " - "cset %w0, HI; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - :); - return (res - 1); -#elif defined(__x86_64__) || defined(__i386__) - uint32_t res; - __asm__ __volatile__("xor %%edx, %%edx; \n" - "cmp %1, %2; \n " - "setl %%dl; \n" - "dec %%edx; \n" - "mov %%edx, %0; \n" - - : "=r"(res) - : "r"(v2), "r"(v1) - : "rdx"); - - return res; -#else - // If v1 >= v2 then the subtraction result is 0^32||(v1-v2) - // else it will be 1^32||(v2-v1+1). Subsequently, negating the upper - // 32 bits gives 0 if v1 < v2 and otherwise (-1). - return ~((uint32_t)(((uint64_t)v1 - (uint64_t)v2) >> 32)); -#endif -} +/* Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "cleanup.h" + +#ifndef bswap_64 +# define bswap_64(x) __builtin_bswap64(x) +#endif + +// Printing values in Little Endian +void +print_LE(IN const uint64_t *in, IN uint32_t bits_num); + +// Printing values in Big Endian +void +print_BE(IN const uint64_t *in, IN uint32_t bits_num); + +// Printing number is required only in verbose level 2 or above +#if VERBOSE >= 2 +# ifdef PRINT_IN_BE +// Print in Big Endian +# define print(name, in, bits_num) \ + do \ + { \ + EDMSG(name); \ + print_BE(in, bits_num); \ + } while(0) +# else +// Print in Little Endian +# define print(name, in, bits_num) \ + do \ + { \ + EDMSG(name); \ + print_LE(in, bits_num); \ + } while(0) +# endif +#else +// No prints at all +# define print(name, in, bits_num) +#endif + +// Comparing value in a constant time manner +_INLINE_ uint32_t +secure_cmp(IN const uint8_t *a, IN const uint8_t *b, IN const uint32_t size) +{ + volatile uint8_t res = 0; + + for(uint32_t i = 0; i < size; ++i) + { + res |= (a[i] ^ b[i]); + } + + return (0 == res); +} + +uint64_t +r_bits_vector_weight(IN const r_t *in); + +// Constant time +_INLINE_ uint32_t +iszero(IN const uint8_t *s, IN const uint32_t len) +{ + volatile uint32_t res = 0; + for(uint64_t i = 0; i < len; i++) + { + res |= s[i]; + } + return (0 == res); +} + +// BSR returns ceil(log2(val)) +_INLINE_ uint8_t +bit_scan_reverse(uint64_t val) +{ + // index is always smaller than 64 + uint8_t index = 0; + + while(val != 0) + { + val >>= 1; + index++; + } + + return index; +} + +// Return 1 if equal 0 otherwise +_INLINE_ uint32_t +secure_cmp32(IN const uint32_t v1, IN const uint32_t v2) +{ +#if defined(__aarch64__) + uint32_t res; + __asm__ __volatile__("cmp %w1, %w2; \n " + "cset %w0, EQ; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + :); + return res; +#elif defined(__x86_64__) || defined(__i386__) + uint32_t res; + __asm__ __volatile__("xor %%edx, %%edx; \n" + "cmp %1, %2; \n " + "sete %%dl; \n" + "mov %%edx, %0; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + : "rdx"); + return res; +#else + // Insecure comparison: The main purpose of secure_cmp32 is to avoid + // branches and thus to prevent potential side channel attacks. To do that + // we normally leverage some CPU special instructions such as "sete" + // (for __x86_64__) and "cset" (for __aarch64__). When dealing with general + // CPU architectures, the interpretation of the line below is left for the + // compiler, which may lead to an insecure branch. + return (v1 == v2 ? 1 : 0); +#endif +} + +// Return 0 if v1 < v2, (-1) otherwise +_INLINE_ uint32_t +secure_l32_mask(IN const uint32_t v1, IN const uint32_t v2) +{ +#if defined(__aarch64__) + uint32_t res; + __asm__ __volatile__("cmp %w2, %w1; \n " + "cset %w0, HI; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + :); + return (res - 1); +#elif defined(__x86_64__) || defined(__i386__) + uint32_t res; + __asm__ __volatile__("xor %%edx, %%edx; \n" + "cmp %1, %2; \n " + "setl %%dl; \n" + "dec %%edx; \n" + "mov %%edx, %0; \n" + + : "=r"(res) + : "r"(v2), "r"(v1) + : "rdx"); + + return res; +#else + // If v1 >= v2 then the subtraction result is 0^32||(v1-v2) + // else it will be 1^32||(v2-v1+1). Subsequently, negating the upper + // 32 bits gives 0 if v1 < v2 and otherwise (-1). 
+ return ~((uint32_t)(((uint64_t)v1 - (uint64_t)v2) >> 32)); +#endif +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.c index 26c99bc80d..90b2f10824 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.c @@ -1,105 +1,105 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "aes_ctr_prf.h" -#include "utilities.h" -#include <string.h> - -ret_t -init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, - IN const uint32_t max_invokations, - IN const seed_t *seed) -{ - if(0 == max_invokations) - { - BIKE_ERROR(E_AES_CTR_PRF_INIT_FAIL); - } - - // Set the key schedule (from seed). - // Make sure the size matches the AES256 key size - DEFER_CLEANUP(aes256_key_t key, aes256_key_cleanup); - - bike_static_assert(sizeof(*seed) == sizeof(key.raw), seed_size_equals_ky_size); - memcpy(key.raw, seed->raw, sizeof(key.raw)); - - GUARD(aes256_key_expansion(&s->ks_ptr, &key)); - - // Initialize buffer and counter - s->ctr.u.qw[0] = 0; - s->ctr.u.qw[1] = 0; - s->buffer.u.qw[0] = 0; - s->buffer.u.qw[1] = 0; - - s->pos = AES256_BLOCK_SIZE; - s->rem_invokations = max_invokations; - - SEDMSG(" Init aes_prf_ctr state:\n"); - SEDMSG(" s.pos = %d\n", s->pos); - SEDMSG(" s.rem_invokations = %u\n", s->rem_invokations); - SEDMSG(" s.ctr = 0x\n"); - - return SUCCESS; -} - -_INLINE_ ret_t -perform_aes(OUT uint8_t *ct, IN OUT aes_ctr_prf_state_t *s) -{ - // Ensure that the CTR is big enough - bike_static_assert( - ((sizeof(s->ctr.u.qw[0]) == 8) && (BIT(33) >= MAX_AES_INVOKATION)), - ctr_size_is_too_small); - - if(0 == s->rem_invokations) - { - BIKE_ERROR(E_AES_OVER_USED); - } - - GUARD(aes256_enc(ct, s->ctr.u.bytes, &s->ks_ptr)); - - s->ctr.u.qw[0]++; - s->rem_invokations--; - - return SUCCESS; -} - -ret_t -aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN const uint32_t len) -{ - // When Len is smaller than whats left in the buffer - // No need in additional AES - if((len + s->pos) <= AES256_BLOCK_SIZE) - { - memcpy(a, &s->buffer.u.bytes[s->pos], len); - s->pos += len; - - return SUCCESS; - } - - // If s.pos != AES256_BLOCK_SIZE then copy whats left in the buffer - // Else copy zero bytes - uint32_t idx = AES256_BLOCK_SIZE - s->pos; - memcpy(a, &s->buffer.u.bytes[s->pos], idx); - - // Init s.pos - s->pos = 0; - - // Copy full AES blocks - while((len - idx) >= AES256_BLOCK_SIZE) - { - GUARD(perform_aes(&a[idx], s)); - idx += AES256_BLOCK_SIZE; - } - - GUARD(perform_aes(s->buffer.u.bytes, s)); - - // Copy the tail - s->pos = len - idx; - memcpy(&a[idx], s->buffer.u.bytes, s->pos); - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "aes_ctr_prf.h" +#include "utilities.h" +#include <string.h> + +ret_t +init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, + IN const uint32_t max_invokations, + IN const seed_t *seed) +{ + if(0 == max_invokations) + { + BIKE_ERROR(E_AES_CTR_PRF_INIT_FAIL); + } + + // Set the key schedule (from seed). 
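Regarding the portable fallback of secure_l32_mask in utilities.h above: subtracting in 64 bits makes the borrow visible in the upper 32 bits, so negating those bits yields 0 when v1 < v2 and all-ones otherwise, with no data-dependent branch. Callers can then select between two values as (a & mask) | (b & ~mask). A small sketch exercising that identity (the function name here is chosen for illustration):

#include <stdint.h>
#include <stdio.h>

/* 0 if v1 < v2, 0xffffffff otherwise -- branch-free, mirrors the portable fallback */
static uint32_t l32_mask(uint32_t v1, uint32_t v2)
{
  return ~((uint32_t)(((uint64_t)v1 - (uint64_t)v2) >> 32));
}

int main(void)
{
  printf("%08x\n", l32_mask(3, 7));    /* 00000000 : 3 <  7                     */
  printf("%08x\n", l32_mask(7, 3));    /* ffffffff : 7 >= 3                     */
  printf("%08x\n", l32_mask(5, 5));    /* ffffffff : equal counts as "not less" */
  return 0;
}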
+ // Make sure the size matches the AES256 key size + DEFER_CLEANUP(aes256_key_t key, aes256_key_cleanup); + + bike_static_assert(sizeof(*seed) == sizeof(key.raw), seed_size_equals_ky_size); + memcpy(key.raw, seed->raw, sizeof(key.raw)); + + GUARD(aes256_key_expansion(&s->ks_ptr, &key)); + + // Initialize buffer and counter + s->ctr.u.qw[0] = 0; + s->ctr.u.qw[1] = 0; + s->buffer.u.qw[0] = 0; + s->buffer.u.qw[1] = 0; + + s->pos = AES256_BLOCK_SIZE; + s->rem_invokations = max_invokations; + + SEDMSG(" Init aes_prf_ctr state:\n"); + SEDMSG(" s.pos = %d\n", s->pos); + SEDMSG(" s.rem_invokations = %u\n", s->rem_invokations); + SEDMSG(" s.ctr = 0x\n"); + + return SUCCESS; +} + +_INLINE_ ret_t +perform_aes(OUT uint8_t *ct, IN OUT aes_ctr_prf_state_t *s) +{ + // Ensure that the CTR is big enough + bike_static_assert( + ((sizeof(s->ctr.u.qw[0]) == 8) && (BIT(33) >= MAX_AES_INVOKATION)), + ctr_size_is_too_small); + + if(0 == s->rem_invokations) + { + BIKE_ERROR(E_AES_OVER_USED); + } + + GUARD(aes256_enc(ct, s->ctr.u.bytes, &s->ks_ptr)); + + s->ctr.u.qw[0]++; + s->rem_invokations--; + + return SUCCESS; +} + +ret_t +aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN const uint32_t len) +{ + // When Len is smaller than whats left in the buffer + // No need in additional AES + if((len + s->pos) <= AES256_BLOCK_SIZE) + { + memcpy(a, &s->buffer.u.bytes[s->pos], len); + s->pos += len; + + return SUCCESS; + } + + // If s.pos != AES256_BLOCK_SIZE then copy whats left in the buffer + // Else copy zero bytes + uint32_t idx = AES256_BLOCK_SIZE - s->pos; + memcpy(a, &s->buffer.u.bytes[s->pos], idx); + + // Init s.pos + s->pos = 0; + + // Copy full AES blocks + while((len - idx) >= AES256_BLOCK_SIZE) + { + GUARD(perform_aes(&a[idx], s)); + idx += AES256_BLOCK_SIZE; + } + + GUARD(perform_aes(s->buffer.u.bytes, s)); + + // Copy the tail + s->pos = len - idx; + memcpy(&a[idx], s->buffer.u.bytes, s->pos); + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.h index ac17d4ddd5..bfcdeebd4a 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_ctr_prf.h @@ -1,49 +1,49 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "aes_wrap.h" - -////////////////////////////// -// Types -///////////////////////////// - -typedef struct aes_ctr_prf_state_s -{ - uint128_t ctr; - uint128_t buffer; - aes256_ks_t ks_ptr; - uint32_t rem_invokations; - uint8_t pos; -} aes_ctr_prf_state_t; - -////////////////////////////// -// Methods -///////////////////////////// - -ret_t -init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, - IN uint32_t max_invokations, - IN const seed_t *seed); - -ret_t -aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN uint32_t len); - -_INLINE_ void -finalize_aes_ctr_prf(IN OUT aes_ctr_prf_state_t *s) -{ - aes256_free_ks(&s->ks_ptr); - secure_clean((uint8_t *)s, sizeof(*s)); -} - -_INLINE_ void -aes_ctr_prf_state_cleanup(IN OUT aes_ctr_prf_state_t *s) -{ - finalize_aes_ctr_prf(s); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "aes_wrap.h" + +////////////////////////////// +// Types +///////////////////////////// + +typedef struct aes_ctr_prf_state_s +{ + uint128_t ctr; + uint128_t buffer; + aes256_ks_t ks_ptr; + uint32_t rem_invokations; + uint8_t pos; +} aes_ctr_prf_state_t; + +////////////////////////////// +// Methods +///////////////////////////// + +ret_t +init_aes_ctr_prf_state(OUT aes_ctr_prf_state_t *s, + IN uint32_t max_invokations, + IN const seed_t *seed); + +ret_t +aes_ctr_prf(OUT uint8_t *a, IN OUT aes_ctr_prf_state_t *s, IN uint32_t len); + +_INLINE_ void +finalize_aes_ctr_prf(IN OUT aes_ctr_prf_state_t *s) +{ + aes256_free_ks(&s->ks_ptr); + secure_clean((uint8_t *)s, sizeof(*s)); +} + +_INLINE_ void +aes_ctr_prf_state_cleanup(IN OUT aes_ctr_prf_state_t *s) +{ + finalize_aes_ctr_prf(s); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_wrap.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_wrap.h index f0adc0bb52..1a377d1c15 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_wrap.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/aes_wrap.h @@ -1,71 +1,71 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) - */ - -#pragma once - -#include "cleanup.h" -#include <openssl/evp.h> - -#define MAX_AES_INVOKATION (MASK(32)) - -#define AES256_KEY_SIZE (32U) -#define AES256_KEY_BITS (AES256_KEY_SIZE * 8) -#define AES256_BLOCK_SIZE (16U) -#define AES256_ROUNDS (14U) - -typedef ALIGN(16) struct aes256_key_s -{ - uint8_t raw[AES256_KEY_SIZE]; -} aes256_key_t; - -_INLINE_ void -aes256_key_cleanup(aes256_key_t *o) -{ - secure_clean(o->raw, sizeof(*o)); -} - -// Using OpenSSL structures -typedef EVP_CIPHER_CTX *aes256_ks_t; - -_INLINE_ ret_t -aes256_key_expansion(OUT aes256_ks_t *ks, IN const aes256_key_t *key) -{ - *ks = EVP_CIPHER_CTX_new(); - if(*ks == NULL) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - if(0 == EVP_EncryptInit_ex(*ks, EVP_aes_256_ecb(), NULL, key->raw, NULL)) - { - EVP_CIPHER_CTX_free(*ks); - *ks = NULL; - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - EVP_CIPHER_CTX_set_padding(*ks, 0); - - return SUCCESS; -} - -_INLINE_ ret_t -aes256_enc(OUT uint8_t *ct, IN const uint8_t *pt, IN const aes256_ks_t *ks) -{ - int outlen = 0; - if(0 == EVP_EncryptUpdate(*ks, ct, &outlen, pt, AES256_BLOCK_SIZE)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - return SUCCESS; -} - -_INLINE_ void -aes256_free_ks(OUT aes256_ks_t *ks) -{ - EVP_CIPHER_CTX_free(*ks); - *ks = NULL; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) + */ + +#pragma once + +#include "cleanup.h" +#include <openssl/evp.h> + +#define MAX_AES_INVOKATION (MASK(32)) + +#define AES256_KEY_SIZE (32U) +#define AES256_KEY_BITS (AES256_KEY_SIZE * 8) +#define AES256_BLOCK_SIZE (16U) +#define AES256_ROUNDS (14U) + +typedef ALIGN(16) struct aes256_key_s +{ + uint8_t raw[AES256_KEY_SIZE]; +} aes256_key_t; + +_INLINE_ void +aes256_key_cleanup(aes256_key_t *o) +{ + secure_clean(o->raw, sizeof(*o)); +} + +// Using OpenSSL structures +typedef EVP_CIPHER_CTX *aes256_ks_t; + +_INLINE_ ret_t +aes256_key_expansion(OUT aes256_ks_t *ks, IN const aes256_key_t *key) +{ + *ks = EVP_CIPHER_CTX_new(); + if(*ks == NULL) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + if(0 == EVP_EncryptInit_ex(*ks, EVP_aes_256_ecb(), NULL, key->raw, NULL)) + { + EVP_CIPHER_CTX_free(*ks); + *ks = NULL; + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + EVP_CIPHER_CTX_set_padding(*ks, 0); + + return SUCCESS; +} + +_INLINE_ ret_t +aes256_enc(OUT uint8_t *ct, IN const uint8_t *pt, IN const aes256_ks_t *ks) +{ + int outlen = 0; + if(0 == EVP_EncryptUpdate(*ks, ct, &outlen, pt, AES256_BLOCK_SIZE)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + return SUCCESS; +} + +_INLINE_ void +aes256_free_ks(OUT aes256_ks_t *ks) +{ + EVP_CIPHER_CTX_free(*ks); + *ks = NULL; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_defs.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_defs.h index 34a221462b..b64014abf3 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_defs.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_defs.h @@ -1,107 +1,107 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "defs.h" - -#define LEVEL 1 - -//////////////////////////////////////////// -// BIKE Parameters -/////////////////////////////////////////// -#define N0 2 - -#ifndef LEVEL -# define LEVEL 1 -#endif - -#if(LEVEL == 3) -# ifdef INDCPA -# define R_BITS 19853 -# else -# define R_BITS 24821 -# endif -# define DV 103 -# define T1 199 - -# define THRESHOLD_COEFF0 15.932 -# define THRESHOLD_COEFF1 0.0052936 - -// The gfm code is optimized to a block size in this case: -# define BLOCK_SIZE 32768 -#elif(LEVEL == 1) -// 64-bits of post-quantum security parameters (BIKE paper): -# ifdef INDCPA -# define R_BITS 10163 -# else -# define R_BITS 11779 -# endif -# define DV 71 -# define T1 134 - -# define THRESHOLD_COEFF0 13.530 -# define THRESHOLD_COEFF1 0.0069721 - -// The gfm code is optimized to a block size in this case: -# define BLOCK_SIZE (16384) -#else -# error "Bad level, choose one of 1/3" -#endif - -#ifdef INDCPA -# define NUM_OF_SEEDS 2 -#else -# define NUM_OF_SEEDS 3 -#endif - -// Round the size to the nearest byte. -// SIZE suffix, is the number of bytes (uint8_t). 
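On aes_wrap.h above: the PRF's block cipher is built from OpenSSL's EVP interface, i.e. one EVP_CIPHER_CTX initialised for AES-256-ECB with padding disabled, then EVP_EncryptUpdate applied to a single 16-byte counter block per invocation. A hedged usage sketch of those same calls, with an all-zero key and block chosen purely for illustration (build with -lcrypto):

#include <openssl/evp.h>
#include <stdio.h>

int main(void)
{
  unsigned char key[32] = {0};            /* AES-256 key (toy value)  */
  unsigned char pt[16]  = {0};            /* one counter block        */
  unsigned char ct[16];
  int outlen = 0;

  EVP_CIPHER_CTX *ctx = EVP_CIPHER_CTX_new();
  if(ctx == NULL) { return 1; }

  if(EVP_EncryptInit_ex(ctx, EVP_aes_256_ecb(), NULL, key, NULL) != 1) { return 1; }
  EVP_CIPHER_CTX_set_padding(ctx, 0);     /* raw single-block ECB, no PKCS padding */

  if(EVP_EncryptUpdate(ctx, ct, &outlen, pt, sizeof(pt)) != 1) { return 1; }

  for(int i = 0; i < outlen; i++) { printf("%02x", ct[i]); }
  printf("\n");

  EVP_CIPHER_CTX_free(ctx);
  return 0;
}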
-#define N_BITS (R_BITS * N0) -#define R_SIZE DIVIDE_AND_CEIL(R_BITS, 8) -#define R_QW DIVIDE_AND_CEIL(R_BITS, 8 * QW_SIZE) -#define R_YMM DIVIDE_AND_CEIL(R_BITS, 8 * YMM_SIZE) -#define R_ZMM DIVIDE_AND_CEIL(R_BITS, 8 * ZMM_SIZE) - -#define N_SIZE DIVIDE_AND_CEIL(N_BITS, 8) - -#define R_BLOCKS DIVIDE_AND_CEIL(R_BITS, BLOCK_SIZE) -#define R_PADDED (R_BLOCKS * BLOCK_SIZE) -#define R_PADDED_SIZE (R_PADDED / 8) -#define R_PADDED_QW (R_PADDED / 64) - -#define N_BLOCKS DIVIDE_AND_CEIL(N_BITS, BLOCK_SIZE) -#define N_PADDED (N_BLOCKS * BLOCK_SIZE) -#define N_PADDED_SIZE (N_PADDED / 8) -#define N_PADDED_QW (N_PADDED / 64) - -#define R_DDQWORDS_BITS (DIVIDE_AND_CEIL(R_BITS, ALL_YMM_SIZE) * ALL_YMM_SIZE) -bike_static_assert((R_BITS % ALL_YMM_SIZE != 0), rbits_512_err); - -#define N_DDQWORDS_BITS (R_DDQWORDS_BITS + R_BITS) -bike_static_assert((N_BITS % ALL_YMM_SIZE != 0), nbits_512_err); - -#define LAST_R_QW_LEAD (R_BITS & MASK(6)) -#define LAST_R_QW_TRAIL (64 - LAST_R_QW_LEAD) -#define LAST_R_QW_MASK MASK(LAST_R_QW_LEAD) - -#define LAST_R_BYTE_LEAD (R_BITS & MASK(3)) -#define LAST_R_BYTE_TRAIL (8 - LAST_R_BYTE_LEAD) -#define LAST_R_BYTE_MASK MASK(LAST_R_BYTE_LEAD) - -// BIKE auxiliary functions parameters: -#define ELL_K_BITS 256 -#define ELL_K_SIZE (ELL_K_BITS / 8) - -//////////////////////////////// -// Parameters for the BG decoder. -//////////////////////////////// -#define DELTA 3 -#define SLICES (LOG2_MSB(DV) + 1) - -#define BGF_DECODER +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "defs.h" + +#define LEVEL 1 + +//////////////////////////////////////////// +// BIKE Parameters +/////////////////////////////////////////// +#define N0 2 + +#ifndef LEVEL +# define LEVEL 1 +#endif + +#if(LEVEL == 3) +# ifdef INDCPA +# define R_BITS 19853 +# else +# define R_BITS 24821 +# endif +# define DV 103 +# define T1 199 + +# define THRESHOLD_COEFF0 15.932 +# define THRESHOLD_COEFF1 0.0052936 + +// The gfm code is optimized to a block size in this case: +# define BLOCK_SIZE 32768 +#elif(LEVEL == 1) +// 64-bits of post-quantum security parameters (BIKE paper): +# ifdef INDCPA +# define R_BITS 10163 +# else +# define R_BITS 11779 +# endif +# define DV 71 +# define T1 134 + +# define THRESHOLD_COEFF0 13.530 +# define THRESHOLD_COEFF1 0.0069721 + +// The gfm code is optimized to a block size in this case: +# define BLOCK_SIZE (16384) +#else +# error "Bad level, choose one of 1/3" +#endif + +#ifdef INDCPA +# define NUM_OF_SEEDS 2 +#else +# define NUM_OF_SEEDS 3 +#endif + +// Round the size to the nearest byte. +// SIZE suffix, is the number of bytes (uint8_t). 
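// An illustrative, standalone calculation (a sketch for illustration only) of
// the sizes that the macros right below produce for the Level-1 parameters
// defined above (LEVEL == 1, INDCPA not defined, so R_BITS == 11779 and
// BLOCK_SIZE == 16384).  Note that DIVIDE_AND_CEIL adds a full divider before
// dividing, so an exact multiple would get one extra unit; none of the
// Level-1 values used here are exact multiples of the divisors involved.
// The size macro definitions continue right below.
#include <stdio.h>

#define DIVIDE_AND_CEIL(x, divider) (((x) + (divider)) / (divider))

int main(void)
{
    const unsigned r_bits = 11779;      /* LEVEL == 1, INDCPA not defined */
    const unsigned n_bits = r_bits * 2; /* N0 == 2 */
    const unsigned block  = 16384;      /* BLOCK_SIZE for LEVEL 1 */

    printf("R_SIZE   = %u bytes\n", DIVIDE_AND_CEIL(r_bits, 8));            /* 1473  */
    printf("R_QW     = %u quadwords\n", DIVIDE_AND_CEIL(r_bits, 64));       /* 185   */
    printf("N_SIZE   = %u bytes\n", DIVIDE_AND_CEIL(n_bits, 8));            /* 2945  */
    printf("R_PADDED = %u bits\n", DIVIDE_AND_CEIL(r_bits, block) * block); /* 16384 */
    return 0;
}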
+#define N_BITS (R_BITS * N0) +#define R_SIZE DIVIDE_AND_CEIL(R_BITS, 8) +#define R_QW DIVIDE_AND_CEIL(R_BITS, 8 * QW_SIZE) +#define R_YMM DIVIDE_AND_CEIL(R_BITS, 8 * YMM_SIZE) +#define R_ZMM DIVIDE_AND_CEIL(R_BITS, 8 * ZMM_SIZE) + +#define N_SIZE DIVIDE_AND_CEIL(N_BITS, 8) + +#define R_BLOCKS DIVIDE_AND_CEIL(R_BITS, BLOCK_SIZE) +#define R_PADDED (R_BLOCKS * BLOCK_SIZE) +#define R_PADDED_SIZE (R_PADDED / 8) +#define R_PADDED_QW (R_PADDED / 64) + +#define N_BLOCKS DIVIDE_AND_CEIL(N_BITS, BLOCK_SIZE) +#define N_PADDED (N_BLOCKS * BLOCK_SIZE) +#define N_PADDED_SIZE (N_PADDED / 8) +#define N_PADDED_QW (N_PADDED / 64) + +#define R_DDQWORDS_BITS (DIVIDE_AND_CEIL(R_BITS, ALL_YMM_SIZE) * ALL_YMM_SIZE) +bike_static_assert((R_BITS % ALL_YMM_SIZE != 0), rbits_512_err); + +#define N_DDQWORDS_BITS (R_DDQWORDS_BITS + R_BITS) +bike_static_assert((N_BITS % ALL_YMM_SIZE != 0), nbits_512_err); + +#define LAST_R_QW_LEAD (R_BITS & MASK(6)) +#define LAST_R_QW_TRAIL (64 - LAST_R_QW_LEAD) +#define LAST_R_QW_MASK MASK(LAST_R_QW_LEAD) + +#define LAST_R_BYTE_LEAD (R_BITS & MASK(3)) +#define LAST_R_BYTE_TRAIL (8 - LAST_R_BYTE_LEAD) +#define LAST_R_BYTE_MASK MASK(LAST_R_BYTE_LEAD) + +// BIKE auxiliary functions parameters: +#define ELL_K_BITS 256 +#define ELL_K_SIZE (ELL_K_BITS / 8) + +//////////////////////////////// +// Parameters for the BG decoder. +//////////////////////////////// +#define DELTA 3 +#define SLICES (LOG2_MSB(DV) + 1) + +#define BGF_DECODER diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_r2_kem.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_r2_kem.c index 3539827d14..730cb2f826 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_r2_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/bike_r2_kem.c @@ -1,374 +1,374 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) - */ - -#include "decode.h" -#include "gf2x.h" -#include "sampling.h" -#include "sha.h" -#include "tls/s2n_kem.h" - -_INLINE_ void -split_e(OUT split_e_t *splitted_e, IN const e_t *e) -{ - // Copy lower bytes (e0) - memcpy(splitted_e->val[0].raw, e->raw, R_SIZE); - - // Now load second value - for(uint32_t i = R_SIZE; i < N_SIZE; ++i) - { - splitted_e->val[1].raw[i - R_SIZE] = - ((e->raw[i] << LAST_R_BYTE_TRAIL) | (e->raw[i - 1] >> LAST_R_BYTE_LEAD)); - } - - // Fix corner case - if(N_SIZE < (2ULL * R_SIZE)) - { - splitted_e->val[1].raw[R_SIZE - 1] = (e->raw[N_SIZE - 1] >> LAST_R_BYTE_LEAD); - } - - // Fix last value - splitted_e->val[0].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - splitted_e->val[1].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; -} - -_INLINE_ void -translate_hash_to_ss(OUT ss_t *ss, IN sha_hash_t *hash) -{ - bike_static_assert(sizeof(*hash) >= sizeof(*ss), hash_size_lt_ss_size); - memcpy(ss->raw, hash->u.raw, sizeof(*ss)); -} - -_INLINE_ void -translate_hash_to_seed(OUT seed_t *seed, IN sha_hash_t *hash) -{ - bike_static_assert(sizeof(*hash) >= sizeof(*seed), hash_size_lt_seed_size); - memcpy(seed->raw, hash->u.raw, sizeof(*seed)); -} - -_INLINE_ ret_t -calc_pk(OUT pk_t *pk, IN const seed_t *g_seed, IN const pad_sk_t p_sk) -{ - // PK is dbl padded because modmul require some scratch space for the - // multiplication result - dbl_pad_pk_t p_pk = {0}; - - // Intialized padding to zero - DEFER_CLEANUP(padded_r_t g = {0}, padded_r_cleanup); - - GUARD(sample_uniform_r_bits(&g.val, g_seed, MUST_BE_ODD)); - - // Calculate (g0, g1) = (g*h1, g*h0) - GUARD(gf2x_mod_mul((uint64_t *)&p_pk[0], (const uint64_t *)&g, - (const uint64_t *)&p_sk[1])); - GUARD(gf2x_mod_mul((uint64_t *)&p_pk[1], (const uint64_t *)&g, - (const uint64_t *)&p_sk[0])); - - // Copy the data to the output parameters. - pk->val[0] = p_pk[0].val; - pk->val[1] = p_pk[1].val; - - print("g: ", (const uint64_t *)g.val.raw, R_BITS); - print("g0: ", (const uint64_t *)&p_pk[0], R_BITS); - print("g1: ", (uint64_t *)&p_pk[1], R_BITS); - - return SUCCESS; -} - -// The function H is required by BIKE-1- Round 2 variant. 
It uses the -// extract-then-expand paradigm, based on SHA384 and AES256-CTR PRNG, to produce -// e from (m*f0, m*f1): -_INLINE_ ret_t -function_h(OUT split_e_t *splitted_e, IN const r_t *in0, IN const r_t *in1) -{ - DEFER_CLEANUP(generic_param_n_t tmp, generic_param_n_cleanup); - DEFER_CLEANUP(sha_hash_t hash_seed = {0}, sha_hash_cleanup); - DEFER_CLEANUP(seed_t seed_for_hash, seed_cleanup); - DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, finalize_aes_ctr_prf); - - tmp.val[0] = *in0; - tmp.val[1] = *in1; - - // Hash (m*f0, m*f1) to generate a seed: - sha(&hash_seed, sizeof(tmp), (uint8_t *)&tmp); - - // Format the seed as a 32-bytes input: - translate_hash_to_seed(&seed_for_hash, &hash_seed); - - // Use the seed to generate a sparse error vector e: - DMSG(" Generating random error.\n"); - GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, &seed_for_hash)); - - DEFER_CLEANUP(padded_e_t e, padded_e_cleanup); - DEFER_CLEANUP(ALIGN(8) compressed_idx_t_t dummy, compressed_idx_t_cleanup); - - GUARD(generate_sparse_rep((uint64_t *)&e, dummy.val, T1, N_BITS, sizeof(e), - &prf_state)); - split_e(splitted_e, &e.val); - - return SUCCESS; -} - -_INLINE_ ret_t -encrypt(OUT ct_t *ct, OUT split_e_t *mf, IN const pk_t *pk, IN const seed_t *seed) -{ - DEFER_CLEANUP(padded_r_t m = {0}, padded_r_cleanup); - - DMSG(" Sampling m.\n"); - GUARD(sample_uniform_r_bits(&m.val, seed, NO_RESTRICTION)); - - // Pad the public key - pad_pk_t p_pk = {0}; - p_pk[0].val = pk->val[0]; - p_pk[1].val = pk->val[1]; - - // Pad the ciphertext - pad_ct_t p_ct = {0}; - p_ct[0].val = ct->val[0]; - p_ct[1].val = ct->val[1]; - - DEFER_CLEANUP(dbl_pad_ct_t mf_int = {0}, dbl_pad_ct_cleanup); - - DMSG(" Computing m*f0 and m*f1.\n"); - GUARD( - gf2x_mod_mul((uint64_t *)&mf_int[0], (uint64_t *)&m, (uint64_t *)&p_pk[0])); - GUARD( - gf2x_mod_mul((uint64_t *)&mf_int[1], (uint64_t *)&m, (uint64_t *)&p_pk[1])); - - DEFER_CLEANUP(split_e_t splitted_e, split_e_cleanup); - - DMSG(" Computing the hash function e <- H(m*f0, m*f1).\n"); - GUARD(function_h(&splitted_e, &mf_int[0].val, &mf_int[1].val)); - - DMSG(" Addding Error to the ciphertext.\n"); - GUARD(gf2x_add(p_ct[0].val.raw, mf_int[0].val.raw, splitted_e.val[0].raw, - R_SIZE)); - GUARD(gf2x_add(p_ct[1].val.raw, mf_int[1].val.raw, splitted_e.val[1].raw, - R_SIZE)); - - // Copy the data to the output parameters. - ct->val[0] = p_ct[0].val; - ct->val[1] = p_ct[1].val; - - // Copy the internal mf to the output parameters. 
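// The error pair that was just added to the ciphertext came out of
// function_h() above.  Informally, its pipeline (paraphrasing the code, with
// SHA384 as named in the comment above function_h) is:
//
//   (m*f0 || m*f1)  --SHA384-->  digest
//   digest[0..31]   -->  seed for the AES256-CTR PRF
//   PRF             -->  generate_sparse_rep(): a weight-T1 vector over N_BITS
//   split_e()       -->  (e0, e1), with the unused top bits of the last byte
//                        masked off via LAST_R_BYTE_MASK
//
// The copy of the internal mf into the output parameters follows below.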
- mf->val[0] = mf_int[0].val; - mf->val[1] = mf_int[1].val; - - print("e0: ", (uint64_t *)splitted_e.val[0].raw, R_BITS); - print("e1: ", (uint64_t *)splitted_e.val[1].raw, R_BITS); - print("c0: ", (uint64_t *)p_ct[0].val.raw, R_BITS); - print("c1: ", (uint64_t *)p_ct[1].val.raw, R_BITS); - - return SUCCESS; -} - -_INLINE_ ret_t -reencrypt(OUT pad_ct_t ce, - OUT split_e_t *e2, - IN const split_e_t *e, - IN const ct_t *l_ct) -{ - // Compute (c0 + e0') and (c1 + e1') - GUARD(gf2x_add(ce[0].val.raw, l_ct->val[0].raw, e->val[0].raw, R_SIZE)); - GUARD(gf2x_add(ce[1].val.raw, l_ct->val[1].raw, e->val[1].raw, R_SIZE)); - - // (e0'', e1'') <-- H(c0 + e0', c1 + e1') - GUARD(function_h(e2, &ce[0].val, &ce[1].val)); - - return SUCCESS; -} - -// Generate the Shared Secret K(mf0, mf1, c) by either -// K(c0+e0', c1+e1', c) or K(sigma0, sigma1, c) -_INLINE_ void -get_ss(OUT ss_t *out, IN const r_t *in0, IN const r_t *in1, IN const ct_t *ct) -{ - DMSG(" Enter get_ss.\n"); - - uint8_t tmp[4 * R_SIZE]; - memcpy(tmp, in0->raw, R_SIZE); - memcpy(tmp + R_SIZE, in1->raw, R_SIZE); - memcpy(tmp + 2 * R_SIZE, ct, sizeof(*ct)); - - // Calculate the hash digest - DEFER_CLEANUP(sha_hash_t hash = {0}, sha_hash_cleanup); - sha(&hash, sizeof(tmp), tmp); - - // Truncate the resulting digest, to produce the key K, by copying only the - // desired number of LSBs. - translate_hash_to_ss(out, &hash); - - secure_clean(tmp, sizeof(tmp)); - DMSG(" Exit get_ss.\n"); -} -//////////////////////////////////////////////////////////////////////////////// -// The three APIs below (keypair, encapsulate, decapsulate) are defined by NIST: -//////////////////////////////////////////////////////////////////////////////// -int -BIKE1_L1_R2_crypto_kem_keypair(OUT unsigned char *pk, OUT unsigned char *sk) -{ - notnull_check(sk); - notnull_check(pk); - - // Convert to this implementation types - pk_t *l_pk = (pk_t *)pk; - DEFER_CLEANUP(ALIGN(8) sk_t l_sk = {0}, sk_cleanup); - - // For DRBG and AES_PRF - DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); - DEFER_CLEANUP(aes_ctr_prf_state_t h_prf_state = {0}, aes_ctr_prf_state_cleanup); - - // For sigma0/1/2 - DEFER_CLEANUP(aes_ctr_prf_state_t s_prf_state = {0}, aes_ctr_prf_state_cleanup); - - // Padded for internal use only (the padded data is not released). - DEFER_CLEANUP(pad_sk_t p_sk = {0}, pad_sk_cleanup); - - // Get the entropy seeds. - GUARD(get_seeds(&seeds)); - - DMSG(" Enter crypto_kem_keypair.\n"); - DMSG(" Calculating the secret key.\n"); - - // h0 and h1 use the same context - GUARD(init_aes_ctr_prf_state(&h_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); - - // sigma0/1/2 use the same context. 
- GUARD(init_aes_ctr_prf_state(&s_prf_state, MAX_AES_INVOKATION, &seeds.seed[2])); - - GUARD(generate_sparse_rep((uint64_t *)&p_sk[0], l_sk.wlist[0].val, DV, R_BITS, - sizeof(p_sk[0]), &h_prf_state)); - - // Sample the sigmas - GUARD(sample_uniform_r_bits_with_fixed_prf_context(&l_sk.sigma0, &s_prf_state, - NO_RESTRICTION)); - GUARD(sample_uniform_r_bits_with_fixed_prf_context(&l_sk.sigma1, &s_prf_state, - NO_RESTRICTION)); - - GUARD(generate_sparse_rep((uint64_t *)&p_sk[1], l_sk.wlist[1].val, DV, R_BITS, - sizeof(p_sk[1]), &h_prf_state)); - - // Copy data - l_sk.bin[0] = p_sk[0].val; - l_sk.bin[1] = p_sk[1].val; - - DMSG(" Calculating the public key.\n"); - - GUARD(calc_pk(l_pk, &seeds.seed[1], p_sk)); - - memcpy(sk, &l_sk, sizeof(l_sk)); - - print("h0: ", (uint64_t *)&l_sk.bin[0], R_BITS); - print("h1: ", (uint64_t *)&l_sk.bin[1], R_BITS); - print("h0c:", (uint64_t *)&l_sk.wlist[0], SIZEOF_BITS(compressed_idx_dv_t)); - print("h1c:", (uint64_t *)&l_sk.wlist[1], SIZEOF_BITS(compressed_idx_dv_t)); - print("sigma0: ", (uint64_t *)l_sk.sigma0.raw, R_BITS); - print("sigma1: ", (uint64_t *)l_sk.sigma1.raw, R_BITS); - - DMSG(" Exit crypto_kem_keypair.\n"); - - return SUCCESS; -} - -// Encapsulate - pk is the public key, -// ct is a key encapsulation message (ciphertext), -// ss is the shared secret. -int -BIKE1_L1_R2_crypto_kem_enc(OUT unsigned char * ct, - OUT unsigned char * ss, - IN const unsigned char *pk) -{ - DMSG(" Enter crypto_kem_enc.\n"); - - // Convert to the types that are used by this implementation - const pk_t *l_pk = (const pk_t *)pk; - ct_t * l_ct = (ct_t *)ct; - ss_t * l_ss = (ss_t *)ss; - - notnull_check(pk); - notnull_check(ct); - notnull_check(ss); - - // For NIST DRBG_CTR - DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); - - // Get the entropy seeds. - GUARD(get_seeds(&seeds)); - - DMSG(" Encrypting.\n"); - // In fact, seed[0] should be used. - // Here, we stay consistent with BIKE's reference code - // that chooses the seconde seed. - DEFER_CLEANUP(split_e_t mf, split_e_cleanup); - GUARD(encrypt(l_ct, &mf, l_pk, &seeds.seed[1])); - - DMSG(" Generating shared secret.\n"); - get_ss(l_ss, &mf.val[0], &mf.val[1], l_ct); - - print("ss: ", (uint64_t *)l_ss->raw, SIZEOF_BITS(*l_ss)); - DMSG(" Exit crypto_kem_enc.\n"); - return SUCCESS; -} - -// Decapsulate - ct is a key encapsulation message (ciphertext), -// sk is the private key, -// ss is the shared secret -int -BIKE1_L1_R2_crypto_kem_dec(OUT unsigned char * ss, - IN const unsigned char *ct, - IN const unsigned char *sk) -{ - DMSG(" Enter crypto_kem_dec.\n"); - - // Convert to the types used by this implementation - const ct_t *l_ct = (const ct_t *)ct; - ss_t * l_ss = (ss_t *)ss; - notnull_check(sk); - notnull_check(ct); - notnull_check(ss); - - DEFER_CLEANUP(ALIGN(8) sk_t l_sk, sk_cleanup); - memcpy(&l_sk, sk, sizeof(l_sk)); - - // Force zero initialization. - DEFER_CLEANUP(syndrome_t syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(split_e_t e, split_e_cleanup); - - DMSG(" Computing s.\n"); - GUARD(compute_syndrome(&syndrome, l_ct, &l_sk)); - - DMSG(" Decoding.\n"); - uint32_t dec_ret = decode(&e, &syndrome, l_ct, &l_sk) != SUCCESS ? 0 : 1; - - DEFER_CLEANUP(split_e_t e2, split_e_cleanup); - DEFER_CLEANUP(pad_ct_t ce, pad_ct_cleanup); - GUARD(reencrypt(ce, &e2, &e, l_ct)); - - // Check if the decoding is successful. - // Check if the error weight equals T1. - // Check if (e0', e1') == (e0'', e1''). 
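// Why the re-encryption check below works (informal, pieced together from
// encrypt(), reencrypt() and function_h() in this file): if the decoder
// recovered the error correctly, (e0', e1') equals the (e0, e1) added at
// encapsulation, so over GF(2)
//
//   c0 + e0' = m*f0   and   c1 + e1' = m*f1,
//
// and therefore
//
//   (e0'', e1'') = H(c0 + e0', c1 + e1') = H(m*f0, m*f1) = (e0, e1) = (e0', e1').
//
// The decoding flag, the weight-T1 check and this equality are folded into a
// single condition below and consumed without branching.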
- volatile uint32_t success_cond; - success_cond = dec_ret; - success_cond &= secure_cmp32(T1, r_bits_vector_weight(&e.val[0]) + - r_bits_vector_weight(&e.val[1])); - success_cond &= secure_cmp((uint8_t *)&e, (uint8_t *)&e2, sizeof(e)); - - ss_t ss_succ = {0}; - ss_t ss_fail = {0}; - - get_ss(&ss_succ, &ce[0].val, &ce[1].val, l_ct); - get_ss(&ss_fail, &l_sk.sigma0, &l_sk.sigma1, l_ct); - - uint8_t mask = ~secure_l32_mask(0, success_cond); - for(uint32_t i = 0; i < sizeof(*l_ss); i++) - { - l_ss->raw[i] = (mask & ss_succ.raw[i]) | (~mask & ss_fail.raw[i]); - } - - DMSG(" Exit crypto_kem_dec.\n"); - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) + */ + +#include "decode.h" +#include "gf2x.h" +#include "sampling.h" +#include "sha.h" +#include "tls/s2n_kem.h" + +_INLINE_ void +split_e(OUT split_e_t *splitted_e, IN const e_t *e) +{ + // Copy lower bytes (e0) + memcpy(splitted_e->val[0].raw, e->raw, R_SIZE); + + // Now load second value + for(uint32_t i = R_SIZE; i < N_SIZE; ++i) + { + splitted_e->val[1].raw[i - R_SIZE] = + ((e->raw[i] << LAST_R_BYTE_TRAIL) | (e->raw[i - 1] >> LAST_R_BYTE_LEAD)); + } + + // Fix corner case + if(N_SIZE < (2ULL * R_SIZE)) + { + splitted_e->val[1].raw[R_SIZE - 1] = (e->raw[N_SIZE - 1] >> LAST_R_BYTE_LEAD); + } + + // Fix last value + splitted_e->val[0].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + splitted_e->val[1].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; +} + +_INLINE_ void +translate_hash_to_ss(OUT ss_t *ss, IN sha_hash_t *hash) +{ + bike_static_assert(sizeof(*hash) >= sizeof(*ss), hash_size_lt_ss_size); + memcpy(ss->raw, hash->u.raw, sizeof(*ss)); +} + +_INLINE_ void +translate_hash_to_seed(OUT seed_t *seed, IN sha_hash_t *hash) +{ + bike_static_assert(sizeof(*hash) >= sizeof(*seed), hash_size_lt_seed_size); + memcpy(seed->raw, hash->u.raw, sizeof(*seed)); +} + +_INLINE_ ret_t +calc_pk(OUT pk_t *pk, IN const seed_t *g_seed, IN const pad_sk_t p_sk) +{ + // PK is dbl padded because modmul require some scratch space for the + // multiplication result + dbl_pad_pk_t p_pk = {0}; + + // Intialized padding to zero + DEFER_CLEANUP(padded_r_t g = {0}, padded_r_cleanup); + + GUARD(sample_uniform_r_bits(&g.val, g_seed, MUST_BE_ODD)); + + // Calculate (g0, g1) = (g*h1, g*h0) + GUARD(gf2x_mod_mul((uint64_t *)&p_pk[0], (const uint64_t *)&g, + (const uint64_t *)&p_sk[1])); + GUARD(gf2x_mod_mul((uint64_t *)&p_pk[1], (const uint64_t *)&g, + (const uint64_t *)&p_sk[0])); + + // Copy the data to the output parameters. + pk->val[0] = p_pk[0].val; + pk->val[1] = p_pk[1].val; + + print("g: ", (const uint64_t *)g.val.raw, R_BITS); + print("g0: ", (const uint64_t *)&p_pk[0], R_BITS); + print("g1: ", (uint64_t *)&p_pk[1], R_BITS); + + return SUCCESS; +} + +// The function H is required by BIKE-1- Round 2 variant. 
It uses the +// extract-then-expand paradigm, based on SHA384 and AES256-CTR PRNG, to produce +// e from (m*f0, m*f1): +_INLINE_ ret_t +function_h(OUT split_e_t *splitted_e, IN const r_t *in0, IN const r_t *in1) +{ + DEFER_CLEANUP(generic_param_n_t tmp, generic_param_n_cleanup); + DEFER_CLEANUP(sha_hash_t hash_seed = {0}, sha_hash_cleanup); + DEFER_CLEANUP(seed_t seed_for_hash, seed_cleanup); + DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, finalize_aes_ctr_prf); + + tmp.val[0] = *in0; + tmp.val[1] = *in1; + + // Hash (m*f0, m*f1) to generate a seed: + sha(&hash_seed, sizeof(tmp), (uint8_t *)&tmp); + + // Format the seed as a 32-bytes input: + translate_hash_to_seed(&seed_for_hash, &hash_seed); + + // Use the seed to generate a sparse error vector e: + DMSG(" Generating random error.\n"); + GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, &seed_for_hash)); + + DEFER_CLEANUP(padded_e_t e, padded_e_cleanup); + DEFER_CLEANUP(ALIGN(8) compressed_idx_t_t dummy, compressed_idx_t_cleanup); + + GUARD(generate_sparse_rep((uint64_t *)&e, dummy.val, T1, N_BITS, sizeof(e), + &prf_state)); + split_e(splitted_e, &e.val); + + return SUCCESS; +} + +_INLINE_ ret_t +encrypt(OUT ct_t *ct, OUT split_e_t *mf, IN const pk_t *pk, IN const seed_t *seed) +{ + DEFER_CLEANUP(padded_r_t m = {0}, padded_r_cleanup); + + DMSG(" Sampling m.\n"); + GUARD(sample_uniform_r_bits(&m.val, seed, NO_RESTRICTION)); + + // Pad the public key + pad_pk_t p_pk = {0}; + p_pk[0].val = pk->val[0]; + p_pk[1].val = pk->val[1]; + + // Pad the ciphertext + pad_ct_t p_ct = {0}; + p_ct[0].val = ct->val[0]; + p_ct[1].val = ct->val[1]; + + DEFER_CLEANUP(dbl_pad_ct_t mf_int = {0}, dbl_pad_ct_cleanup); + + DMSG(" Computing m*f0 and m*f1.\n"); + GUARD( + gf2x_mod_mul((uint64_t *)&mf_int[0], (uint64_t *)&m, (uint64_t *)&p_pk[0])); + GUARD( + gf2x_mod_mul((uint64_t *)&mf_int[1], (uint64_t *)&m, (uint64_t *)&p_pk[1])); + + DEFER_CLEANUP(split_e_t splitted_e, split_e_cleanup); + + DMSG(" Computing the hash function e <- H(m*f0, m*f1).\n"); + GUARD(function_h(&splitted_e, &mf_int[0].val, &mf_int[1].val)); + + DMSG(" Addding Error to the ciphertext.\n"); + GUARD(gf2x_add(p_ct[0].val.raw, mf_int[0].val.raw, splitted_e.val[0].raw, + R_SIZE)); + GUARD(gf2x_add(p_ct[1].val.raw, mf_int[1].val.raw, splitted_e.val[1].raw, + R_SIZE)); + + // Copy the data to the output parameters. + ct->val[0] = p_ct[0].val; + ct->val[1] = p_ct[1].val; + + // Copy the internal mf to the output parameters. 
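// A short derivation, pieced together from calc_pk() above and
// compute_syndrome() in decode.c, of why decapsulation can recover the error
// from this ciphertext: the public key satisfies f0 = g*h1 and f1 = g*h0, so
//
//   s = c0*h0 + c1*h1
//     = (m*f0 + e0)*h0 + (m*f1 + e1)*h1
//     = m*g*h1*h0 + m*g*h0*h1 + e0*h0 + e1*h1
//     = e0*h0 + e1*h1            (the m*g terms cancel over GF(2)),
//
// i.e. the syndrome depends only on the sparse error pair, which the decoder
// then recovers.  The routine next copies mf out to the caller, below.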
+ mf->val[0] = mf_int[0].val; + mf->val[1] = mf_int[1].val; + + print("e0: ", (uint64_t *)splitted_e.val[0].raw, R_BITS); + print("e1: ", (uint64_t *)splitted_e.val[1].raw, R_BITS); + print("c0: ", (uint64_t *)p_ct[0].val.raw, R_BITS); + print("c1: ", (uint64_t *)p_ct[1].val.raw, R_BITS); + + return SUCCESS; +} + +_INLINE_ ret_t +reencrypt(OUT pad_ct_t ce, + OUT split_e_t *e2, + IN const split_e_t *e, + IN const ct_t *l_ct) +{ + // Compute (c0 + e0') and (c1 + e1') + GUARD(gf2x_add(ce[0].val.raw, l_ct->val[0].raw, e->val[0].raw, R_SIZE)); + GUARD(gf2x_add(ce[1].val.raw, l_ct->val[1].raw, e->val[1].raw, R_SIZE)); + + // (e0'', e1'') <-- H(c0 + e0', c1 + e1') + GUARD(function_h(e2, &ce[0].val, &ce[1].val)); + + return SUCCESS; +} + +// Generate the Shared Secret K(mf0, mf1, c) by either +// K(c0+e0', c1+e1', c) or K(sigma0, sigma1, c) +_INLINE_ void +get_ss(OUT ss_t *out, IN const r_t *in0, IN const r_t *in1, IN const ct_t *ct) +{ + DMSG(" Enter get_ss.\n"); + + uint8_t tmp[4 * R_SIZE]; + memcpy(tmp, in0->raw, R_SIZE); + memcpy(tmp + R_SIZE, in1->raw, R_SIZE); + memcpy(tmp + 2 * R_SIZE, ct, sizeof(*ct)); + + // Calculate the hash digest + DEFER_CLEANUP(sha_hash_t hash = {0}, sha_hash_cleanup); + sha(&hash, sizeof(tmp), tmp); + + // Truncate the resulting digest, to produce the key K, by copying only the + // desired number of LSBs. + translate_hash_to_ss(out, &hash); + + secure_clean(tmp, sizeof(tmp)); + DMSG(" Exit get_ss.\n"); +} +//////////////////////////////////////////////////////////////////////////////// +// The three APIs below (keypair, encapsulate, decapsulate) are defined by NIST: +//////////////////////////////////////////////////////////////////////////////// +int +BIKE1_L1_R2_crypto_kem_keypair(OUT unsigned char *pk, OUT unsigned char *sk) +{ + notnull_check(sk); + notnull_check(pk); + + // Convert to this implementation types + pk_t *l_pk = (pk_t *)pk; + DEFER_CLEANUP(ALIGN(8) sk_t l_sk = {0}, sk_cleanup); + + // For DRBG and AES_PRF + DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); + DEFER_CLEANUP(aes_ctr_prf_state_t h_prf_state = {0}, aes_ctr_prf_state_cleanup); + + // For sigma0/1/2 + DEFER_CLEANUP(aes_ctr_prf_state_t s_prf_state = {0}, aes_ctr_prf_state_cleanup); + + // Padded for internal use only (the padded data is not released). + DEFER_CLEANUP(pad_sk_t p_sk = {0}, pad_sk_cleanup); + + // Get the entropy seeds. + GUARD(get_seeds(&seeds)); + + DMSG(" Enter crypto_kem_keypair.\n"); + DMSG(" Calculating the secret key.\n"); + + // h0 and h1 use the same context + GUARD(init_aes_ctr_prf_state(&h_prf_state, MAX_AES_INVOKATION, &seeds.seed[0])); + + // sigma0/1/2 use the same context. 
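// A brief note on how the three seeds are used in this file: seed[0] drives
// h_prf_state, from which the sparse halves h0 and h1 are sampled; seed[1]
// goes to calc_pk() to sample g (and, at encapsulation, a freshly drawn
// seeds.seed[1] samples m); seed[2] drives s_prf_state below, from which
// sigma0 and sigma1 are sampled.  The sigmas are kept in the secret key so
// that decapsulation can fall back to K(sigma0, sigma1, c) when the
// re-encryption check fails -- the usual implicit-rejection style of
// transform (see BIKE1_L1_R2_crypto_kem_dec further down).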
+ GUARD(init_aes_ctr_prf_state(&s_prf_state, MAX_AES_INVOKATION, &seeds.seed[2])); + + GUARD(generate_sparse_rep((uint64_t *)&p_sk[0], l_sk.wlist[0].val, DV, R_BITS, + sizeof(p_sk[0]), &h_prf_state)); + + // Sample the sigmas + GUARD(sample_uniform_r_bits_with_fixed_prf_context(&l_sk.sigma0, &s_prf_state, + NO_RESTRICTION)); + GUARD(sample_uniform_r_bits_with_fixed_prf_context(&l_sk.sigma1, &s_prf_state, + NO_RESTRICTION)); + + GUARD(generate_sparse_rep((uint64_t *)&p_sk[1], l_sk.wlist[1].val, DV, R_BITS, + sizeof(p_sk[1]), &h_prf_state)); + + // Copy data + l_sk.bin[0] = p_sk[0].val; + l_sk.bin[1] = p_sk[1].val; + + DMSG(" Calculating the public key.\n"); + + GUARD(calc_pk(l_pk, &seeds.seed[1], p_sk)); + + memcpy(sk, &l_sk, sizeof(l_sk)); + + print("h0: ", (uint64_t *)&l_sk.bin[0], R_BITS); + print("h1: ", (uint64_t *)&l_sk.bin[1], R_BITS); + print("h0c:", (uint64_t *)&l_sk.wlist[0], SIZEOF_BITS(compressed_idx_dv_t)); + print("h1c:", (uint64_t *)&l_sk.wlist[1], SIZEOF_BITS(compressed_idx_dv_t)); + print("sigma0: ", (uint64_t *)l_sk.sigma0.raw, R_BITS); + print("sigma1: ", (uint64_t *)l_sk.sigma1.raw, R_BITS); + + DMSG(" Exit crypto_kem_keypair.\n"); + + return SUCCESS; +} + +// Encapsulate - pk is the public key, +// ct is a key encapsulation message (ciphertext), +// ss is the shared secret. +int +BIKE1_L1_R2_crypto_kem_enc(OUT unsigned char * ct, + OUT unsigned char * ss, + IN const unsigned char *pk) +{ + DMSG(" Enter crypto_kem_enc.\n"); + + // Convert to the types that are used by this implementation + const pk_t *l_pk = (const pk_t *)pk; + ct_t * l_ct = (ct_t *)ct; + ss_t * l_ss = (ss_t *)ss; + + notnull_check(pk); + notnull_check(ct); + notnull_check(ss); + + // For NIST DRBG_CTR + DEFER_CLEANUP(seeds_t seeds = {0}, seeds_cleanup); + + // Get the entropy seeds. + GUARD(get_seeds(&seeds)); + + DMSG(" Encrypting.\n"); + // In fact, seed[0] should be used. + // Here, we stay consistent with BIKE's reference code + // that chooses the seconde seed. + DEFER_CLEANUP(split_e_t mf, split_e_cleanup); + GUARD(encrypt(l_ct, &mf, l_pk, &seeds.seed[1])); + + DMSG(" Generating shared secret.\n"); + get_ss(l_ss, &mf.val[0], &mf.val[1], l_ct); + + print("ss: ", (uint64_t *)l_ss->raw, SIZEOF_BITS(*l_ss)); + DMSG(" Exit crypto_kem_enc.\n"); + return SUCCESS; +} + +// Decapsulate - ct is a key encapsulation message (ciphertext), +// sk is the private key, +// ss is the shared secret +int +BIKE1_L1_R2_crypto_kem_dec(OUT unsigned char * ss, + IN const unsigned char *ct, + IN const unsigned char *sk) +{ + DMSG(" Enter crypto_kem_dec.\n"); + + // Convert to the types used by this implementation + const ct_t *l_ct = (const ct_t *)ct; + ss_t * l_ss = (ss_t *)ss; + notnull_check(sk); + notnull_check(ct); + notnull_check(ss); + + DEFER_CLEANUP(ALIGN(8) sk_t l_sk, sk_cleanup); + memcpy(&l_sk, sk, sizeof(l_sk)); + + // Force zero initialization. + DEFER_CLEANUP(syndrome_t syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(split_e_t e, split_e_cleanup); + + DMSG(" Computing s.\n"); + GUARD(compute_syndrome(&syndrome, l_ct, &l_sk)); + + DMSG(" Decoding.\n"); + uint32_t dec_ret = decode(&e, &syndrome, l_ct, &l_sk) != SUCCESS ? 0 : 1; + + DEFER_CLEANUP(split_e_t e2, split_e_cleanup); + DEFER_CLEANUP(pad_ct_t ce, pad_ct_cleanup); + GUARD(reencrypt(ce, &e2, &e, l_ct)); + + // Check if the decoding is successful. + // Check if the error weight equals T1. + // Check if (e0', e1') == (e0'', e1''). 
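// The selection below is branch-free: the 0/1 condition is stretched into an
// all-zero or all-one byte mask, and the mask picks between the "success" and
// "failure" secrets byte by byte.  A minimal standalone sketch of the same
// idiom (using a plain 0/1 flag to build the mask, where the real code uses
// secure_l32_mask()):
#include <stdint.h>
#include <stdio.h>

static void ct_select(uint8_t *out, const uint8_t *on_success,
                      const uint8_t *on_failure, size_t len, uint32_t ok)
{
    const uint8_t mask = (uint8_t)(0 - (ok & 1)); /* 0x00 or 0xFF */
    for (size_t i = 0; i < len; i++) {
        out[i] = (uint8_t)((mask & on_success[i]) | (~mask & on_failure[i]));
    }
}

int main(void)
{
    const uint8_t a[4] = {1, 2, 3, 4}, b[4] = {9, 9, 9, 9};
    uint8_t out[4];

    ct_select(out, a, b, sizeof(out), 1);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); /* 1 2 3 4 */

    ct_select(out, a, b, sizeof(out), 0);
    printf("%u %u %u %u\n", out[0], out[1], out[2], out[3]); /* 9 9 9 9 */
    return 0;
}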
+ volatile uint32_t success_cond; + success_cond = dec_ret; + success_cond &= secure_cmp32(T1, r_bits_vector_weight(&e.val[0]) + + r_bits_vector_weight(&e.val[1])); + success_cond &= secure_cmp((uint8_t *)&e, (uint8_t *)&e2, sizeof(e)); + + ss_t ss_succ = {0}; + ss_t ss_fail = {0}; + + get_ss(&ss_succ, &ce[0].val, &ce[1].val, l_ct); + get_ss(&ss_fail, &l_sk.sigma0, &l_sk.sigma1, l_ct); + + uint8_t mask = ~secure_l32_mask(0, success_cond); + for(uint32_t i = 0; i < sizeof(*l_ss); i++) + { + l_ss->raw[i] = (mask & ss_succ.raw[i]) | (~mask & ss_fail.raw[i]); + } + + DMSG(" Exit crypto_kem_dec.\n"); + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/cleanup.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/cleanup.h index 6bacfaa45a..67205216d3 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/cleanup.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/cleanup.h @@ -1,131 +1,131 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once -#include "types.h" -#include "utils/s2n_safety.h" - -_INLINE_ void -secure_clean(OUT uint8_t *p, IN const uint32_t len) -{ -#ifdef _WIN32 - SecureZeroMemory(p, len); -#else - typedef void *(*memset_t)(void *, int, size_t); - static volatile memset_t memset_func = memset; - memset_func(p, 0, len); -#endif -} - -_INLINE_ void -r_cleanup(IN OUT r_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -e_cleanup(IN OUT e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -padded_r_cleanup(IN OUT padded_r_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -padded_e_cleanup(IN OUT padded_e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -split_e_cleanup(IN OUT split_e_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -sk_cleanup(IN OUT sk_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -pad_sk_cleanup(IN OUT pad_sk_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -pad_ct_cleanup(IN OUT pad_ct_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -dbl_pad_ct_cleanup(IN OUT dbl_pad_ct_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -seed_cleanup(IN OUT seed_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -syndrome_cleanup(IN OUT syndrome_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -dbl_pad_syndrome_cleanup(IN OUT dbl_pad_syndrome_t *o) -{ - secure_clean((uint8_t *)o[0], sizeof(*o)); -} - -_INLINE_ void -compressed_idx_t_cleanup(IN OUT compressed_idx_t_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -compressed_idx_dv_ar_cleanup(IN OUT compressed_idx_dv_ar_t *o) -{ - for(int i = 0; i < N0; i++) - { - secure_clean((uint8_t *)&(*o)[i], sizeof((*o)[0])); - } -} - -_INLINE_ void -generic_param_n_cleanup(IN OUT generic_param_n_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} - -_INLINE_ void -seeds_cleanup(IN OUT seeds_t *o) -{ - for(int i = 0; i < NUM_OF_SEEDS; i++) - { - seed_cleanup(&(o->seed[i])); - } -} - -_INLINE_ void -upc_cleanup(IN OUT upc_t *o) -{ - secure_clean((uint8_t *)o, sizeof(*o)); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once +#include "types.h" +#include "utils/s2n_safety.h" + +_INLINE_ void +secure_clean(OUT uint8_t *p, IN const uint32_t len) +{ +#ifdef _WIN32 + SecureZeroMemory(p, len); +#else + typedef void *(*memset_t)(void *, int, size_t); + static volatile memset_t memset_func = memset; + memset_func(p, 0, len); +#endif +} + +_INLINE_ void +r_cleanup(IN OUT r_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +e_cleanup(IN OUT e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +padded_r_cleanup(IN OUT padded_r_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +padded_e_cleanup(IN OUT padded_e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +split_e_cleanup(IN OUT split_e_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +sk_cleanup(IN OUT sk_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +pad_sk_cleanup(IN OUT pad_sk_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +pad_ct_cleanup(IN OUT pad_ct_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +dbl_pad_ct_cleanup(IN OUT dbl_pad_ct_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +seed_cleanup(IN OUT seed_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +syndrome_cleanup(IN OUT syndrome_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +dbl_pad_syndrome_cleanup(IN OUT dbl_pad_syndrome_t *o) +{ + secure_clean((uint8_t *)o[0], sizeof(*o)); +} + +_INLINE_ void +compressed_idx_t_cleanup(IN OUT compressed_idx_t_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +compressed_idx_dv_ar_cleanup(IN OUT compressed_idx_dv_ar_t *o) +{ + for(int i = 0; i < N0; i++) + { + secure_clean((uint8_t *)&(*o)[i], sizeof((*o)[0])); + } +} + +_INLINE_ void +generic_param_n_cleanup(IN OUT generic_param_n_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} + +_INLINE_ void +seeds_cleanup(IN OUT seeds_t *o) +{ + for(int i = 0; i < NUM_OF_SEEDS; i++) + { + seed_cleanup(&(o->seed[i])); + } +} + +_INLINE_ void +upc_cleanup(IN OUT upc_t *o) +{ + secure_clean((uint8_t *)o, sizeof(*o)); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.c index 404c6377da..ee37e7d82a 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.c @@ -1,365 +1,365 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) - * - * [1] The optimizations are based on the description developed in the paper: - * Drucker, Nir, and Shay Gueron. 2019. “A Toolbox for Software Optimization - * of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering, - * January, 1–17. https://doi.org/10.1007/s13389-018-00200-4. - * - * [2] The decoder algorithm is the Black-Gray decoder in - * the early submission of CAKE (due to N. Sandrier and R Misoczki). - * - * [3] The analysis for the constant time implementation is given in - * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. 
- * “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.” - * Cryptology EPrint Archive, 2019. https://eprint.iacr.org/2019/1289. - * - * [4] it was adapted to BGF in: - * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. - * “QC-MDPC decoders with several shades of gray.” - * Cryptology EPrint Archive, 2019. To be published. - * - * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography. - * In: Gier-lichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware - * and Embedded Systems– CHES 2016. pp. 280–300. Springer Berlin Heidelberg, - * Berlin, Heidelberg (2016) - * - * [6] The rotate512_small funciton is a derivative of the code described in: - * Guimarães, Antonio, Diego F Aranha, and Edson Borin. 2019. - * “Optimized Implementation of QC-MDPC Code-Based Cryptography.” - * Concurrency and Computation: Practice and Experience 31 (18): - * e5089. https://doi.org/10.1002/cpe.5089. - */ - -#include "decode.h" -#include "gf2x.h" -#include "utilities.h" -#include <string.h> - -// Decoding (bit-flipping) parameter -#ifdef BG_DECODER -# if(LEVEL == 1) -# define MAX_IT 3 -# elif(LEVEL == 3) -# define MAX_IT 4 -# elif(LEVEL == 5) -# define MAX_IT 7 -# else -# error "Level can only be 1/3/5" -# endif -#elif defined(BGF_DECODER) -# if(LEVEL == 1) -# define MAX_IT 5 -# elif(LEVEL == 3) -# define MAX_IT 6 -# elif(LEVEL == 5) -# define MAX_IT 7 -# else -# error "Level can only be 1/3/5" -# endif -#endif - -// Duplicates the first R_BITS of the syndrome three times -// |------------------------------------------| -// | Third copy | Second copy | first R_BITS | -// |------------------------------------------| -// This is required by the rotate functions. -_INLINE_ void -dup(IN OUT syndrome_t *s) -{ - s->qw[R_QW - 1] = - (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK); - - for(size_t i = 0; i < (2 * R_QW) - 1; i++) - { - s->qw[R_QW + i] = - (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD); - } -} - -ret_t -compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk) -{ - // gf2x_mod_mul requires the values to be 64bit padded and extra (dbl) space - // for the results - DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup); - DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup); - pad_sk[0].val = sk->bin[0]; - pad_sk[1].val = sk->bin[1]; - - DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup); - pad_ct[0].val = ct->val[0]; - pad_ct[1].val = ct->val[1]; - - // Compute s = c0*h0 + c1*h1: - GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0], - (uint64_t *)&pad_sk[0])); - GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1], - (uint64_t *)&pad_sk[1])); - - GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE)); - - memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE); - dup(syndrome); - - return SUCCESS; -} - -_INLINE_ ret_t -recompute_syndrome(OUT syndrome_t *syndrome, - IN const ct_t *ct, - IN const sk_t *sk, - IN const split_e_t *splitted_e) -{ - ct_t tmp_ct = *ct; - - // Adapt the ciphertext - GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw, - R_SIZE)); - GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw, - R_SIZE)); - - // Recompute the syndrome - GUARD(compute_syndrome(syndrome, &tmp_ct, sk)); - - return SUCCESS; -} - -_INLINE_ uint8_t -get_threshold(IN const syndrome_t *s) -{ - bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough); - - const uint32_t syndrome_weight = 
r_bits_vector_weight((const r_t *)s->qw); - - // The equations below are defined in BIKE's specification: - // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf - // Page 20 Section 2.4.2 - const uint8_t threshold = - THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight); - - DMSG(" Thresold: %d\n", threshold); - return threshold; -} - -// Use half-adder as described in [5]. -_INLINE_ void -bit_sliced_adder(OUT upc_t *upc, - IN OUT syndrome_t *rotated_syndrome, - IN const size_t num_of_slices) -{ - // From cache-memory perspective this loop should be the outside loop - for(size_t j = 0; j < num_of_slices; j++) - { - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]); - upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i]; - rotated_syndrome->qw[i] = carry; - } - } -} - -_INLINE_ void -bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val) -{ - // Borrow - uint64_t br[R_QW] = {0}; - - for(size_t j = 0; j < SLICES; j++) - { - - const uint64_t lsb_mask = 0 - (val & 0x1); - val >>= 1; - - // Perform a - b with c as the input/output carry - // br = 0 0 0 0 1 1 1 1 - // a = 0 0 1 1 0 0 1 1 - // b = 0 1 0 1 0 1 0 1 - // ------------------- - // o = 0 1 1 0 0 1 1 1 - // c = 0 1 0 0 1 1 0 1 - // - // o = a^b^c - // _ __ _ _ _ _ _ - // br = abc + abc + abc + abc = abc + ((a+b))c - - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t a = upc->slice[j].u.qw[i]; - const uint64_t b = lsb_mask; - const uint64_t tmp = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i])); - upc->slice[j].u.qw[i] = a ^ b ^ br[i]; - br[i] = tmp; - } - } -} - -// Calculate the Unsatisfied Parity Checks (UPCs) and update the errors -// vector (e) accordingy. In addition, update the black and gray errors vector -// with the relevant values. -_INLINE_ void -find_err1(OUT split_e_t *e, - OUT split_e_t *black_e, - OUT split_e_t *gray_e, - IN const syndrome_t * syndrome, - IN const compressed_idx_dv_ar_t wlist, - IN const uint8_t threshold) -{ - // This function uses the bit-slice-adder methodology of [5]: - DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(upc_t upc, upc_cleanup); - - for(uint32_t i = 0; i < N0; i++) - { - // UPC must start from zero at every iteration - memset(&upc, 0, sizeof(upc)); - - // 1) Right-rotate the syndrome for every secret key set bit index - // Then slice-add it to the UPC array. - for(size_t j = 0; j < DV; j++) - { - rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); - bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); - } - - // 2) Subtract the threshold from the UPC counters - bit_slice_full_subtract(&upc, threshold); - - // 3) Update the errors and the black errors vectors. - // The last slice of the UPC array holds the MSB of the accumulated values - // minus the threshold. Every zero bit indicates a potential error bit. - // The errors values are stored in the black array and xored with the - // errors Of the previous iteration. - const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); - for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - black_e->val[i].raw[j] = sum_msb; - e->val[i].raw[j] ^= sum_msb; - } - - // Ensure that the padding bits (upper bits of the last byte) are zero so - // they will not be included in the multiplication and in the hash function. - e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - - // 4) Calculate the gray error array by adding "DELTA" to the UPC array. 
- // For that we reuse the rotated_syndrome variable setting it to all "1". - for(size_t l = 0; l < DELTA; l++) - { - memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE); - bit_sliced_adder(&upc, &rotated_syndrome, SLICES); - } - - // 5) Update the gray list with the relevant bits that are not - // set in the black list. - for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb; - } - } -} - -// Recalculate the UPCs and update the errors vector (e) according to it -// and to the black/gray vectors. -_INLINE_ void -find_err2(OUT split_e_t *e, - IN split_e_t *pos_e, - IN const syndrome_t * syndrome, - IN const compressed_idx_dv_ar_t wlist, - IN const uint8_t threshold) -{ - DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); - DEFER_CLEANUP(upc_t upc, upc_cleanup); - - for(uint32_t i = 0; i < N0; i++) - { - // UPC must start from zero at every iteration - memset(&upc, 0, sizeof(upc)); - - // 1) Right-rotate the syndrome for every secret key set bit index - // Then slice-add it to the UPC array. - for(size_t j = 0; j < DV; j++) - { - rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); - bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); - } - - // 2) Subtract the threshold from the UPC counters - bit_slice_full_subtract(&upc, threshold); - - // 3) Update the errors vector. - // The last slice of the UPC array holds the MSB of the accumulated values - // minus the threshold. Every zero bit indicates a potential error bit. - const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); - for(size_t j = 0; j < R_SIZE; j++) - { - const uint8_t sum_msb = (~last_slice->raw[j]); - e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb); - } - - // Ensure that the padding bits (upper bits of the last byte) are zero so - // they will not be included in the multiplication and in the hash function. - e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; - } -} - -ret_t -decode(OUT split_e_t *e, - IN const syndrome_t *original_s, - IN const ct_t *ct, - IN const sk_t *sk) -{ - split_e_t black_e = {0}; - split_e_t gray_e = {0}; - syndrome_t s; - - // Reset (init) the error because it is xored in the find_err funcitons. - memset(e, 0, sizeof(*e)); - s = *original_s; - dup(&s); - - for(uint32_t iter = 0; iter < MAX_IT; iter++) - { - const uint8_t threshold = get_threshold(&s); - - DMSG(" Iteration: %d\n", iter); - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold); - GUARD(recompute_syndrome(&s, ct, sk, e)); -#ifdef BGF_DECODER - if(iter >= 1) - { - continue; - } -#endif - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1); - GUARD(recompute_syndrome(&s, ct, sk, e)); - - DMSG(" Weight of e: %lu\n", - r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); - DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); - - find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1); - GUARD(recompute_syndrome(&s, ct, sk, e)); - } - - if(r_bits_vector_weight((r_t *)s.qw) > 0) - { - BIKE_ERROR(E_DECODING_FAILURE); - } - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker, Shay Gueron, and Dusan Kostic, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com, dkostic@amazon.com) + * + * [1] The optimizations are based on the description developed in the paper: + * Drucker, Nir, and Shay Gueron. 2019. “A Toolbox for Software Optimization + * of QC-MDPC Code-Based Cryptosystems.” Journal of Cryptographic Engineering, + * January, 1–17. https://doi.org/10.1007/s13389-018-00200-4. + * + * [2] The decoder algorithm is the Black-Gray decoder in + * the early submission of CAKE (due to N. Sandrier and R Misoczki). + * + * [3] The analysis for the constant time implementation is given in + * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. + * “On Constant-Time QC-MDPC Decoding with Negligible Failure Rate.” + * Cryptology EPrint Archive, 2019. https://eprint.iacr.org/2019/1289. + * + * [4] it was adapted to BGF in: + * Drucker, Nir, Shay Gueron, and Dusan Kostic. 2019. + * “QC-MDPC decoders with several shades of gray.” + * Cryptology EPrint Archive, 2019. To be published. + * + * [5] Chou, T.: QcBits: Constant-Time Small-Key Code-Based Cryptography. + * In: Gier-lichs, B., Poschmann, A.Y. (eds.) Cryptographic Hardware + * and Embedded Systems– CHES 2016. pp. 280–300. Springer Berlin Heidelberg, + * Berlin, Heidelberg (2016) + * + * [6] The rotate512_small funciton is a derivative of the code described in: + * Guimarães, Antonio, Diego F Aranha, and Edson Borin. 2019. + * “Optimized Implementation of QC-MDPC Code-Based Cryptography.” + * Concurrency and Computation: Practice and Experience 31 (18): + * e5089. https://doi.org/10.1002/cpe.5089. + */ + +#include "decode.h" +#include "gf2x.h" +#include "utilities.h" +#include <string.h> + +// Decoding (bit-flipping) parameter +#ifdef BG_DECODER +# if(LEVEL == 1) +# define MAX_IT 3 +# elif(LEVEL == 3) +# define MAX_IT 4 +# elif(LEVEL == 5) +# define MAX_IT 7 +# else +# error "Level can only be 1/3/5" +# endif +#elif defined(BGF_DECODER) +# if(LEVEL == 1) +# define MAX_IT 5 +# elif(LEVEL == 3) +# define MAX_IT 6 +# elif(LEVEL == 5) +# define MAX_IT 7 +# else +# error "Level can only be 1/3/5" +# endif +#endif + +// Duplicates the first R_BITS of the syndrome three times +// |------------------------------------------| +// | Third copy | Second copy | first R_BITS | +// |------------------------------------------| +// This is required by the rotate functions. 
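// Why the duplication helps, illustrated at byte granularity (a loose
// analogy; the real rotate below works on bits packed into quadwords): once a
// length-R buffer is stored twice back to back, every rotation of it is a
// contiguous window of the doubled buffer, so rotating reduces to one bounded
// copy with no wrap-around logic.  dup() right below builds this layout
// (three copies) for the syndrome.
#include <stdio.h>
#include <string.h>

int main(void)
{
    const char ring[5] = {'A', 'B', 'C', 'D', 'E'};
    char doubled[10];
    char rot[6] = {0};

    memcpy(doubled, ring, 5);
    memcpy(doubled + 5, ring, 5); /* two back-to-back copies */

    const size_t k = 2;           /* rotate left by 2 positions */
    memcpy(rot, doubled + k, 5);  /* read a window starting at offset k */
    printf("%s\n", rot);          /* prints CDEAB */
    return 0;
}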
+_INLINE_ void +dup(IN OUT syndrome_t *s) +{ + s->qw[R_QW - 1] = + (s->qw[0] << LAST_R_QW_LEAD) | (s->qw[R_QW - 1] & LAST_R_QW_MASK); + + for(size_t i = 0; i < (2 * R_QW) - 1; i++) + { + s->qw[R_QW + i] = + (s->qw[i] >> LAST_R_QW_TRAIL) | (s->qw[i + 1] << LAST_R_QW_LEAD); + } +} + +ret_t +compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk) +{ + // gf2x_mod_mul requires the values to be 64bit padded and extra (dbl) space + // for the results + DEFER_CLEANUP(dbl_pad_syndrome_t pad_s, dbl_pad_syndrome_cleanup); + DEFER_CLEANUP(pad_sk_t pad_sk = {0}, pad_sk_cleanup); + pad_sk[0].val = sk->bin[0]; + pad_sk[1].val = sk->bin[1]; + + DEFER_CLEANUP(pad_ct_t pad_ct = {0}, pad_ct_cleanup); + pad_ct[0].val = ct->val[0]; + pad_ct[1].val = ct->val[1]; + + // Compute s = c0*h0 + c1*h1: + GUARD(gf2x_mod_mul((uint64_t *)&pad_s[0], (uint64_t *)&pad_ct[0], + (uint64_t *)&pad_sk[0])); + GUARD(gf2x_mod_mul((uint64_t *)&pad_s[1], (uint64_t *)&pad_ct[1], + (uint64_t *)&pad_sk[1])); + + GUARD(gf2x_add(pad_s[0].val.raw, pad_s[0].val.raw, pad_s[1].val.raw, R_SIZE)); + + memcpy((uint8_t *)syndrome->qw, pad_s[0].val.raw, R_SIZE); + dup(syndrome); + + return SUCCESS; +} + +_INLINE_ ret_t +recompute_syndrome(OUT syndrome_t *syndrome, + IN const ct_t *ct, + IN const sk_t *sk, + IN const split_e_t *splitted_e) +{ + ct_t tmp_ct = *ct; + + // Adapt the ciphertext + GUARD(gf2x_add(tmp_ct.val[0].raw, tmp_ct.val[0].raw, splitted_e->val[0].raw, + R_SIZE)); + GUARD(gf2x_add(tmp_ct.val[1].raw, tmp_ct.val[1].raw, splitted_e->val[1].raw, + R_SIZE)); + + // Recompute the syndrome + GUARD(compute_syndrome(syndrome, &tmp_ct, sk)); + + return SUCCESS; +} + +_INLINE_ uint8_t +get_threshold(IN const syndrome_t *s) +{ + bike_static_assert(sizeof(*s) >= sizeof(r_t), syndrome_is_large_enough); + + const uint32_t syndrome_weight = r_bits_vector_weight((const r_t *)s->qw); + + // The equations below are defined in BIKE's specification: + // https://bikesuite.org/files/round2/spec/BIKE-Spec-Round2.2019.03.30.pdf + // Page 20 Section 2.4.2 + const uint8_t threshold = + THRESHOLD_COEFF0 + (THRESHOLD_COEFF1 * syndrome_weight); + + DMSG(" Thresold: %d\n", threshold); + return threshold; +} + +// Use half-adder as described in [5]. 
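// Before the real adder, a tiny self-contained demonstration of the same
// half-adder chain: each bit position ("lane") of a 64-bit word gets its own
// small binary counter spread across the slices, and adding one more 0/1 word
// costs only an AND (carry) and a XOR (sum) per slice.  bit_sliced_adder()
// below applies exactly this chain to the UPC slices.
#include <stdint.h>
#include <stdio.h>

/* slice[0] holds the LSBs of the 64 lane counters, slice[1] the next bit, ... */
static void sliced_add(uint64_t *slice, size_t num_slices, uint64_t x)
{
    uint64_t carry = x;
    for (size_t j = 0; j < num_slices; j++) {
        const uint64_t next_carry = slice[j] & carry; /* half-adder carry */
        slice[j] ^= carry;                            /* half-adder sum   */
        carry = next_carry;
    }
}

int main(void)
{
    const uint64_t in[3] = {0xF0F0F0F0F0F0F0F0ULL, 0xFF00FF00FF00FF00ULL,
                            0xAAAAAAAAAAAAAAAAULL};
    uint64_t slice[3] = {0, 0, 0}; /* counts up to 7 per lane */

    for (size_t i = 0; i < 3; i++) {
        sliced_add(slice, 3, in[i]);
    }

    /* Read back the counter of lane 63; bit 63 is set in all three inputs. */
    unsigned cnt = 0;
    for (size_t j = 0; j < 3; j++) {
        cnt |= (unsigned)((slice[j] >> 63) & 1) << j;
    }
    printf("lane 63 count = %u\n", cnt); /* 3 */
    return 0;
}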
+_INLINE_ void +bit_sliced_adder(OUT upc_t *upc, + IN OUT syndrome_t *rotated_syndrome, + IN const size_t num_of_slices) +{ + // From cache-memory perspective this loop should be the outside loop + for(size_t j = 0; j < num_of_slices; j++) + { + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t carry = (upc->slice[j].u.qw[i] & rotated_syndrome->qw[i]); + upc->slice[j].u.qw[i] ^= rotated_syndrome->qw[i]; + rotated_syndrome->qw[i] = carry; + } + } +} + +_INLINE_ void +bit_slice_full_subtract(OUT upc_t *upc, IN uint8_t val) +{ + // Borrow + uint64_t br[R_QW] = {0}; + + for(size_t j = 0; j < SLICES; j++) + { + + const uint64_t lsb_mask = 0 - (val & 0x1); + val >>= 1; + + // Perform a - b with c as the input/output carry + // br = 0 0 0 0 1 1 1 1 + // a = 0 0 1 1 0 0 1 1 + // b = 0 1 0 1 0 1 0 1 + // ------------------- + // o = 0 1 1 0 0 1 1 1 + // c = 0 1 0 0 1 1 0 1 + // + // o = a^b^c + // _ __ _ _ _ _ _ + // br = abc + abc + abc + abc = abc + ((a+b))c + + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t a = upc->slice[j].u.qw[i]; + const uint64_t b = lsb_mask; + const uint64_t tmp = ((~a) & b & (~br[i])) | ((((~a) | b) & br[i])); + upc->slice[j].u.qw[i] = a ^ b ^ br[i]; + br[i] = tmp; + } + } +} + +// Calculate the Unsatisfied Parity Checks (UPCs) and update the errors +// vector (e) accordingy. In addition, update the black and gray errors vector +// with the relevant values. +_INLINE_ void +find_err1(OUT split_e_t *e, + OUT split_e_t *black_e, + OUT split_e_t *gray_e, + IN const syndrome_t * syndrome, + IN const compressed_idx_dv_ar_t wlist, + IN const uint8_t threshold) +{ + // This function uses the bit-slice-adder methodology of [5]: + DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(upc_t upc, upc_cleanup); + + for(uint32_t i = 0; i < N0; i++) + { + // UPC must start from zero at every iteration + memset(&upc, 0, sizeof(upc)); + + // 1) Right-rotate the syndrome for every secret key set bit index + // Then slice-add it to the UPC array. + for(size_t j = 0; j < DV; j++) + { + rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); + bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); + } + + // 2) Subtract the threshold from the UPC counters + bit_slice_full_subtract(&upc, threshold); + + // 3) Update the errors and the black errors vectors. + // The last slice of the UPC array holds the MSB of the accumulated values + // minus the threshold. Every zero bit indicates a potential error bit. + // The errors values are stored in the black array and xored with the + // errors Of the previous iteration. + const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); + for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + black_e->val[i].raw[j] = sum_msb; + e->val[i].raw[j] ^= sum_msb; + } + + // Ensure that the padding bits (upper bits of the last byte) are zero so + // they will not be included in the multiplication and in the hash function. + e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + + // 4) Calculate the gray error array by adding "DELTA" to the UPC array. + // For that we reuse the rotated_syndrome variable setting it to all "1". + for(size_t l = 0; l < DELTA; l++) + { + memset((uint8_t *)rotated_syndrome.qw, 0xff, R_SIZE); + bit_sliced_adder(&upc, &rotated_syndrome, SLICES); + } + + // 5) Update the gray list with the relevant bits that are not + // set in the black list. 
+ for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + gray_e->val[i].raw[j] = (~(black_e->val[i].raw[j])) & sum_msb; + } + } +} + +// Recalculate the UPCs and update the errors vector (e) according to it +// and to the black/gray vectors. +_INLINE_ void +find_err2(OUT split_e_t *e, + IN split_e_t *pos_e, + IN const syndrome_t * syndrome, + IN const compressed_idx_dv_ar_t wlist, + IN const uint8_t threshold) +{ + DEFER_CLEANUP(syndrome_t rotated_syndrome = {0}, syndrome_cleanup); + DEFER_CLEANUP(upc_t upc, upc_cleanup); + + for(uint32_t i = 0; i < N0; i++) + { + // UPC must start from zero at every iteration + memset(&upc, 0, sizeof(upc)); + + // 1) Right-rotate the syndrome for every secret key set bit index + // Then slice-add it to the UPC array. + for(size_t j = 0; j < DV; j++) + { + rotate_right(&rotated_syndrome, syndrome, wlist[i].val[j]); + bit_sliced_adder(&upc, &rotated_syndrome, LOG2_MSB(j + 1)); + } + + // 2) Subtract the threshold from the UPC counters + bit_slice_full_subtract(&upc, threshold); + + // 3) Update the errors vector. + // The last slice of the UPC array holds the MSB of the accumulated values + // minus the threshold. Every zero bit indicates a potential error bit. + const r_t *last_slice = &(upc.slice[SLICES - 1].u.r.val); + for(size_t j = 0; j < R_SIZE; j++) + { + const uint8_t sum_msb = (~last_slice->raw[j]); + e->val[i].raw[j] ^= (pos_e->val[i].raw[j] & sum_msb); + } + + // Ensure that the padding bits (upper bits of the last byte) are zero so + // they will not be included in the multiplication and in the hash function. + e->val[i].raw[R_SIZE - 1] &= LAST_R_BYTE_MASK; + } +} + +ret_t +decode(OUT split_e_t *e, + IN const syndrome_t *original_s, + IN const ct_t *ct, + IN const sk_t *sk) +{ + split_e_t black_e = {0}; + split_e_t gray_e = {0}; + syndrome_t s; + + // Reset (init) the error because it is xored in the find_err funcitons. + memset(e, 0, sizeof(*e)); + s = *original_s; + dup(&s); + + for(uint32_t iter = 0; iter < MAX_IT; iter++) + { + const uint8_t threshold = get_threshold(&s); + + DMSG(" Iteration: %d\n", iter); + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err1(e, &black_e, &gray_e, &s, sk->wlist, threshold); + GUARD(recompute_syndrome(&s, ct, sk, e)); +#ifdef BGF_DECODER + if(iter >= 1) + { + continue; + } +#endif + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err2(e, &black_e, &s, sk->wlist, ((DV + 1) / 2) + 1); + GUARD(recompute_syndrome(&s, ct, sk, e)); + + DMSG(" Weight of e: %lu\n", + r_bits_vector_weight(&e->val[0]) + r_bits_vector_weight(&e->val[1])); + DMSG(" Weight of syndrome: %lu\n", r_bits_vector_weight((r_t *)s.qw)); + + find_err2(e, &gray_e, &s, sk->wlist, ((DV + 1) / 2) + 1); + GUARD(recompute_syndrome(&s, ct, sk, e)); + } + + if(r_bits_vector_weight((r_t *)s.qw) > 0) + { + BIKE_ERROR(E_DECODING_FAILURE); + } + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.h index d8809fd829..db7cf8ec1b 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/decode.h @@ -1,28 +1,28 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
- * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -ret_t -compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk); - -// e should be zeroed before calling the decoder. -ret_t -decode(OUT split_e_t *e, - IN const syndrome_t *s, - IN const ct_t *ct, - IN const sk_t *sk); - -// Rotate right the first R_BITS of a syndrome. -// Assumption: the syndrome contains three R_BITS duplications. -// The output syndrome contains only one R_BITS rotation, the other -// (2 * R_BITS) bits are undefined. -void -rotate_right(OUT syndrome_t *out, IN const syndrome_t *in, IN uint32_t bitscount); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +ret_t +compute_syndrome(OUT syndrome_t *syndrome, IN const ct_t *ct, IN const sk_t *sk); + +// e should be zeroed before calling the decoder. +ret_t +decode(OUT split_e_t *e, + IN const syndrome_t *s, + IN const ct_t *ct, + IN const sk_t *sk); + +// Rotate right the first R_BITS of a syndrome. +// Assumption: the syndrome contains three R_BITS duplications. +// The output syndrome contains only one R_BITS rotation, the other +// (2 * R_BITS) bits are undefined. +void +rotate_right(OUT syndrome_t *out, IN const syndrome_t *in, IN uint32_t bitscount); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/defs.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/defs.h index 0b74bb1131..c78ee90703 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/defs.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/defs.h @@ -1,144 +1,144 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -//////////////////////////////////////////// -// Basic defs -/////////////////////////////////////////// -#define FUNC_PREFIX BIKE1_L1_R2 -#include "functions_renaming.h" - -#ifdef __cplusplus -# define EXTERNC extern "C" -#else -# define EXTERNC -#endif - -// For code clarity. -#define IN -#define OUT - -#define ALIGN(n) __attribute__((aligned(n))) -#define BIKE_UNUSED(x) (void)(x) -#define BIKE_UNUSED_ATT __attribute__((unused)) - -#define _INLINE_ static inline - -// In asm the symbols '==' and '?' are not allowed therefore if using -// divide_and_ceil in asm files we must ensure with static_assert its validity -#if(__cplusplus >= 201103L) || defined(static_assert) -# define bike_static_assert(COND, MSG) static_assert(COND, "MSG") -#else -# define bike_static_assert(COND, MSG) \ - typedef char static_assertion_##MSG[(COND) ? 
1 : -1] BIKE_UNUSED_ATT -#endif - -// Divide by the divider and round up to next integer -#define DIVIDE_AND_CEIL(x, divider) (((x) + (divider)) / (divider)) - -#define BIT(len) (1ULL << (len)) - -#define MASK(len) (BIT(len) - 1) -#define SIZEOF_BITS(b) (sizeof(b) * 8) - -#define QW_SIZE 0x8 -#define XMM_SIZE 0x10 -#define YMM_SIZE 0x20 -#define ZMM_SIZE 0x40 - -#define ALL_YMM_SIZE (16 * YMM_SIZE) -#define ALL_ZMM_SIZE (32 * ZMM_SIZE) - -// Copied from (Kaz answer) -// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 -#define UPTOPOW2_0(v) ((v)-1) -#define UPTOPOW2_1(v) (UPTOPOW2_0(v) | (UPTOPOW2_0(v) >> 1)) -#define UPTOPOW2_2(v) (UPTOPOW2_1(v) | (UPTOPOW2_1(v) >> 2)) -#define UPTOPOW2_3(v) (UPTOPOW2_2(v) | (UPTOPOW2_2(v) >> 4)) -#define UPTOPOW2_4(v) (UPTOPOW2_3(v) | (UPTOPOW2_3(v) >> 8)) -#define UPTOPOW2_5(v) (UPTOPOW2_4(v) | (UPTOPOW2_4(v) >> 16)) - -#define UPTOPOW2(v) (UPTOPOW2_5(v) + 1) - -// Works only for 0 < v < 512 -#define LOG2_MSB(v) \ - ((v) == 0 \ - ? 0 \ - : ((v) < 2 \ - ? 1 \ - : ((v) < 4 \ - ? 2 \ - : ((v) < 8 \ - ? 3 \ - : ((v) < 16 \ - ? 4 \ - : ((v) < 32 \ - ? 5 \ - : ((v) < 64 ? 6 \ - : ((v) < 128 \ - ? 7 \ - : ((v) < 256 \ - ? 8 \ - : 9))))))))) - -//////////////////////////////////////////// -// Debug -/////////////////////////////////////////// - -#ifndef VERBOSE -# define VERBOSE 0 -#endif - -#include <stdio.h> - -#if(VERBOSE == 4) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) MSG(__VA_ARGS__) -# define SEDMSG(...) MSG(__VA_ARGS__) -#elif(VERBOSE == 3) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) MSG(__VA_ARGS__) -# define SEDMSG(...) -#elif(VERBOSE == 2) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) MSG(__VA_ARGS__) -# define EDMSG(...) -# define SEDMSG(...) -#elif(VERBOSE == 1) -# define MSG(...) \ - { \ - printf(__VA_ARGS__); \ - } -# define DMSG(...) -# define EDMSG(...) -# define SEDMSG(...) -#else -# define MSG(...) -# define DMSG(...) -# define EDMSG(...) -# define SEDMSG(...) -#endif - -//////////////////////////////////////////// -// Printing -/////////////////////////////////////////// -//#define PRINT_IN_BE -//#define NO_SPACE -//#define NO_NEWLINE +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +//////////////////////////////////////////// +// Basic defs +/////////////////////////////////////////// +#define FUNC_PREFIX BIKE1_L1_R2 +#include "functions_renaming.h" + +#ifdef __cplusplus +# define EXTERNC extern "C" +#else +# define EXTERNC +#endif + +// For code clarity. +#define IN +#define OUT + +#define ALIGN(n) __attribute__((aligned(n))) +#define BIKE_UNUSED(x) (void)(x) +#define BIKE_UNUSED_ATT __attribute__((unused)) + +#define _INLINE_ static inline + +// In asm the symbols '==' and '?' are not allowed therefore if using +// divide_and_ceil in asm files we must ensure with static_assert its validity +#if(__cplusplus >= 201103L) || defined(static_assert) +# define bike_static_assert(COND, MSG) static_assert(COND, "MSG") +#else +# define bike_static_assert(COND, MSG) \ + typedef char static_assertion_##MSG[(COND) ? 
1 : -1] BIKE_UNUSED_ATT +#endif + +// Divide by the divider and round up to next integer +#define DIVIDE_AND_CEIL(x, divider) (((x) + (divider)) / (divider)) + +#define BIT(len) (1ULL << (len)) + +#define MASK(len) (BIT(len) - 1) +#define SIZEOF_BITS(b) (sizeof(b) * 8) + +#define QW_SIZE 0x8 +#define XMM_SIZE 0x10 +#define YMM_SIZE 0x20 +#define ZMM_SIZE 0x40 + +#define ALL_YMM_SIZE (16 * YMM_SIZE) +#define ALL_ZMM_SIZE (32 * ZMM_SIZE) + +// Copied from (Kaz answer) +// https://stackoverflow.com/questions/466204/rounding-up-to-next-power-of-2 +#define UPTOPOW2_0(v) ((v)-1) +#define UPTOPOW2_1(v) (UPTOPOW2_0(v) | (UPTOPOW2_0(v) >> 1)) +#define UPTOPOW2_2(v) (UPTOPOW2_1(v) | (UPTOPOW2_1(v) >> 2)) +#define UPTOPOW2_3(v) (UPTOPOW2_2(v) | (UPTOPOW2_2(v) >> 4)) +#define UPTOPOW2_4(v) (UPTOPOW2_3(v) | (UPTOPOW2_3(v) >> 8)) +#define UPTOPOW2_5(v) (UPTOPOW2_4(v) | (UPTOPOW2_4(v) >> 16)) + +#define UPTOPOW2(v) (UPTOPOW2_5(v) + 1) + +// Works only for 0 < v < 512 +#define LOG2_MSB(v) \ + ((v) == 0 \ + ? 0 \ + : ((v) < 2 \ + ? 1 \ + : ((v) < 4 \ + ? 2 \ + : ((v) < 8 \ + ? 3 \ + : ((v) < 16 \ + ? 4 \ + : ((v) < 32 \ + ? 5 \ + : ((v) < 64 ? 6 \ + : ((v) < 128 \ + ? 7 \ + : ((v) < 256 \ + ? 8 \ + : 9))))))))) + +//////////////////////////////////////////// +// Debug +/////////////////////////////////////////// + +#ifndef VERBOSE +# define VERBOSE 0 +#endif + +#include <stdio.h> + +#if(VERBOSE == 4) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) MSG(__VA_ARGS__) +# define SEDMSG(...) MSG(__VA_ARGS__) +#elif(VERBOSE == 3) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) MSG(__VA_ARGS__) +# define SEDMSG(...) +#elif(VERBOSE == 2) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) MSG(__VA_ARGS__) +# define EDMSG(...) +# define SEDMSG(...) +#elif(VERBOSE == 1) +# define MSG(...) \ + { \ + printf(__VA_ARGS__); \ + } +# define DMSG(...) +# define EDMSG(...) +# define SEDMSG(...) +#else +# define MSG(...) +# define DMSG(...) +# define EDMSG(...) +# define SEDMSG(...) +#endif + +//////////////////////////////////////////// +// Printing +/////////////////////////////////////////// +//#define PRINT_IN_BE +//#define NO_SPACE +//#define NO_NEWLINE diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.c index b048fc06a2..0d8e5b25ce 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.c @@ -1,11 +1,11 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "error.h" - -__thread _bike_err_t bike_errno; +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "error.h" + +__thread _bike_err_t bike_errno; diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.h index eac4e2daee..19d0bb1d9b 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/error.h @@ -1,36 +1,36 @@ -/* Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "defs.h" - -#define SUCCESS 0 -#define FAIL (-1) - -#define ret_t int - -enum _bike_err -{ - E_ERROR_WEIGHT_IS_NOT_T = 1, - E_DECODING_FAILURE = 2, - E_AES_CTR_PRF_INIT_FAIL = 3, - E_AES_OVER_USED = 4, - EXTERNAL_LIB_ERROR_OPENSSL = 5, - E_FAIL_TO_GET_SEED = 6 -}; - -typedef enum _bike_err _bike_err_t; - -extern __thread _bike_err_t bike_errno; -#define BIKE_ERROR(x) \ - do \ - { \ - bike_errno = (x); \ - return FAIL; \ - } while(0) +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "defs.h" + +#define SUCCESS 0 +#define FAIL (-1) + +#define ret_t int + +enum _bike_err +{ + E_ERROR_WEIGHT_IS_NOT_T = 1, + E_DECODING_FAILURE = 2, + E_AES_CTR_PRF_INIT_FAIL = 3, + E_AES_OVER_USED = 4, + EXTERNAL_LIB_ERROR_OPENSSL = 5, + E_FAIL_TO_GET_SEED = 6 +}; + +typedef enum _bike_err _bike_err_t; + +extern __thread _bike_err_t bike_errno; +#define BIKE_ERROR(x) \ + do \ + { \ + bike_errno = (x); \ + return FAIL; \ + } while(0) diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/functions_renaming.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/functions_renaming.h index f11aa90e14..09c8385803 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/functions_renaming.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/functions_renaming.h @@ -1,60 +1,60 @@ -/* - * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - * The license is detailed in the file LICENSE.md, and applies to this file. - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
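error.h fixes the error-reporting convention used by every routine in these files: return SUCCESS or FAIL, and record the cause in the thread-local bike_errno through BIKE_ERROR before returning. The GUARD macro that callers use to propagate failures is defined elsewhere; the sketch below assumes it simply bubbles FAIL up, which matches how it is used in decode() and the samplers (the GUARD definition here is an assumption, not the library's):

#include <stdio.h>

#define SUCCESS 0
#define FAIL    (-1)
#define ret_t   int

/* Simplified stand-ins for error.h; the real header defines more error
 * codes, uses __thread, and renames bike_errno via functions_renaming.h. */
typedef enum { E_OK = 0, E_FAIL_TO_GET_SEED = 6 } _bike_err_t;
static _bike_err_t bike_errno;

#define BIKE_ERROR(x) do { bike_errno = (x); return FAIL; } while (0)

/* Assumed propagation helper in the spirit of the library's GUARD. */
#define GUARD(f)      do { if ((f) != SUCCESS) return FAIL; } while (0)

static ret_t get_seed(int have_entropy)
{
    if (!have_entropy) {
        BIKE_ERROR(E_FAIL_TO_GET_SEED);
    }
    return SUCCESS;
}

static ret_t keygen(int have_entropy)
{
    GUARD(get_seed(have_entropy)); /* bubbles FAIL up, bike_errno already set */
    return SUCCESS;
}

int main(void)
{
    if (keygen(0) != SUCCESS) {
        printf("keygen failed, bike_errno = %d\n", (int)bike_errno);
    }
    return 0;
}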
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#ifndef __FUNCTIONS_RENAMING_H_INCLUDED__ -#define __FUNCTIONS_RENAMING_H_INCLUDED__ - -#define PASTER(x, y) x##_##y -#define EVALUATOR(x, y) PASTER(x, y) -#define RENAME_FUNC_NAME(fname) EVALUATOR(FUNC_PREFIX, fname) - -#define keypair RENAME_FUNC_NAME(keypair) -#define decaps RENAME_FUNC_NAME(decaps) -#define encaps RENAME_FUNC_NAME(encaps) - -#define aes_ctr_prf RENAME_FUNC_NAME(aes_ctr_prf) -#define sample_uniform_r_bits_with_fixed_prf_context \ - RENAME_FUNC_NAME(sample_uniform_r_bits_with_fixed_prf_context) -#define init_aes_ctr_prf_state RENAME_FUNC_NAME(init_aes_ctr_prf_state) -#define generate_sparse_rep RENAME_FUNC_NAME(generate_sparse_rep) -#define parallel_hash RENAME_FUNC_NAME(parallel_hash) -#define decode RENAME_FUNC_NAME(decode) -#define print_BE RENAME_FUNC_NAME(print_BE) -#define print_LE RENAME_FUNC_NAME(print_LE) -#define gf2x_mod_mul RENAME_FUNC_NAME(gf2x_mod_mul) -#define secure_set_bits RENAME_FUNC_NAME(secure_set_bits) -#define sha RENAME_FUNC_NAME(sha) -#define count_ones RENAME_FUNC_NAME(count_ones) -#define sha_mb RENAME_FUNC_NAME(sha_mb) -#define split_e RENAME_FUNC_NAME(split_e) -#define compute_syndrome RENAME_FUNC_NAME(compute_syndrome) -#define bike_errno RENAME_FUNC_NAME(bike_errno) -#define cyclic_product RENAME_FUNC_NAME(cyclic_product) -#define ossl_add RENAME_FUNC_NAME(ossl_add) -#define karatzuba_add1 RENAME_FUNC_NAME(karatzuba_add1) -#define karatzuba_add2 RENAME_FUNC_NAME(karatzuba_add2) -#define gf2x_add RENAME_FUNC_NAME(gf2x_add) -#define gf2_muladd_4x4 RENAME_FUNC_NAME(gf2_muladd_4x4) -#define red RENAME_FUNC_NAME(red) -#define gf2x_mul_1x1 RENAME_FUNC_NAME(gf2x_mul_1x1) -#define rotate_right RENAME_FUNC_NAME(rotate_right) -#define r_bits_vector_weight RENAME_FUNC_NAME(r_bits_vector_weight) - -#endif //__FUNCTIONS_RENAMING_H_INCLUDED__ +/* + * Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + * The license is detailed in the file LICENSE.md, and applies to this file. + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#ifndef __FUNCTIONS_RENAMING_H_INCLUDED__ +#define __FUNCTIONS_RENAMING_H_INCLUDED__ + +#define PASTER(x, y) x##_##y +#define EVALUATOR(x, y) PASTER(x, y) +#define RENAME_FUNC_NAME(fname) EVALUATOR(FUNC_PREFIX, fname) + +#define keypair RENAME_FUNC_NAME(keypair) +#define decaps RENAME_FUNC_NAME(decaps) +#define encaps RENAME_FUNC_NAME(encaps) + +#define aes_ctr_prf RENAME_FUNC_NAME(aes_ctr_prf) +#define sample_uniform_r_bits_with_fixed_prf_context \ + RENAME_FUNC_NAME(sample_uniform_r_bits_with_fixed_prf_context) +#define init_aes_ctr_prf_state RENAME_FUNC_NAME(init_aes_ctr_prf_state) +#define generate_sparse_rep RENAME_FUNC_NAME(generate_sparse_rep) +#define parallel_hash RENAME_FUNC_NAME(parallel_hash) +#define decode RENAME_FUNC_NAME(decode) +#define print_BE RENAME_FUNC_NAME(print_BE) +#define print_LE RENAME_FUNC_NAME(print_LE) +#define gf2x_mod_mul RENAME_FUNC_NAME(gf2x_mod_mul) +#define secure_set_bits RENAME_FUNC_NAME(secure_set_bits) +#define sha RENAME_FUNC_NAME(sha) +#define count_ones RENAME_FUNC_NAME(count_ones) +#define sha_mb RENAME_FUNC_NAME(sha_mb) +#define split_e RENAME_FUNC_NAME(split_e) +#define compute_syndrome RENAME_FUNC_NAME(compute_syndrome) +#define bike_errno RENAME_FUNC_NAME(bike_errno) +#define cyclic_product RENAME_FUNC_NAME(cyclic_product) +#define ossl_add RENAME_FUNC_NAME(ossl_add) +#define karatzuba_add1 RENAME_FUNC_NAME(karatzuba_add1) +#define karatzuba_add2 RENAME_FUNC_NAME(karatzuba_add2) +#define gf2x_add RENAME_FUNC_NAME(gf2x_add) +#define gf2_muladd_4x4 RENAME_FUNC_NAME(gf2_muladd_4x4) +#define red RENAME_FUNC_NAME(red) +#define gf2x_mul_1x1 RENAME_FUNC_NAME(gf2x_mul_1x1) +#define rotate_right RENAME_FUNC_NAME(rotate_right) +#define r_bits_vector_weight RENAME_FUNC_NAME(r_bits_vector_weight) + +#endif //__FUNCTIONS_RENAMING_H_INCLUDED__ diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x.h index 2de0050ff6..7fb1695058 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x.h @@ -1,55 +1,55 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -#ifdef USE_OPENSSL -# include "openssl_utils.h" -#endif - -#ifdef USE_OPENSSL_GF2M -// res = a*b mod (x^r - 1) -// Note: the caller must allocate twice the size of res. -_INLINE_ ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) -{ - return cyclic_product((uint8_t *)res, (const uint8_t *)a, (const uint8_t *)b); -} - -// A wrapper for other gf2x_add implementations. -_INLINE_ ret_t -gf2x_add(OUT uint8_t *res, - IN const uint8_t *a, - IN const uint8_t *b, - IN const uint64_t size) -{ - BIKE_UNUSED(size); - return ossl_add((uint8_t *)res, a, b); -} -#else // USE_OPENSSL_GF2M - -_INLINE_ ret_t -gf2x_add(OUT uint8_t *res, - IN const uint8_t *a, - IN const uint8_t *b, - IN const uint64_t bytelen) -{ - for(uint64_t i = 0; i < bytelen; i++) - { - res[i] = a[i] ^ b[i]; - } - return SUCCESS; -} - -// res = a*b mod (x^r - 1) -// the caller must allocate twice the size of res! -ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b); -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
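functions_renaming.h prefixes every public symbol with FUNC_PREFIX (BIKE1_L1_R2 in this variant) so that several BIKE parameter sets can be linked into one binary without symbol collisions. The two-step paste matters: PASTER alone would glue the literal token FUNC_PREFIX, while EVALUATOR forces it to expand first. A small standalone demonstration of the expansion (the STR helper is local to this sketch):

#include <stdio.h>

#define FUNC_PREFIX BIKE1_L1_R2

/* Two-step paste, as in functions_renaming.h: EVALUATOR makes sure
 * FUNC_PREFIX is expanded before ## is applied. */
#define PASTER(x, y)            x##_##y
#define EVALUATOR(x, y)         PASTER(x, y)
#define RENAME_FUNC_NAME(fname) EVALUATOR(FUNC_PREFIX, fname)

#define decode RENAME_FUNC_NAME(decode)

/* After preprocessing this defines BIKE1_L1_R2_decode(). */
static int decode(void) { return 42; }

#define STR_(x) #x
#define STR(x)  STR_(x)

int main(void)
{
    /* Prints the renamed symbol and calls it through the short name. */
    printf("%s -> %d\n", STR(decode), decode());
    return 0;
}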
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +#ifdef USE_OPENSSL +# include "openssl_utils.h" +#endif + +#ifdef USE_OPENSSL_GF2M +// res = a*b mod (x^r - 1) +// Note: the caller must allocate twice the size of res. +_INLINE_ ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) +{ + return cyclic_product((uint8_t *)res, (const uint8_t *)a, (const uint8_t *)b); +} + +// A wrapper for other gf2x_add implementations. +_INLINE_ ret_t +gf2x_add(OUT uint8_t *res, + IN const uint8_t *a, + IN const uint8_t *b, + IN const uint64_t size) +{ + BIKE_UNUSED(size); + return ossl_add((uint8_t *)res, a, b); +} +#else // USE_OPENSSL_GF2M + +_INLINE_ ret_t +gf2x_add(OUT uint8_t *res, + IN const uint8_t *a, + IN const uint8_t *b, + IN const uint64_t bytelen) +{ + for(uint64_t i = 0; i < bytelen; i++) + { + res[i] = a[i] ^ b[i]; + } + return SUCCESS; +} + +// res = a*b mod (x^r - 1) +// the caller must allocate twice the size of res! +ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b); +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_internal.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_internal.h index 74fc5b9932..779e7f9727 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_internal.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_internal.h @@ -1,32 +1,32 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -EXTERNC void -karatzuba_add1(OUT uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN uint64_t n_half, - IN uint64_t *alah); - -EXTERNC void -karatzuba_add2(OUT uint64_t *res1, - OUT uint64_t *res2, - IN const uint64_t *res, - IN const uint64_t *tmp, - IN uint64_t n_half); - -EXTERNC void -red(uint64_t *res); - -void - -gf2x_mul_1x1(OUT uint64_t *res, IN uint64_t a, IN uint64_t b); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +EXTERNC void +karatzuba_add1(OUT uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN uint64_t n_half, + IN uint64_t *alah); + +EXTERNC void +karatzuba_add2(OUT uint64_t *res1, + OUT uint64_t *res2, + IN const uint64_t *res, + IN const uint64_t *tmp, + IN uint64_t n_half); + +EXTERNC void +red(uint64_t *res); + +void + +gf2x_mul_1x1(OUT uint64_t *res, IN uint64_t a, IN uint64_t b); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_mul.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_mul.c index 84a79589db..81e55a3366 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_mul.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_mul.c @@ -1,97 +1,97 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "cleanup.h" -#include "gf2x.h" -#include "gf2x_internal.h" -#include <stdlib.h> -#include <string.h> - -#ifndef USE_OPENSSL_GF2M - -// All the temporary data (which might hold secrets) -// is stored on a secure buffer, so that it can be easily cleaned up later. -// The secure buffer required is: 3n/2 (alah|blbh|tmp) in a recursive way. -// 3n/2 + 3n/4 + 3n/8 = 3(n/2 + n/4 + n/8) < 3n -# define SECURE_BUFFER_SIZE (3 * R_PADDED_SIZE) - -// Calculate number of uint64_t values needed to store SECURE_BUFFER_SIZE bytes. Rounding up to the next whole integer. -# define SECURE_BUFFER_SIZE_64_BIT ((SECURE_BUFFER_SIZE / sizeof(uint64_t)) + ((SECURE_BUFFER_SIZE % sizeof(uint64_t)) != 0)) - -// This functions assumes that n is even. -_INLINE_ void -karatzuba(OUT uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN const uint64_t n, - uint64_t * secure_buf) -{ - if(1 == n) - { - gf2x_mul_1x1(res, a[0], b[0]); - return; - } - - const uint64_t half_n = n >> 1; - - // Define pointers for the middle of each parameter - // sepearting a=a_low and a_high (same for ba nd res) - const uint64_t *a_high = a + half_n; - const uint64_t *b_high = b + half_n; - - // Divide res into 4 parts res3|res2|res1|res in size n/2 - uint64_t *res1 = res + half_n; - uint64_t *res2 = res1 + half_n; - - // All three parameters below are allocated on the secure buffer - // All of them are in size half n - uint64_t *alah = secure_buf; - uint64_t *blbh = alah + half_n; - uint64_t *tmp = blbh + half_n; - - // Place the secure buffer ptr in the first free location, - // so the recursive function can use it. - secure_buf = tmp + half_n; - - // Calculate Z0 and store the result in res(low) - karatzuba(res, a, b, half_n, secure_buf); - - // Calculate Z2 and store the result in res(high) - karatzuba(res2, a_high, b_high, half_n, secure_buf); - - // Accomulate the results. - karatzuba_add1(res, a, b, half_n, alah); - - // (a_low + a_high)(b_low + b_high) --> res1 - karatzuba(res1, alah, blbh, half_n, secure_buf); - - karatzuba_add2(res1, res2, res, tmp, half_n); -} - -ret_t -gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) -{ - bike_static_assert((R_PADDED_QW % 2 == 0), karatzuba_n_is_odd); - - ALIGN(sizeof(uint64_t)) uint64_t secure_buffer[SECURE_BUFFER_SIZE_64_BIT]; - - /* make sure we have the correct size allocation. */ - bike_static_assert(sizeof(secure_buffer) % sizeof(uint64_t) == 0, - secure_buffer_not_eligable_for_uint64_t); - - karatzuba(res, a, b, R_PADDED_QW, (uint64_t *)secure_buffer); - - // This function implicitly assumes that the size of res is 2*R_PADDED_QW. - red(res); - - secure_clean((uint8_t*)secure_buffer, sizeof(secure_buffer)); - - return SUCCESS; -} - -#endif // USE_OPENSSL_GF2M +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "cleanup.h" +#include "gf2x.h" +#include "gf2x_internal.h" +#include <stdlib.h> +#include <string.h> + +#ifndef USE_OPENSSL_GF2M + +// All the temporary data (which might hold secrets) +// is stored on a secure buffer, so that it can be easily cleaned up later. +// The secure buffer required is: 3n/2 (alah|blbh|tmp) in a recursive way. 
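karatzuba() above splits each operand into low and high halves, computes the three half-size products Z0, Z2 and the product of the folded halves, and recombines them; over GF(2) the usual Karatsuba subtractions are plain XORs, which is what karatzuba_add1/karatzuba_add2 accumulate. A 16-bit toy check of that identity (clmul16 and the constants are local to this sketch, not part of the library):

#include <stdint.h>
#include <stdio.h>

/* Plain carry-less multiply of two 16-bit polynomials (32-bit result). */
static uint32_t clmul16(uint16_t a, uint16_t b)
{
    uint32_t r = 0;
    for (unsigned i = 0; i < 16; i++) {
        if ((b >> i) & 1) {
            r ^= (uint32_t)a << i;
        }
    }
    return r;
}

int main(void)
{
    const uint16_t a = 0xB17E, b = 0x2D5C;

    /* Split into 8-bit halves: a = aH*x^8 + aL, b = bH*x^8 + bL. */
    const uint16_t al = a & 0xFF, ah = a >> 8;
    const uint16_t bl = b & 0xFF, bh = b >> 8;

    /* Three half-size products, as in karatzuba(): Z0, Z2 and the middle
     * term; over GF(2) the subtractions become XORs. */
    const uint32_t z0 = clmul16(al, bl);
    const uint32_t z2 = clmul16(ah, bh);
    const uint32_t z1 = clmul16(al ^ ah, bl ^ bh) ^ z0 ^ z2;

    const uint32_t karatsuba = (z2 << 16) ^ (z1 << 8) ^ z0;
    const uint32_t direct    = clmul16(a, b);

    printf("%08x %08x %s\n", karatsuba, direct,
           (karatsuba == direct) ? "match" : "MISMATCH");
    return 0;
}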
+// 3n/2 + 3n/4 + 3n/8 = 3(n/2 + n/4 + n/8) < 3n +# define SECURE_BUFFER_SIZE (3 * R_PADDED_SIZE) + +// Calculate number of uint64_t values needed to store SECURE_BUFFER_SIZE bytes. Rounding up to the next whole integer. +# define SECURE_BUFFER_SIZE_64_BIT ((SECURE_BUFFER_SIZE / sizeof(uint64_t)) + ((SECURE_BUFFER_SIZE % sizeof(uint64_t)) != 0)) + +// This functions assumes that n is even. +_INLINE_ void +karatzuba(OUT uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN const uint64_t n, + uint64_t * secure_buf) +{ + if(1 == n) + { + gf2x_mul_1x1(res, a[0], b[0]); + return; + } + + const uint64_t half_n = n >> 1; + + // Define pointers for the middle of each parameter + // sepearting a=a_low and a_high (same for ba nd res) + const uint64_t *a_high = a + half_n; + const uint64_t *b_high = b + half_n; + + // Divide res into 4 parts res3|res2|res1|res in size n/2 + uint64_t *res1 = res + half_n; + uint64_t *res2 = res1 + half_n; + + // All three parameters below are allocated on the secure buffer + // All of them are in size half n + uint64_t *alah = secure_buf; + uint64_t *blbh = alah + half_n; + uint64_t *tmp = blbh + half_n; + + // Place the secure buffer ptr in the first free location, + // so the recursive function can use it. + secure_buf = tmp + half_n; + + // Calculate Z0 and store the result in res(low) + karatzuba(res, a, b, half_n, secure_buf); + + // Calculate Z2 and store the result in res(high) + karatzuba(res2, a_high, b_high, half_n, secure_buf); + + // Accomulate the results. + karatzuba_add1(res, a, b, half_n, alah); + + // (a_low + a_high)(b_low + b_high) --> res1 + karatzuba(res1, alah, blbh, half_n, secure_buf); + + karatzuba_add2(res1, res2, res, tmp, half_n); +} + +ret_t +gf2x_mod_mul(OUT uint64_t *res, IN const uint64_t *a, IN const uint64_t *b) +{ + bike_static_assert((R_PADDED_QW % 2 == 0), karatzuba_n_is_odd); + + ALIGN(sizeof(uint64_t)) uint64_t secure_buffer[SECURE_BUFFER_SIZE_64_BIT]; + + /* make sure we have the correct size allocation. */ + bike_static_assert(sizeof(secure_buffer) % sizeof(uint64_t) == 0, + secure_buffer_not_eligable_for_uint64_t); + + karatzuba(res, a, b, R_PADDED_QW, (uint64_t *)secure_buffer); + + // This function implicitly assumes that the size of res is 2*R_PADDED_QW. + red(res); + + secure_clean((uint8_t*)secure_buffer, sizeof(secure_buffer)); + + return SUCCESS; +} + +#endif // USE_OPENSSL_GF2M diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_portable.c index 1816da6e77..f59361f192 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/gf2x_portable.c @@ -1,108 +1,108 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "gf2x.h" -#include "utilities.h" - -#if !defined(USE_OPENSSL_GF2M) - -// The algorithm is based on the windowing method, for example as in: -// Brent, R. P., Gaudry, P., Thomé, E., & Zimmermann, P. (2008, May), "Faster -// multiplication in GF (2)[x]". In: International Algorithmic Number Theory -// Symposium (pp. 153-166). Springer, Berlin, Heidelberg. In this implementation, -// the last three bits are multiplied using a schoolbook multiplicaiton. 
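gf2x_mul_1x1 is the 64x64-to-128-bit carry-less multiply at the bottom of the recursion; the windowed version that follows handles three bits of a per step and fixes up the top three bits of b separately. A plain one-bit-at-a-time reference is handy for cross-checking it on random inputs; the sketch below is such a reference, not the library's routine:

#include <stdint.h>
#include <stdio.h>

/* Reference carry-less 64x64 -> 128 bit multiply over GF(2)[x]:
 * c[0] = low 64 bits, c[1] = high 64 bits. One bit of b per step. */
static void gf2x_mul_1x1_ref(uint64_t c[2], uint64_t a, uint64_t b)
{
    uint64_t lo = 0, hi = 0;

    for (unsigned i = 0; i < 64; i++) {
        /* All-ones mask when bit i of b is set, zero otherwise. */
        const uint64_t mask = 0 - ((b >> i) & 1);

        lo ^= (a << i) & mask;
        /* (a >> (64 - i)) is undefined for i == 0; shift in two steps. */
        hi ^= ((a >> 1) >> (63 - i)) & mask;
    }

    c[0] = lo;
    c[1] = hi;
}

int main(void)
{
    uint64_t c[2];

    /* x^63 * x^1 = x^64, i.e. bit 0 of the high word. */
    gf2x_mul_1x1_ref(c, 1ULL << 63, 2);
    printf("%016llx %016llx\n", (unsigned long long)c[1],
           (unsigned long long)c[0]);
    return 0;
}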
-void -gf2x_mul_1x1(uint64_t *c, uint64_t a, uint64_t b) -{ - uint64_t h = 0, l = 0, u[8]; - const uint64_t w = 64; - const uint64_t s = 3; - // Multiplying 64 bits by 7 can results in an overflow of 3 bits. - // Therefore, these bits are masked out, and are treated in step 3. - const uint64_t b0 = b & 0x1fffffffffffffff; - - // Step 1: Calculate a multiplication table with 8 entries. - u[0] = 0; - u[1] = b0; - u[2] = u[1] << 1; - u[3] = u[2] ^ b0; - u[4] = u[2] << 1; - u[5] = u[4] ^ b0; - u[6] = u[3] << 1; - u[7] = u[6] ^ b0; - - // Step 2: Multiply two elements in parallel in poisitions i,i+s - l = u[a & 7] ^ (u[(a >> 3) & 7] << 3); - h = (u[(a >> 3) & 7] >> 61); - for(uint32_t i = (2 * s); i < w; i += (2 * s)) - { - uint64_t g1 = u[(a >> i) & 7]; - uint64_t g2 = u[(a >> (i + s)) & 7]; - - l ^= (g1 << i) ^ (g2 << (i + s)); - h ^= (g1 >> (w - i)) ^ (g2 >> (w - (i + s))); - } - - // Step 3: Multiply the last three bits. - for(uint8_t i = 61; i < 64; i++) - { - uint64_t mask = (-((b >> i) & 1)); - l ^= ((a << i) & mask); - h ^= ((a >> (w - i)) & mask); - } - - c[0] = l; - c[1] = h; -} - -void -karatzuba_add1(OUT const uint64_t *res, - IN const uint64_t *a, - IN const uint64_t *b, - IN const uint64_t n_half, - IN uint64_t *alah) -{ - for(uint32_t j = 0; j < n_half; j++) - { - alah[j + 0 * n_half] = a[j] ^ a[n_half + j]; - alah[j + 1 * n_half] = b[j] ^ b[n_half + j]; - alah[j + 2 * n_half] = res[n_half + j] ^ res[2 * n_half + j]; - } -} - -void -karatzuba_add2(OUT uint64_t *res1, - OUT uint64_t *res2, - IN const uint64_t *res, - IN const uint64_t *tmp, - IN const uint64_t n_half) -{ - for(uint32_t j = 0; j < n_half; j++) - { - res1[j] ^= res[j] ^ tmp[j]; - res2[j] ^= res2[n_half + j] ^ tmp[j]; - } -} - -void -red(uint64_t *a) -{ - for(uint32_t i = 0; i < R_QW; i++) - { - const uint64_t temp0 = a[R_QW + i - 1]; - const uint64_t temp1 = a[R_QW + i]; - a[i] ^= (temp0 >> LAST_R_QW_LEAD) | (temp1 << LAST_R_QW_TRAIL); - } - - a[R_QW - 1] &= LAST_R_QW_MASK; - - // Clean the secrets from the upper half of a. - secure_clean((uint8_t *)&a[R_QW], sizeof(uint64_t) * R_QW); -} - -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "gf2x.h" +#include "utilities.h" + +#if !defined(USE_OPENSSL_GF2M) + +// The algorithm is based on the windowing method, for example as in: +// Brent, R. P., Gaudry, P., Thomé, E., & Zimmermann, P. (2008, May), "Faster +// multiplication in GF (2)[x]". In: International Algorithmic Number Theory +// Symposium (pp. 153-166). Springer, Berlin, Heidelberg. In this implementation, +// the last three bits are multiplied using a schoolbook multiplicaiton. +void +gf2x_mul_1x1(uint64_t *c, uint64_t a, uint64_t b) +{ + uint64_t h = 0, l = 0, u[8]; + const uint64_t w = 64; + const uint64_t s = 3; + // Multiplying 64 bits by 7 can results in an overflow of 3 bits. + // Therefore, these bits are masked out, and are treated in step 3. + const uint64_t b0 = b & 0x1fffffffffffffff; + + // Step 1: Calculate a multiplication table with 8 entries. 
+ u[0] = 0; + u[1] = b0; + u[2] = u[1] << 1; + u[3] = u[2] ^ b0; + u[4] = u[2] << 1; + u[5] = u[4] ^ b0; + u[6] = u[3] << 1; + u[7] = u[6] ^ b0; + + // Step 2: Multiply two elements in parallel in poisitions i,i+s + l = u[a & 7] ^ (u[(a >> 3) & 7] << 3); + h = (u[(a >> 3) & 7] >> 61); + for(uint32_t i = (2 * s); i < w; i += (2 * s)) + { + uint64_t g1 = u[(a >> i) & 7]; + uint64_t g2 = u[(a >> (i + s)) & 7]; + + l ^= (g1 << i) ^ (g2 << (i + s)); + h ^= (g1 >> (w - i)) ^ (g2 >> (w - (i + s))); + } + + // Step 3: Multiply the last three bits. + for(uint8_t i = 61; i < 64; i++) + { + uint64_t mask = (-((b >> i) & 1)); + l ^= ((a << i) & mask); + h ^= ((a >> (w - i)) & mask); + } + + c[0] = l; + c[1] = h; +} + +void +karatzuba_add1(OUT const uint64_t *res, + IN const uint64_t *a, + IN const uint64_t *b, + IN const uint64_t n_half, + IN uint64_t *alah) +{ + for(uint32_t j = 0; j < n_half; j++) + { + alah[j + 0 * n_half] = a[j] ^ a[n_half + j]; + alah[j + 1 * n_half] = b[j] ^ b[n_half + j]; + alah[j + 2 * n_half] = res[n_half + j] ^ res[2 * n_half + j]; + } +} + +void +karatzuba_add2(OUT uint64_t *res1, + OUT uint64_t *res2, + IN const uint64_t *res, + IN const uint64_t *tmp, + IN const uint64_t n_half) +{ + for(uint32_t j = 0; j < n_half; j++) + { + res1[j] ^= res[j] ^ tmp[j]; + res2[j] ^= res2[n_half + j] ^ tmp[j]; + } +} + +void +red(uint64_t *a) +{ + for(uint32_t i = 0; i < R_QW; i++) + { + const uint64_t temp0 = a[R_QW + i - 1]; + const uint64_t temp1 = a[R_QW + i]; + a[i] ^= (temp0 >> LAST_R_QW_LEAD) | (temp1 << LAST_R_QW_TRAIL); + } + + a[R_QW - 1] &= LAST_R_QW_MASK; + + // Clean the secrets from the upper half of a. + secure_clean((uint8_t *)&a[R_QW], sizeof(uint64_t) * R_QW); +} + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.c index 09e0af3fde..a2a97c4651 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.c @@ -1,187 +1,187 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "openssl_utils.h" -#include "utilities.h" -#include <assert.h> -#include <openssl/bn.h> -#include <string.h> - -#ifdef USE_OPENSSL_GF2M - -# define MAX_OPENSSL_INV_TRIALS 1000 - -_INLINE_ void -BN_CTX_cleanup(BN_CTX *ctx) -{ - if(ctx) - { - BN_CTX_end(ctx); - BN_CTX_free(ctx); - } -} - -DEFINE_POINTER_CLEANUP_FUNC(BN_CTX *, BN_CTX_cleanup); - -// Loading (big) numbers into OpenSSL should use Big Endian representation. -// Therefore, the bytes ordering of the number should be reversed. -_INLINE_ void -reverse_endian(OUT uint8_t *res, IN const uint8_t *in, IN const uint32_t n) -{ - uint32_t i; - - for(i = 0; i < (n / 2); i++) - { - uint64_t tmp = in[i]; - res[i] = in[n - 1 - i]; - res[n - 1 - i] = tmp; - } - - // If the number of blocks is odd, swap also the middle block. 
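red() reduces the double-length product modulo x^R - 1 by folding the upper bits back onto the lower ones (x^R equals 1 in this ring) and masking the last quadword; the shifts by LAST_R_QW_LEAD/LAST_R_QW_TRAIL are only needed because R_BITS is not a multiple of 64. The same folding on a toy polynomial that fits in one word, with a hypothetical small r and without the word-boundary bookkeeping:

#include <stdint.h>
#include <stdio.h>

/* Reduce a polynomial of degree < 2r modulo x^r - 1, for r <= 32.
 * Since x^r == 1 (mod x^r - 1), the upper half simply folds (XORs)
 * onto the lower half; the quadword version in red() does the same
 * across an array, with shifts at the word seam. */
static uint32_t reduce_cyclic(uint64_t poly, unsigned r)
{
    const uint32_t mask = (r == 32) ? 0xffffffffu : ((1u << r) - 1);
    return (uint32_t)(poly & mask) ^ (uint32_t)(poly >> r);
}

int main(void)
{
    /* r = 5: x^6 + x^2, and x^6 = x^5 * x == x, so the result is x^2 + x. */
    printf("0x%x\n", reduce_cyclic((1u << 6) | (1u << 2), 5)); /* 0x6 */
    return 0;
}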
- if(n % 2) - { - res[i] = in[i]; - } -} - -_INLINE_ ret_t -ossl_bn2bin(OUT uint8_t *out, IN const BIGNUM *in, IN const uint32_t size) -{ - assert(size <= N_SIZE); - uint8_t be_tmp[N_SIZE] = {0}; - - memset(out, 0, size); - - if(BN_bn2bin(in, be_tmp) == -1) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - reverse_endian(out, be_tmp, BN_num_bytes(in)); - - return SUCCESS; -} - -_INLINE_ ret_t -ossl_bin2bn(IN BIGNUM *out, OUT const uint8_t *in, IN const uint32_t size) -{ - assert(size <= N_SIZE); - uint8_t be_tmp[N_SIZE] = {0}; - - reverse_endian(be_tmp, in, size); - - if(BN_bin2bn(be_tmp, size, out) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - return SUCCESS; -} - -ret_t -ossl_add(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]) -{ - DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); - BIGNUM *r = NULL; - BIGNUM *a = NULL; - BIGNUM *b = NULL; - - if(NULL == bn_ctx) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - BN_CTX_start(bn_ctx); - - r = BN_CTX_get(bn_ctx); - a = BN_CTX_get(bn_ctx); - b = BN_CTX_get(bn_ctx); - - if((NULL == r) || (NULL == a) || (NULL == b)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); - GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); - - if(BN_GF2m_add(r, a, b) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); - - return SUCCESS; -} - -// Perform a cyclic product by using OpenSSL. -_INLINE_ ret_t -ossl_cyclic_product(OUT BIGNUM *r, - IN const BIGNUM *a, - IN const BIGNUM *b, - BN_CTX * bn_ctx) -{ - BIGNUM *m = BN_CTX_get(bn_ctx); - if(NULL == m) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - // m = x^PARAM_R - 1 - if((BN_set_bit(m, R_BITS) == 0) || (BN_set_bit(m, 0) == 0)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - // r = a*b mod m - if(BN_GF2m_mod_mul(r, a, b, m, bn_ctx) == 0) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - return SUCCESS; -} - -// Perform a cyclic product by using OpenSSL. -ret_t -cyclic_product(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]) -{ - DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); - BIGNUM *r = NULL; - BIGNUM *a = NULL; - BIGNUM *b = NULL; - - if(NULL == bn_ctx) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - BN_CTX_start(bn_ctx); - - r = BN_CTX_get(bn_ctx); - a = BN_CTX_get(bn_ctx); - b = BN_CTX_get(bn_ctx); - - if((NULL == r) || (NULL == a) || (NULL == b)) - { - BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); - } - - GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); - GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); - GUARD(ossl_cyclic_product(r, a, b, bn_ctx)); - GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); - - return SUCCESS; -} - -#endif // USE_OPENSSL_GF2M +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "openssl_utils.h" +#include "utilities.h" +#include <assert.h> +#include <openssl/bn.h> +#include <string.h> + +#ifdef USE_OPENSSL_GF2M + +# define MAX_OPENSSL_INV_TRIALS 1000 + +_INLINE_ void +BN_CTX_cleanup(BN_CTX *ctx) +{ + if(ctx) + { + BN_CTX_end(ctx); + BN_CTX_free(ctx); + } +} + +DEFINE_POINTER_CLEANUP_FUNC(BN_CTX *, BN_CTX_cleanup); + +// Loading (big) numbers into OpenSSL should use Big Endian representation. +// Therefore, the bytes ordering of the number should be reversed. 
+_INLINE_ void +reverse_endian(OUT uint8_t *res, IN const uint8_t *in, IN const uint32_t n) +{ + uint32_t i; + + for(i = 0; i < (n / 2); i++) + { + uint64_t tmp = in[i]; + res[i] = in[n - 1 - i]; + res[n - 1 - i] = tmp; + } + + // If the number of blocks is odd, swap also the middle block. + if(n % 2) + { + res[i] = in[i]; + } +} + +_INLINE_ ret_t +ossl_bn2bin(OUT uint8_t *out, IN const BIGNUM *in, IN const uint32_t size) +{ + assert(size <= N_SIZE); + uint8_t be_tmp[N_SIZE] = {0}; + + memset(out, 0, size); + + if(BN_bn2bin(in, be_tmp) == -1) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + reverse_endian(out, be_tmp, BN_num_bytes(in)); + + return SUCCESS; +} + +_INLINE_ ret_t +ossl_bin2bn(IN BIGNUM *out, OUT const uint8_t *in, IN const uint32_t size) +{ + assert(size <= N_SIZE); + uint8_t be_tmp[N_SIZE] = {0}; + + reverse_endian(be_tmp, in, size); + + if(BN_bin2bn(be_tmp, size, out) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + return SUCCESS; +} + +ret_t +ossl_add(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]) +{ + DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); + BIGNUM *r = NULL; + BIGNUM *a = NULL; + BIGNUM *b = NULL; + + if(NULL == bn_ctx) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + BN_CTX_start(bn_ctx); + + r = BN_CTX_get(bn_ctx); + a = BN_CTX_get(bn_ctx); + b = BN_CTX_get(bn_ctx); + + if((NULL == r) || (NULL == a) || (NULL == b)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); + GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); + + if(BN_GF2m_add(r, a, b) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); + + return SUCCESS; +} + +// Perform a cyclic product by using OpenSSL. +_INLINE_ ret_t +ossl_cyclic_product(OUT BIGNUM *r, + IN const BIGNUM *a, + IN const BIGNUM *b, + BN_CTX * bn_ctx) +{ + BIGNUM *m = BN_CTX_get(bn_ctx); + if(NULL == m) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + // m = x^PARAM_R - 1 + if((BN_set_bit(m, R_BITS) == 0) || (BN_set_bit(m, 0) == 0)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + // r = a*b mod m + if(BN_GF2m_mod_mul(r, a, b, m, bn_ctx) == 0) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + return SUCCESS; +} + +// Perform a cyclic product by using OpenSSL. +ret_t +cyclic_product(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]) +{ + DEFER_CLEANUP(BN_CTX *bn_ctx = BN_CTX_new(), BN_CTX_cleanup_pointer); + BIGNUM *r = NULL; + BIGNUM *a = NULL; + BIGNUM *b = NULL; + + if(NULL == bn_ctx) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + BN_CTX_start(bn_ctx); + + r = BN_CTX_get(bn_ctx); + a = BN_CTX_get(bn_ctx); + b = BN_CTX_get(bn_ctx); + + if((NULL == r) || (NULL == a) || (NULL == b)) + { + BIKE_ERROR(EXTERNAL_LIB_ERROR_OPENSSL); + } + + GUARD(ossl_bin2bn(a, a_bin, R_SIZE)); + GUARD(ossl_bin2bn(b, b_bin, R_SIZE)); + GUARD(ossl_cyclic_product(r, a, b, bn_ctx)); + GUARD(ossl_bn2bin(res_bin, r, R_SIZE)); + + return SUCCESS; +} + +#endif // USE_OPENSSL_GF2M diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.h index 59438b6d70..4f1c55bd94 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/openssl_utils.h @@ -1,33 +1,33 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
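BN_bin2bn/BN_bn2bin expect big-endian buffers while the rest of the code keeps polynomials as little-endian byte arrays, hence reverse_endian on both conversion paths, including the odd-length case where the middle byte stays in place. A quick standalone check of that swap (reverse_bytes is a local copy for illustration, not the library's symbol):

#include <stdint.h>
#include <stdio.h>

/* Same idea as reverse_endian(): swap the byte order of a buffer,
 * leaving the middle byte in place when the length is odd. */
static void reverse_bytes(uint8_t *res, const uint8_t *in, uint32_t n)
{
    uint32_t i;
    for (i = 0; i < n / 2; i++) {
        const uint8_t tmp = in[i];
        res[i] = in[n - 1 - i];
        res[n - 1 - i] = tmp;
    }
    if (n % 2) {
        res[i] = in[i];
    }
}

int main(void)
{
    const uint8_t le[5] = {0x01, 0x02, 0x03, 0x04, 0x05};
    uint8_t be[5];

    reverse_bytes(be, le, 5);
    for (int i = 0; i < 5; i++) {
        printf("%02x ", be[i]); /* 05 04 03 02 01 */
    }
    printf("\n");
    return 0;
}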
- * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron, - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "types.h" - -#ifdef USE_OPENSSL -# include <openssl/bn.h> -# ifndef OPENSSL_NO_EC2M -# define USE_OPENSSL_GF2M 1 -# endif -#endif - -#ifdef USE_OPENSSL_GF2M - -ret_t -ossl_add(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]); - -// Perform cyclic product by using OpenSSL -ret_t -cyclic_product(OUT uint8_t res_bin[R_SIZE], - IN const uint8_t a_bin[R_SIZE], - IN const uint8_t b_bin[R_SIZE]); - -#endif +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron, + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "types.h" + +#ifdef USE_OPENSSL +# include <openssl/bn.h> +# ifndef OPENSSL_NO_EC2M +# define USE_OPENSSL_GF2M 1 +# endif +#endif + +#ifdef USE_OPENSSL_GF2M + +ret_t +ossl_add(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]); + +// Perform cyclic product by using OpenSSL +ret_t +cyclic_product(OUT uint8_t res_bin[R_SIZE], + IN const uint8_t a_bin[R_SIZE], + IN const uint8_t b_bin[R_SIZE]); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.c index 3686338fad..1efde4ddd1 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.c @@ -1,118 +1,118 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "sampling.h" -#include <assert.h> -#include <string.h> - -_INLINE_ ret_t -get_rand_mod_len(OUT uint32_t * rand_pos, - IN const uint32_t len, - IN OUT aes_ctr_prf_state_t *prf_state) -{ - const uint64_t mask = MASK(bit_scan_reverse(len)); - - do - { - // Generate 128bit of random numbers - GUARD(aes_ctr_prf((uint8_t *)rand_pos, prf_state, sizeof(*rand_pos))); - - // Mask only relevant bits - (*rand_pos) &= mask; - - // Break if a number smaller than len is found - if((*rand_pos) < len) - { - break; - } - - } while(1); - - return SUCCESS; -} - -_INLINE_ void -make_odd_weight(IN OUT r_t *r) -{ - if(((r_bits_vector_weight(r) % 2) == 1)) - { - // Already odd - return; - } - - r->raw[0] ^= 1; -} - -// IN: must_be_odd - 1 true, 0 not -ret_t -sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, - IN OUT - aes_ctr_prf_state_t *prf_state, - IN const must_be_odd_t must_be_odd) -{ - // Generate random data - GUARD(aes_ctr_prf(r->raw, prf_state, R_SIZE)); - - // Mask upper bits of the MSByte - r->raw[R_SIZE - 1] &= MASK(R_BITS + 8 - (R_SIZE * 8)); - - if(must_be_odd == MUST_BE_ODD) - { - make_odd_weight(r); - } - - return SUCCESS; -} - -_INLINE_ int -is_new(IN const idx_t wlist[], IN const uint32_t ctr) -{ - for(uint32_t i = 0; i < ctr; i++) - { - if(wlist[i] == wlist[ctr]) - { - return 0; - } - } - - return 1; -} - -// Assumption 1) paddded_len % 64 = 0! -// Assumption 2) a is a len bits array. It is padded to be a padded_len -// bytes array. The padded area may be modified and should -// be ignored outside the function scope. 
-ret_t -generate_sparse_rep(OUT uint64_t * a, - OUT idx_t wlist[], - IN const uint32_t weight, - IN const uint32_t len, - IN const uint32_t padded_len, - IN OUT aes_ctr_prf_state_t *prf_state) -{ - assert(padded_len % 64 == 0); - // Bits comparison - assert((padded_len * 8) >= len); - - uint64_t ctr = 0; - - // Generate weight rand numbers - do - { - GUARD(get_rand_mod_len(&wlist[ctr], len, prf_state)); - ctr += is_new(wlist, ctr); - } while(ctr < weight); - - // Initialize to zero - memset(a, 0, (len + 7) >> 3); - - // Assign values to "a" - secure_set_bits(a, wlist, padded_len, weight); - - return SUCCESS; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "sampling.h" +#include <assert.h> +#include <string.h> + +_INLINE_ ret_t +get_rand_mod_len(OUT uint32_t * rand_pos, + IN const uint32_t len, + IN OUT aes_ctr_prf_state_t *prf_state) +{ + const uint64_t mask = MASK(bit_scan_reverse(len)); + + do + { + // Generate 128bit of random numbers + GUARD(aes_ctr_prf((uint8_t *)rand_pos, prf_state, sizeof(*rand_pos))); + + // Mask only relevant bits + (*rand_pos) &= mask; + + // Break if a number smaller than len is found + if((*rand_pos) < len) + { + break; + } + + } while(1); + + return SUCCESS; +} + +_INLINE_ void +make_odd_weight(IN OUT r_t *r) +{ + if(((r_bits_vector_weight(r) % 2) == 1)) + { + // Already odd + return; + } + + r->raw[0] ^= 1; +} + +// IN: must_be_odd - 1 true, 0 not +ret_t +sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, + IN OUT + aes_ctr_prf_state_t *prf_state, + IN const must_be_odd_t must_be_odd) +{ + // Generate random data + GUARD(aes_ctr_prf(r->raw, prf_state, R_SIZE)); + + // Mask upper bits of the MSByte + r->raw[R_SIZE - 1] &= MASK(R_BITS + 8 - (R_SIZE * 8)); + + if(must_be_odd == MUST_BE_ODD) + { + make_odd_weight(r); + } + + return SUCCESS; +} + +_INLINE_ int +is_new(IN const idx_t wlist[], IN const uint32_t ctr) +{ + for(uint32_t i = 0; i < ctr; i++) + { + if(wlist[i] == wlist[ctr]) + { + return 0; + } + } + + return 1; +} + +// Assumption 1) paddded_len % 64 = 0! +// Assumption 2) a is a len bits array. It is padded to be a padded_len +// bytes array. The padded area may be modified and should +// be ignored outside the function scope. +ret_t +generate_sparse_rep(OUT uint64_t * a, + OUT idx_t wlist[], + IN const uint32_t weight, + IN const uint32_t len, + IN const uint32_t padded_len, + IN OUT aes_ctr_prf_state_t *prf_state) +{ + assert(padded_len % 64 == 0); + // Bits comparison + assert((padded_len * 8) >= len); + + uint64_t ctr = 0; + + // Generate weight rand numbers + do + { + GUARD(get_rand_mod_len(&wlist[ctr], len, prf_state)); + ctr += is_new(wlist, ctr); + } while(ctr < weight); + + // Initialize to zero + memset(a, 0, (len + 7) >> 3); + + // Assign values to "a" + secure_set_bits(a, wlist, padded_len, weight); + + return SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.h index 1ffd56f34a..8d6caa6d7c 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling.h @@ -1,78 +1,78 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
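get_rand_mod_len() draws a uniform index below len by masking the PRF output down to a power-of-two range and rejecting out-of-range values, so there is no modulo bias, and generate_sparse_rep() keeps drawing until it has weight distinct indices. The sketch below mirrors that loop with rand() standing in for the AES-CTR PRF and a covering_mask helper standing in for MASK(bit_scan_reverse(len)); both substitutions are assumptions made only so the example runs on its own:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

/* Smallest mask of the form 2^k - 1 that covers len - 1. */
static uint32_t covering_mask(uint32_t len)
{
    uint32_t mask = 0;
    while (mask < len - 1) {
        mask = (mask << 1) | 1;
    }
    return mask;
}

/* Draw one uniform value in [0, len) by masking and rejecting. */
static uint32_t rand_mod_len(uint32_t len, uint32_t mask)
{
    uint32_t v;
    do {
        v = (uint32_t)rand() & mask; /* stand-in for aes_ctr_prf() */
    } while (v >= len);
    return v;
}

/* Is wlist[ctr] different from all earlier entries? */
static int is_new(const uint32_t *wlist, uint32_t ctr)
{
    for (uint32_t i = 0; i < ctr; i++) {
        if (wlist[i] == wlist[ctr]) {
            return 0;
        }
    }
    return 1;
}

int main(void)
{
    enum { WEIGHT = 7, LEN = 101 };
    uint32_t wlist[WEIGHT];
    const uint32_t mask = covering_mask(LEN);
    uint32_t ctr = 0;

    srand(1);
    do {
        wlist[ctr] = rand_mod_len(LEN, mask);
        ctr += is_new(wlist, ctr); /* keep only previously unseen indices */
    } while (ctr < WEIGHT);

    for (uint32_t i = 0; i < WEIGHT; i++) {
        printf("%u ", wlist[i]);
    }
    printf("\n");
    return 0;
}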
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "aes_ctr_prf.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_result.h" -#include "utilities.h" - -typedef enum -{ - NO_RESTRICTION = 0, - MUST_BE_ODD = 1 -} must_be_odd_t; - -_INLINE_ ret_t -get_seeds(OUT seeds_t *seeds) -{ - if(s2n_result_is_ok(s2n_get_random_bytes(seeds->seed[0].raw, sizeof(seeds_t)))) - { - return SUCCESS; - } - else - { - BIKE_ERROR(E_FAIL_TO_GET_SEED); - } -} - -// Return's an array of r pseudorandom bits -// No restrictions exist for the top or bottom bits - -// in case an odd number is required then set must_be_odd=1 -// Uses the provided prf context -ret_t -sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, - IN OUT - aes_ctr_prf_state_t *prf_state, - IN must_be_odd_t must_be_odd); - -// Return's an array of r pseudorandom bits -// No restrictions exist for the top or bottom bits - -// in case an odd number is required then set must_be_odd=1 -_INLINE_ ret_t -sample_uniform_r_bits(OUT r_t *r, - IN const seed_t * seed, - IN const must_be_odd_t must_be_odd) -{ - // For the seedexpander - DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, aes_ctr_prf_state_cleanup); - - GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, seed)); - - GUARD(sample_uniform_r_bits_with_fixed_prf_context(r, &prf_state, must_be_odd)); - - return SUCCESS; -} - -// Generate a pseudorandom r of length len with a set weight -// Using the pseudorandom ctx supplied -// Outputs also a compressed (not ordered) list of indices -ret_t -generate_sparse_rep(OUT uint64_t *a, - OUT idx_t wlist[], - IN uint32_t weight, - IN uint32_t len, - IN uint32_t padded_len, - IN OUT aes_ctr_prf_state_t *prf_state); - -EXTERNC void -secure_set_bits(IN OUT uint64_t *a, - IN const idx_t wlist[], - IN uint32_t a_len, - IN uint32_t weight); +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "aes_ctr_prf.h" +#include "pq-crypto/s2n_pq_random.h" +#include "utils/s2n_result.h" +#include "utilities.h" + +typedef enum +{ + NO_RESTRICTION = 0, + MUST_BE_ODD = 1 +} must_be_odd_t; + +_INLINE_ ret_t +get_seeds(OUT seeds_t *seeds) +{ + if(s2n_result_is_ok(s2n_get_random_bytes(seeds->seed[0].raw, sizeof(seeds_t)))) + { + return SUCCESS; + } + else + { + BIKE_ERROR(E_FAIL_TO_GET_SEED); + } +} + +// Return's an array of r pseudorandom bits +// No restrictions exist for the top or bottom bits - +// in case an odd number is required then set must_be_odd=1 +// Uses the provided prf context +ret_t +sample_uniform_r_bits_with_fixed_prf_context(OUT r_t *r, + IN OUT + aes_ctr_prf_state_t *prf_state, + IN must_be_odd_t must_be_odd); + +// Return's an array of r pseudorandom bits +// No restrictions exist for the top or bottom bits - +// in case an odd number is required then set must_be_odd=1 +_INLINE_ ret_t +sample_uniform_r_bits(OUT r_t *r, + IN const seed_t * seed, + IN const must_be_odd_t must_be_odd) +{ + // For the seedexpander + DEFER_CLEANUP(aes_ctr_prf_state_t prf_state = {0}, aes_ctr_prf_state_cleanup); + + GUARD(init_aes_ctr_prf_state(&prf_state, MAX_AES_INVOKATION, seed)); + + GUARD(sample_uniform_r_bits_with_fixed_prf_context(r, &prf_state, must_be_odd)); + + return SUCCESS; +} + +// Generate a pseudorandom r of length len with a set weight +// Using the pseudorandom ctx supplied +// Outputs also a compressed (not ordered) list of indices +ret_t +generate_sparse_rep(OUT uint64_t *a, + OUT idx_t wlist[], + IN uint32_t weight, + IN uint32_t len, + IN uint32_t padded_len, + IN OUT aes_ctr_prf_state_t *prf_state); + +EXTERNC void +secure_set_bits(IN OUT uint64_t *a, + IN const idx_t wlist[], + IN uint32_t a_len, + IN uint32_t weight); diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling_portable.c index 1ae7a6f247..e41e6b5cf2 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sampling_portable.c @@ -1,48 +1,48 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "sampling.h" -#include <assert.h> - -#define MAX_WEIGHT (T1 > DV ? T1 : DV) - -// This implementation assumes that the wlist contains fake list -void -secure_set_bits(IN OUT uint64_t * a, - IN const idx_t wlist[], - IN const uint32_t a_len_bytes, - IN const uint32_t weight) -{ - assert(a_len_bytes % 8 == 0); - - // Set arrays to the maximum possible for the stack protector - assert(weight <= MAX_WEIGHT); - uint64_t qw_pos[MAX_WEIGHT]; - uint64_t bit_pos[MAX_WEIGHT]; - - // 1. Identify the QW position of each value and the bit position inside this - // QW. - for(uint32_t j = 0; j < weight; j++) - { - qw_pos[j] = wlist[j] >> 6; - bit_pos[j] = BIT(wlist[j] & 0x3f); - } - - // 2. Fill each QW in a constant time. - for(uint32_t qw = 0; qw < (a_len_bytes / 8); qw++) - { - uint64_t tmp = 0; - for(uint32_t j = 0; j < weight; j++) - { - uint64_t mask = (-1ULL) + (!secure_cmp32(qw_pos[j], qw)); - tmp |= (bit_pos[j] & mask); - } - // Set the bit in a masked way - a[qw] |= tmp; - } -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. 
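secure_set_bits() visits every quadword of the output and selects bits through an arithmetic mask derived from secure_cmp32 (not shown in this hunk), so the secret positions in wlist never influence branches or memory access patterns. The core of the trick is turning an equality test into an all-ones or all-zeros mask without branching; the helper below is one common way to build such a mask, offered as an illustration rather than as the library's secure_cmp32:

#include <stdint.h>
#include <stdio.h>

/* All-ones when x == y, all-zeros otherwise, without branching on the
 * comparison result. */
static uint64_t ct_eq_mask64(uint64_t x, uint64_t y)
{
    const uint64_t d  = x ^ y;                /* zero iff equal       */
    const uint64_t nz = (d | (0 - d)) >> 63;  /* 1 iff d is non-zero  */
    return nz - 1;                            /* 0 -> all-ones, 1 -> 0 */
}

int main(void)
{
    /* Select bit 5 into word 2 of a 4-word array, branch-free. */
    uint64_t a[4] = {0};
    const uint64_t qw_pos = 2, bit_pos = 1ULL << 5;

    for (uint64_t qw = 0; qw < 4; qw++) {
        a[qw] |= bit_pos & ct_eq_mask64(qw_pos, qw);
    }

    printf("%llx %llx %llx %llx\n",
           (unsigned long long)a[0], (unsigned long long)a[1],
           (unsigned long long)a[2], (unsigned long long)a[3]); /* 0 0 20 0 */
    return 0;
}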
+ * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "sampling.h" +#include <assert.h> + +#define MAX_WEIGHT (T1 > DV ? T1 : DV) + +// This implementation assumes that the wlist contains fake list +void +secure_set_bits(IN OUT uint64_t * a, + IN const idx_t wlist[], + IN const uint32_t a_len_bytes, + IN const uint32_t weight) +{ + assert(a_len_bytes % 8 == 0); + + // Set arrays to the maximum possible for the stack protector + assert(weight <= MAX_WEIGHT); + uint64_t qw_pos[MAX_WEIGHT]; + uint64_t bit_pos[MAX_WEIGHT]; + + // 1. Identify the QW position of each value and the bit position inside this + // QW. + for(uint32_t j = 0; j < weight; j++) + { + qw_pos[j] = wlist[j] >> 6; + bit_pos[j] = BIT(wlist[j] & 0x3f); + } + + // 2. Fill each QW in a constant time. + for(uint32_t qw = 0; qw < (a_len_bytes / 8); qw++) + { + uint64_t tmp = 0; + for(uint32_t j = 0; j < weight; j++) + { + uint64_t mask = (-1ULL) + (!secure_cmp32(qw_pos[j], qw)); + tmp |= (bit_pos[j] & mask); + } + // Set the bit in a masked way + a[qw] |= tmp; + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/secure_decode_portable.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/secure_decode_portable.c index 963c3257b7..dc4fbe01d8 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/secure_decode_portable.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/secure_decode_portable.c @@ -1,66 +1,66 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "decode.h" -#include "utilities.h" - -#define R_QW_HALF_LOG2 UPTOPOW2(R_QW / 2) - -_INLINE_ void -rotr_big(OUT syndrome_t *out, IN const syndrome_t *in, IN size_t qw_num) -{ - // For preventing overflows (comparison in bytes) - bike_static_assert(sizeof(*out) > 8 * (R_QW + (2 * R_QW_HALF_LOG2)), - rotr_big_err); - - memcpy(out, in, sizeof(*in)); - - for(uint32_t idx = R_QW_HALF_LOG2; idx >= 1; idx >>= 1) - { - // Convert 32 bit mask to 64 bit mask - const uint64_t mask = ((uint32_t)secure_l32_mask(qw_num, idx) + 1U) - 1ULL; - qw_num = qw_num - (idx & mask); - - // Rotate R_QW quadwords and another idx quadwords needed by the next - // iteration - for(size_t i = 0; i < (R_QW + idx); i++) - { - out->qw[i] = (out->qw[i] & (~mask)) | (out->qw[i + idx] & mask); - } - } -} - -_INLINE_ void -rotr_small(OUT syndrome_t *out, IN const syndrome_t *in, IN const size_t bits) -{ - bike_static_assert(bits < 64, rotr_small_err); - bike_static_assert(sizeof(*out) > (8 * R_QW), rotr_small_qw_err); - - // Convert |bits| to 0/1 by using !!bits then create a mask of 0 or 0xffffffffff - // Use high_shift to avoid undefined behaviour when doing x << 64; - const uint64_t mask = (0 - (!!bits)); - const uint64_t high_shift = (64 - bits) & mask; - - for(size_t i = 0; i < R_QW; i++) - { - const uint64_t low_part = in->qw[i] >> bits; - const uint64_t high_part = (in->qw[i + 1] << high_shift) & mask; - out->qw[i] = low_part | high_part; - } -} - -void -rotate_right(OUT syndrome_t *out, - IN const syndrome_t *in, - IN const uint32_t bitscount) -{ - // Rotate (64-bit) quad-words - rotr_big(out, in, (bitscount / 64)); - // Rotate bits (less than 64) - rotr_small(out, out, (bitscount % 64)); -} +/* Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "decode.h" +#include "utilities.h" + +#define R_QW_HALF_LOG2 UPTOPOW2(R_QW / 2) + +_INLINE_ void +rotr_big(OUT syndrome_t *out, IN const syndrome_t *in, IN size_t qw_num) +{ + // For preventing overflows (comparison in bytes) + bike_static_assert(sizeof(*out) > 8 * (R_QW + (2 * R_QW_HALF_LOG2)), + rotr_big_err); + + memcpy(out, in, sizeof(*in)); + + for(uint32_t idx = R_QW_HALF_LOG2; idx >= 1; idx >>= 1) + { + // Convert 32 bit mask to 64 bit mask + const uint64_t mask = ((uint32_t)secure_l32_mask(qw_num, idx) + 1U) - 1ULL; + qw_num = qw_num - (idx & mask); + + // Rotate R_QW quadwords and another idx quadwords needed by the next + // iteration + for(size_t i = 0; i < (R_QW + idx); i++) + { + out->qw[i] = (out->qw[i] & (~mask)) | (out->qw[i + idx] & mask); + } + } +} + +_INLINE_ void +rotr_small(OUT syndrome_t *out, IN const syndrome_t *in, IN const size_t bits) +{ + bike_static_assert(bits < 64, rotr_small_err); + bike_static_assert(sizeof(*out) > (8 * R_QW), rotr_small_qw_err); + + // Convert |bits| to 0/1 by using !!bits then create a mask of 0 or 0xffffffffff + // Use high_shift to avoid undefined behaviour when doing x << 64; + const uint64_t mask = (0 - (!!bits)); + const uint64_t high_shift = (64 - bits) & mask; + + for(size_t i = 0; i < R_QW; i++) + { + const uint64_t low_part = in->qw[i] >> bits; + const uint64_t high_part = (in->qw[i + 1] << high_shift) & mask; + out->qw[i] = low_part | high_part; + } +} + +void +rotate_right(OUT syndrome_t *out, + IN const syndrome_t *in, + IN const uint32_t bitscount) +{ + // Rotate (64-bit) quad-words + rotr_big(out, in, (bitscount / 64)); + // Rotate bits (less than 64) + rotr_small(out, out, (bitscount % 64)); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sha.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sha.h index 63687055f2..f323cd6b67 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sha.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/sha.h @@ -1,41 +1,41 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "cleanup.h" -#include "types.h" -#include "utilities.h" -#include <openssl/sha.h> - -#define SHA384_HASH_SIZE 48ULL -#define SHA384_HASH_QWORDS (SHA384_HASH_SIZE / 8) - -typedef struct sha384_hash_s -{ - union { - uint8_t raw[SHA384_HASH_SIZE]; - uint64_t qw[SHA384_HASH_QWORDS]; - } u; -} sha384_hash_t; -bike_static_assert(sizeof(sha384_hash_t) == SHA384_HASH_SIZE, sha384_hash_size); - -typedef sha384_hash_t sha_hash_t; - -_INLINE_ void -sha_hash_cleanup(IN OUT sha_hash_t *o) -{ - secure_clean(o->u.raw, sizeof(*o)); -} - -_INLINE_ int -sha(OUT sha_hash_t *hash_out, IN const uint32_t byte_len, IN const uint8_t *msg) -{ - SHA384(msg, byte_len, hash_out->u.raw); - return 1; -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
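rotate_right() splits the rotation into a quadword-granular stage (rotr_big, driven by constant-time masks so the rotation amount stays secret) and a sub-64-bit stage (rotr_small, which stitches each quadword with its neighbour); reading in[i + 1] past R_QW is safe only because syndrome_t stores the syndrome three times. The stitching step in isolation, on an ordinary array and without the constant-time machinery (names are local to the sketch):

#include <stdint.h>
#include <stdio.h>

/* Shift an array of quadwords right by 'bits' (< 64), pulling the low
 * bits of the next word into the top; the same stitching rotr_small()
 * does, minus the duplicated-syndrome and masking details. */
static void shift_right_small(uint64_t *out, const uint64_t *in,
                              size_t n, unsigned bits)
{
    const uint64_t mask       = 0 - (uint64_t)(!!bits); /* 0 when bits == 0 */
    const unsigned high_shift = (64 - bits) & 63;       /* avoid x << 64    */

    for (size_t i = 0; i < n; i++) {
        const uint64_t low_part  = in[i] >> bits;
        const uint64_t high_part = (in[i + 1] << high_shift) & mask;
        out[i] = low_part | high_part;
    }
}

int main(void)
{
    /* n = 2 output words; a third input word supplies the bits shifted in. */
    const uint64_t in[3] = {0x1, 0x2, 0x3};
    uint64_t out[2];

    shift_right_small(out, in, 2, 1);
    printf("%llx %llx\n", (unsigned long long)out[0],
           (unsigned long long)out[1]); /* 0 8000000000000001 */
    return 0;
}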
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "cleanup.h" +#include "types.h" +#include "utilities.h" +#include <openssl/sha.h> + +#define SHA384_HASH_SIZE 48ULL +#define SHA384_HASH_QWORDS (SHA384_HASH_SIZE / 8) + +typedef struct sha384_hash_s +{ + union { + uint8_t raw[SHA384_HASH_SIZE]; + uint64_t qw[SHA384_HASH_QWORDS]; + } u; +} sha384_hash_t; +bike_static_assert(sizeof(sha384_hash_t) == SHA384_HASH_SIZE, sha384_hash_size); + +typedef sha384_hash_t sha_hash_t; + +_INLINE_ void +sha_hash_cleanup(IN OUT sha_hash_t *o) +{ + secure_clean(o->u.raw, sizeof(*o)); +} + +_INLINE_ int +sha(OUT sha_hash_t *hash_out, IN const uint32_t byte_len, IN const uint8_t *msg) +{ + SHA384(msg, byte_len, hash_out->u.raw); + return 1; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/types.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/types.h index 044b7ee38e..647efdf811 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/types.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/types.h @@ -1,139 +1,139 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "bike_defs.h" -#include "error.h" -#include <stdint.h> - -typedef struct uint128_s -{ - union { - uint8_t bytes[16]; - uint32_t dw[4]; - uint64_t qw[2]; - } u; -} uint128_t; - -// Make sure no compiler optimizations. -#pragma pack(push, 1) - -typedef struct seed_s -{ - uint8_t raw[32]; -} seed_t; - -typedef struct seeds_s -{ - seed_t seed[NUM_OF_SEEDS]; -} seeds_t; - -typedef struct r_s -{ - uint8_t raw[R_SIZE]; -} r_t; - -typedef struct e_s -{ - uint8_t raw[N_SIZE]; -} e_t; - -typedef struct generic_param_n_s -{ - r_t val[N0]; -} generic_param_n_t; - -typedef generic_param_n_t ct_t; -typedef generic_param_n_t pk_t; -typedef generic_param_n_t split_e_t; - -typedef uint32_t idx_t; - -typedef struct compressed_idx_dv_s -{ - idx_t val[DV]; -} compressed_idx_dv_t; - -typedef compressed_idx_dv_t compressed_idx_dv_ar_t[N0]; - -typedef struct compressed_idx_t_t -{ - idx_t val[T1]; -} compressed_idx_t_t; - -// The secret key holds both representation for avoiding -// the compression in the decaps stage -typedef struct sk_s -{ - compressed_idx_dv_ar_t wlist; - r_t bin[N0]; -#ifndef INDCPA - r_t sigma0; - r_t sigma1; -#endif -} sk_t; - -// Pad e to the next Block -typedef ALIGN(8) struct padded_e_s -{ - e_t val; - uint8_t pad[N_PADDED_SIZE - N_SIZE]; -} padded_e_t; - -// Pad r to the next Block -typedef ALIGN(8) struct padded_r_s -{ - r_t val; - uint8_t pad[R_PADDED_SIZE - R_SIZE]; -} padded_r_t; - -typedef padded_r_t padded_param_n_t[N0]; -typedef padded_param_n_t pad_sk_t; -typedef padded_param_n_t pad_pk_t; -typedef padded_param_n_t pad_ct_t; - -// Need to allocate twice the room for the results -typedef ALIGN(8) struct dbl_padded_r_s -{ - r_t val; - uint8_t pad[(2 * R_PADDED_SIZE) - R_SIZE]; -} dbl_padded_r_t; - -typedef dbl_padded_r_t dbl_padded_param_n_t[N0]; -typedef dbl_padded_param_n_t dbl_pad_pk_t; -typedef dbl_padded_param_n_t dbl_pad_ct_t; -typedef dbl_padded_param_n_t dbl_pad_syndrome_t; - -typedef struct ss_s -{ - uint8_t raw[ELL_K_SIZE]; -} ss_t; - -// For optimization purposes -// 1- For a faster rotate we duplicate the syndrome (dup1/2) -// 2- We extend it to fit the boundary of DDQW -typedef ALIGN(64) struct syndrome_s -{ - uint64_t qw[3 * R_QW]; -} syndrome_t; - -typedef 
struct upc_slice_s -{ - union { - padded_r_t r; - uint64_t qw[sizeof(padded_r_t) / 8]; - } u; -} upc_slice_t; - -typedef struct upc_s -{ - upc_slice_t slice[SLICES]; -} upc_t; - -#pragma pack(pop) +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "bike_defs.h" +#include "error.h" +#include <stdint.h> + +typedef struct uint128_s +{ + union { + uint8_t bytes[16]; + uint32_t dw[4]; + uint64_t qw[2]; + } u; +} uint128_t; + +// Make sure no compiler optimizations. +#pragma pack(push, 1) + +typedef struct seed_s +{ + uint8_t raw[32]; +} seed_t; + +typedef struct seeds_s +{ + seed_t seed[NUM_OF_SEEDS]; +} seeds_t; + +typedef struct r_s +{ + uint8_t raw[R_SIZE]; +} r_t; + +typedef struct e_s +{ + uint8_t raw[N_SIZE]; +} e_t; + +typedef struct generic_param_n_s +{ + r_t val[N0]; +} generic_param_n_t; + +typedef generic_param_n_t ct_t; +typedef generic_param_n_t pk_t; +typedef generic_param_n_t split_e_t; + +typedef uint32_t idx_t; + +typedef struct compressed_idx_dv_s +{ + idx_t val[DV]; +} compressed_idx_dv_t; + +typedef compressed_idx_dv_t compressed_idx_dv_ar_t[N0]; + +typedef struct compressed_idx_t_t +{ + idx_t val[T1]; +} compressed_idx_t_t; + +// The secret key holds both representation for avoiding +// the compression in the decaps stage +typedef struct sk_s +{ + compressed_idx_dv_ar_t wlist; + r_t bin[N0]; +#ifndef INDCPA + r_t sigma0; + r_t sigma1; +#endif +} sk_t; + +// Pad e to the next Block +typedef ALIGN(8) struct padded_e_s +{ + e_t val; + uint8_t pad[N_PADDED_SIZE - N_SIZE]; +} padded_e_t; + +// Pad r to the next Block +typedef ALIGN(8) struct padded_r_s +{ + r_t val; + uint8_t pad[R_PADDED_SIZE - R_SIZE]; +} padded_r_t; + +typedef padded_r_t padded_param_n_t[N0]; +typedef padded_param_n_t pad_sk_t; +typedef padded_param_n_t pad_pk_t; +typedef padded_param_n_t pad_ct_t; + +// Need to allocate twice the room for the results +typedef ALIGN(8) struct dbl_padded_r_s +{ + r_t val; + uint8_t pad[(2 * R_PADDED_SIZE) - R_SIZE]; +} dbl_padded_r_t; + +typedef dbl_padded_r_t dbl_padded_param_n_t[N0]; +typedef dbl_padded_param_n_t dbl_pad_pk_t; +typedef dbl_padded_param_n_t dbl_pad_ct_t; +typedef dbl_padded_param_n_t dbl_pad_syndrome_t; + +typedef struct ss_s +{ + uint8_t raw[ELL_K_SIZE]; +} ss_t; + +// For optimization purposes +// 1- For a faster rotate we duplicate the syndrome (dup1/2) +// 2- We extend it to fit the boundary of DDQW +typedef ALIGN(64) struct syndrome_s +{ + uint64_t qw[3 * R_QW]; +} syndrome_t; + +typedef struct upc_slice_s +{ + union { + padded_r_t r; + uint64_t qw[sizeof(padded_r_t) / 8]; + } u; +} upc_slice_t; + +typedef struct upc_s +{ + upc_slice_t slice[SLICES]; +} upc_t; + +#pragma pack(pop) diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.c b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.c index 4f049af86a..baed622b78 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.c +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.c @@ -1,160 +1,160 @@ -/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. 
- * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#include "utilities.h" -#include <inttypes.h> - -#define BITS_IN_QW 64ULL -#define BITS_IN_BYTE 8ULL - -// Print a new line only if we prints in qw blocks -_INLINE_ void -print_newline(IN const uint64_t qw_pos) -{ -#ifndef NO_NEWLINE - if((qw_pos % 4) == 3) - { - printf("\n "); - } -#endif -} - -// This function is stitched for R_BITS vector -uint64_t -r_bits_vector_weight(IN const r_t *in) -{ - uint64_t acc = 0; - for(size_t i = 0; i < (R_SIZE - 1); i++) - { - acc += __builtin_popcount(in->raw[i]); - } - - acc += __builtin_popcount(in->raw[R_SIZE - 1] & LAST_R_BYTE_MASK); - return acc; -} - -// Prints a QW in LE/BE in win/linux format -_INLINE_ void -print_uint64(IN const uint64_t val) -{ -// If printing in BE is required swap the order of bytes -#ifdef PRINT_IN_BE - uint64_t tmp = bswap_64(val); -#else - uint64_t tmp = val; -#endif - - printf("%.16" PRIx64, tmp); - -#ifndef NO_SPACE - printf(" "); -#endif -} - -// Last block requires a special handling as we should zero mask all the bits -// above the desired number endien - 0 - BE, 1 - LE Return 1 if last block was -// printed else 0 -_INLINE_ uint8_t -print_last_block(IN const uint8_t *last_bytes, - IN const uint32_t bits_num, - IN const uint32_t endien) -{ - // Floor of bits/64 the reminder is in the next QW - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // How many bits to pad with zero - const uint32_t rem_bits = bits_num - (BITS_IN_QW * qw_num); - - // We read byte byte and not the whole QW to avoid reading bad memory address - const uint32_t bytes_num = ((rem_bits % 8) == 0) ? rem_bits / BITS_IN_BYTE - : 1 + rem_bits / BITS_IN_BYTE; - - // Must be signed for the LE loop - int i; - - if(0 == rem_bits) - { - return 0; - } - - // Mask unneeded bits - const uint8_t last_byte = (rem_bits % 8 == 0) - ? last_bytes[bytes_num - 1] - : last_bytes[bytes_num - 1] & MASK(rem_bits % 8); - // BE - if(0 == endien) - { - for(i = 0; (uint32_t)i < (bytes_num - 1); i++) - { - printf("%.2x", last_bytes[i]); - } - - printf("%.2x", last_byte); - - for(i++; (uint32_t)i < sizeof(uint64_t); i++) - { - printf("__"); - } - } - else - { - for(i = sizeof(uint64_t) - 1; (uint32_t)i >= bytes_num; i--) - { - printf("__"); - } - - printf("%.2x", last_byte); - - for(i--; i >= 0; i--) - { - printf("%.2x", last_bytes[i]); - } - } - -#ifndef NO_SPACE - printf(" "); -#endif - - return 1; -} - -void -print_LE(IN const uint64_t *in, IN const uint32_t bits_num) -{ - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // Print the MSB QW - uint32_t qw_pos = print_last_block((const uint8_t *)&in[qw_num], bits_num, 1); - - // Print each 8 bytes separated by space (if required) - for(int i = ((int)qw_num) - 1; i >= 0; i--, qw_pos++) - { - print_uint64(in[i]); - print_newline(qw_pos); - } - - printf("\n"); -} - -void -print_BE(IN const uint64_t *in, IN const uint32_t bits_num) -{ - const uint32_t qw_num = bits_num / BITS_IN_QW; - - // Print each 16 numbers separatly - for(uint32_t i = 0; i < qw_num; ++i) - { - print_uint64(in[i]); - print_newline(i); - } - - // Print the MSB QW - print_last_block((const uint8_t *)&in[qw_num], bits_num, 0); - - printf("\n"); -} +/* Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. 
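// The same weight computation sketched for a generic bit-vector whose last
// byte is only partially used, as in r_bits_vector_weight() above; nbytes and
// last_byte_mask stand in for R_SIZE and LAST_R_BYTE_MASK and are assumptions,
// not names from the sources. Requires nbytes >= 1.
#include <stddef.h>
#include <stdint.h>

static uint64_t bit_vector_weight(const uint8_t *v, size_t nbytes,
                                  uint8_t last_byte_mask)
{
  uint64_t acc = 0;
  for(size_t i = 0; i + 1 < nbytes; i++)
  {
    acc += (uint64_t)__builtin_popcount(v[i]); // per-byte Hamming weight
  }
  // Bits above the logical length sit in the final byte as padding and must
  // not be counted, hence the mask.
  acc += (uint64_t)__builtin_popcount(v[nbytes - 1] & last_byte_mask);
  return acc;
}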
+ * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#include "utilities.h" +#include <inttypes.h> + +#define BITS_IN_QW 64ULL +#define BITS_IN_BYTE 8ULL + +// Print a new line only if we prints in qw blocks +_INLINE_ void +print_newline(IN const uint64_t qw_pos) +{ +#ifndef NO_NEWLINE + if((qw_pos % 4) == 3) + { + printf("\n "); + } +#endif +} + +// This function is stitched for R_BITS vector +uint64_t +r_bits_vector_weight(IN const r_t *in) +{ + uint64_t acc = 0; + for(size_t i = 0; i < (R_SIZE - 1); i++) + { + acc += __builtin_popcount(in->raw[i]); + } + + acc += __builtin_popcount(in->raw[R_SIZE - 1] & LAST_R_BYTE_MASK); + return acc; +} + +// Prints a QW in LE/BE in win/linux format +_INLINE_ void +print_uint64(IN const uint64_t val) +{ +// If printing in BE is required swap the order of bytes +#ifdef PRINT_IN_BE + uint64_t tmp = bswap_64(val); +#else + uint64_t tmp = val; +#endif + + printf("%.16" PRIx64, tmp); + +#ifndef NO_SPACE + printf(" "); +#endif +} + +// Last block requires a special handling as we should zero mask all the bits +// above the desired number endien - 0 - BE, 1 - LE Return 1 if last block was +// printed else 0 +_INLINE_ uint8_t +print_last_block(IN const uint8_t *last_bytes, + IN const uint32_t bits_num, + IN const uint32_t endien) +{ + // Floor of bits/64 the reminder is in the next QW + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // How many bits to pad with zero + const uint32_t rem_bits = bits_num - (BITS_IN_QW * qw_num); + + // We read byte byte and not the whole QW to avoid reading bad memory address + const uint32_t bytes_num = ((rem_bits % 8) == 0) ? rem_bits / BITS_IN_BYTE + : 1 + rem_bits / BITS_IN_BYTE; + + // Must be signed for the LE loop + int i; + + if(0 == rem_bits) + { + return 0; + } + + // Mask unneeded bits + const uint8_t last_byte = (rem_bits % 8 == 0) + ? last_bytes[bytes_num - 1] + : last_bytes[bytes_num - 1] & MASK(rem_bits % 8); + // BE + if(0 == endien) + { + for(i = 0; (uint32_t)i < (bytes_num - 1); i++) + { + printf("%.2x", last_bytes[i]); + } + + printf("%.2x", last_byte); + + for(i++; (uint32_t)i < sizeof(uint64_t); i++) + { + printf("__"); + } + } + else + { + for(i = sizeof(uint64_t) - 1; (uint32_t)i >= bytes_num; i--) + { + printf("__"); + } + + printf("%.2x", last_byte); + + for(i--; i >= 0; i--) + { + printf("%.2x", last_bytes[i]); + } + } + +#ifndef NO_SPACE + printf(" "); +#endif + + return 1; +} + +void +print_LE(IN const uint64_t *in, IN const uint32_t bits_num) +{ + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // Print the MSB QW + uint32_t qw_pos = print_last_block((const uint8_t *)&in[qw_num], bits_num, 1); + + // Print each 8 bytes separated by space (if required) + for(int i = ((int)qw_num) - 1; i >= 0; i--, qw_pos++) + { + print_uint64(in[i]); + print_newline(qw_pos); + } + + printf("\n"); +} + +void +print_BE(IN const uint64_t *in, IN const uint32_t bits_num) +{ + const uint32_t qw_num = bits_num / BITS_IN_QW; + + // Print each 16 numbers separatly + for(uint32_t i = 0; i < qw_num; ++i) + { + print_uint64(in[i]); + print_newline(i); + } + + // Print the MSB QW + print_last_block((const uint8_t *)&in[qw_num], bits_num, 0); + + printf("\n"); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.h b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.h index be8f4b9b10..bd2f163183 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.h +++ b/contrib/restricted/aws/s2n/pq-crypto/bike_r2/utilities.h @@ -1,158 +1,158 @@ -/* Copyright Amazon.com, Inc. or its affiliates. 
All Rights Reserved. - * SPDX-License-Identifier: Apache-2.0" - * - * Written by Nir Drucker and Shay Gueron - * AWS Cryptographic Algorithms Group. - * (ndrucker@amazon.com, gueron@amazon.com) - */ - -#pragma once - -#include "cleanup.h" - -#ifndef bswap_64 -# define bswap_64(x) __builtin_bswap64(x) -#endif - -// Printing values in Little Endian -void -print_LE(IN const uint64_t *in, IN uint32_t bits_num); - -// Printing values in Big Endian -void -print_BE(IN const uint64_t *in, IN uint32_t bits_num); - -// Printing number is required only in verbose level 2 or above -#if VERBOSE >= 2 -# ifdef PRINT_IN_BE -// Print in Big Endian -# define print(name, in, bits_num) \ - do \ - { \ - EDMSG(name); \ - print_BE(in, bits_num); \ - } while(0) -# else -// Print in Little Endian -# define print(name, in, bits_num) \ - do \ - { \ - EDMSG(name); \ - print_LE(in, bits_num); \ - } while(0) -# endif -#else -// No prints at all -# define print(name, in, bits_num) -#endif - -// Comparing value in a constant time manner -_INLINE_ uint32_t -secure_cmp(IN const uint8_t *a, IN const uint8_t *b, IN const uint32_t size) -{ - volatile uint8_t res = 0; - - for(uint32_t i = 0; i < size; ++i) - { - res |= (a[i] ^ b[i]); - } - - return (0 == res); -} - -uint64_t -r_bits_vector_weight(IN const r_t *in); - -// Constant time -_INLINE_ uint32_t -iszero(IN const uint8_t *s, IN const uint32_t len) -{ - volatile uint32_t res = 0; - for(uint64_t i = 0; i < len; i++) - { - res |= s[i]; - } - return (0 == res); -} - -// BSR returns ceil(log2(val)) -_INLINE_ uint8_t -bit_scan_reverse(uint64_t val) -{ - // index is always smaller than 64 - uint8_t index = 0; - - while(val != 0) - { - val >>= 1; - index++; - } - - return index; -} - -// Return 1 if equal 0 otherwise -_INLINE_ uint32_t -secure_cmp32(IN const uint32_t v1, IN const uint32_t v2) -{ -#if defined(__aarch64__) - uint32_t res; - __asm__ __volatile__("cmp %w1, %w2; \n " - "cset %w0, EQ; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - :); - return res; -#elif defined(__x86_64__) || defined(__i386__) - uint32_t res; - __asm__ __volatile__("xor %%edx, %%edx; \n" - "cmp %1, %2; \n " - "sete %%dl; \n" - "mov %%edx, %0; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - : "rdx"); - return res; -#else - // Insecure comparison: The main purpose of secure_cmp32 is to avoid - // branches and thus to prevent potential side channel attacks. To do that - // we normally leverage some CPU special instructions such as "sete" - // (for __x86_64__) and "cset" (for __aarch64__). When dealing with general - // CPU architectures, the interpretation of the line below is left for the - // compiler, which may lead to an insecure branch. - return (v1 == v2 ? 1 : 0); -#endif -} - -// Return 0 if v1 < v2, (-1) otherwise -_INLINE_ uint32_t -secure_l32_mask(IN const uint32_t v1, IN const uint32_t v2) -{ -#if defined(__aarch64__) - uint32_t res; - __asm__ __volatile__("cmp %w2, %w1; \n " - "cset %w0, HI; \n" - : "=r"(res) - : "r"(v1), "r"(v2) - :); - return (res - 1); -#elif defined(__x86_64__) || defined(__i386__) - uint32_t res; - __asm__ __volatile__("xor %%edx, %%edx; \n" - "cmp %1, %2; \n " - "setl %%dl; \n" - "dec %%edx; \n" - "mov %%edx, %0; \n" - - : "=r"(res) - : "r"(v2), "r"(v1) - : "rdx"); - - return res; -#else - // If v1 >= v2 then the subtraction result is 0^32||(v1-v2) - // else it will be 1^32||(v2-v1+1). Subsequently, negating the upper - // 32 bits gives 0 if v1 < v2 and otherwise (-1). - return ~((uint32_t)(((uint64_t)v1 - (uint64_t)v2) >> 32)); -#endif -} +/* Copyright Amazon.com, Inc. 
or its affiliates. All Rights Reserved. + * SPDX-License-Identifier: Apache-2.0" + * + * Written by Nir Drucker and Shay Gueron + * AWS Cryptographic Algorithms Group. + * (ndrucker@amazon.com, gueron@amazon.com) + */ + +#pragma once + +#include "cleanup.h" + +#ifndef bswap_64 +# define bswap_64(x) __builtin_bswap64(x) +#endif + +// Printing values in Little Endian +void +print_LE(IN const uint64_t *in, IN uint32_t bits_num); + +// Printing values in Big Endian +void +print_BE(IN const uint64_t *in, IN uint32_t bits_num); + +// Printing number is required only in verbose level 2 or above +#if VERBOSE >= 2 +# ifdef PRINT_IN_BE +// Print in Big Endian +# define print(name, in, bits_num) \ + do \ + { \ + EDMSG(name); \ + print_BE(in, bits_num); \ + } while(0) +# else +// Print in Little Endian +# define print(name, in, bits_num) \ + do \ + { \ + EDMSG(name); \ + print_LE(in, bits_num); \ + } while(0) +# endif +#else +// No prints at all +# define print(name, in, bits_num) +#endif + +// Comparing value in a constant time manner +_INLINE_ uint32_t +secure_cmp(IN const uint8_t *a, IN const uint8_t *b, IN const uint32_t size) +{ + volatile uint8_t res = 0; + + for(uint32_t i = 0; i < size; ++i) + { + res |= (a[i] ^ b[i]); + } + + return (0 == res); +} + +uint64_t +r_bits_vector_weight(IN const r_t *in); + +// Constant time +_INLINE_ uint32_t +iszero(IN const uint8_t *s, IN const uint32_t len) +{ + volatile uint32_t res = 0; + for(uint64_t i = 0; i < len; i++) + { + res |= s[i]; + } + return (0 == res); +} + +// BSR returns ceil(log2(val)) +_INLINE_ uint8_t +bit_scan_reverse(uint64_t val) +{ + // index is always smaller than 64 + uint8_t index = 0; + + while(val != 0) + { + val >>= 1; + index++; + } + + return index; +} + +// Return 1 if equal 0 otherwise +_INLINE_ uint32_t +secure_cmp32(IN const uint32_t v1, IN const uint32_t v2) +{ +#if defined(__aarch64__) + uint32_t res; + __asm__ __volatile__("cmp %w1, %w2; \n " + "cset %w0, EQ; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + :); + return res; +#elif defined(__x86_64__) || defined(__i386__) + uint32_t res; + __asm__ __volatile__("xor %%edx, %%edx; \n" + "cmp %1, %2; \n " + "sete %%dl; \n" + "mov %%edx, %0; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + : "rdx"); + return res; +#else + // Insecure comparison: The main purpose of secure_cmp32 is to avoid + // branches and thus to prevent potential side channel attacks. To do that + // we normally leverage some CPU special instructions such as "sete" + // (for __x86_64__) and "cset" (for __aarch64__). When dealing with general + // CPU architectures, the interpretation of the line below is left for the + // compiler, which may lead to an insecure branch. + return (v1 == v2 ? 1 : 0); +#endif +} + +// Return 0 if v1 < v2, (-1) otherwise +_INLINE_ uint32_t +secure_l32_mask(IN const uint32_t v1, IN const uint32_t v2) +{ +#if defined(__aarch64__) + uint32_t res; + __asm__ __volatile__("cmp %w2, %w1; \n " + "cset %w0, HI; \n" + : "=r"(res) + : "r"(v1), "r"(v2) + :); + return (res - 1); +#elif defined(__x86_64__) || defined(__i386__) + uint32_t res; + __asm__ __volatile__("xor %%edx, %%edx; \n" + "cmp %1, %2; \n " + "setl %%dl; \n" + "dec %%edx; \n" + "mov %%edx, %0; \n" + + : "=r"(res) + : "r"(v2), "r"(v1) + : "rdx"); + + return res; +#else + // If v1 >= v2 then the subtraction result is 0^32||(v1-v2) + // else it will be 1^32||(v2-v1+1). Subsequently, negating the upper + // 32 bits gives 0 if v1 < v2 and otherwise (-1). 
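  // Worked example of the expression on the next line (32-bit inputs):
  //   v1 = 5, v2 = 7: (uint64_t)5 - 7 = 0xFFFFFFFFFFFFFFFE, upper 32 bits are
  //                   0xFFFFFFFF, and ~0xFFFFFFFF = 0, i.e. "v1 < v2".
  //   v1 = 7, v2 = 5: (uint64_t)7 - 5 = 0x0000000000000002, upper 32 bits are
  //                   0x00000000, and ~0x00000000 = 0xFFFFFFFF = (-1).
  // rotr_big() in secure_decode_portable.c then widens this 32-bit mask to
  // 64 bits via ((uint32_t)mask + 1U) - 1ULL before masking quadwords.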
+ return ~((uint32_t)(((uint64_t)v1 - (uint64_t)v2) >> 32)); +#endif +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes.h index a14d946b5d..ded80be742 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes.h @@ -1,201 +1,201 @@ -/** - * \file aes.h - * \brief Header defining the API for OQS AES - * - * SPDX-License-Identifier: MIT - */ - -#ifndef OQS_AES_H -#define OQS_AES_H - -#include <stdint.h> -#include <stdlib.h> - -#define AES256_KEYBYTES 32 -#define AESCTR_NONCEBYTES 12 -#define AES_BLOCKBYTES 16 - -typedef void *aes256ctx; - -#define aes256_ecb_keyexp(r, key) OQS_AES256_ECB_load_schedule((key), (r), 1); -#define aes256_ecb(out, in, nblocks, ctx) OQS_AES256_ECB_enc_sch((in), (nblocks) * AES_BLOCKBYTES, *(ctx), (out)); -#define aes256_ctr_keyexp(r, key) OQS_AES256_CTR_load_schedule((key), (r)); -#define aes256_ctr(out, outlen, iv, ctx) OQS_AES256_CTR_sch((iv), AESCTR_NONCEBYTES, *(ctx), (out), (outlen)) -#define aes256_ctx_release(ctx) OQS_AES256_free_schedule(*(ctx)); - - -/** copied from common.h **/ - -#define OQS_EXIT_IF_NULLPTR(x) \ - do { \ - if ( (x) == (void*)0 ) \ - exit(EXIT_FAILURE); \ - } while (0) - -/** copied from common.c **/ - -/** - * Zeros out `len` bytes of memory starting at `ptr`. - * - * Designed to be protected against optimizing compilers which try to remove - * "unnecessary" operations. Should be used for all buffers containing secret - * data. - * - * @param[in] ptr The start of the memory to zero out. - * @param[in] len The number of bytes to zero out. - */ -void OQS_MEM_cleanse(void *ptr, size_t len); - -/** - * Zeros out `len` bytes of memory starting at `ptr`, then frees `ptr`. - * - * Can be called with `ptr = NULL`, in which case no operation is performed. - * - * Designed to be protected against optimizing compilers which try to remove - * "unnecessary" operations. Should be used for all buffers containing secret - * data. - * - * @param[in] ptr The start of the memory to zero out and free. - * @param[in] len The number of bytes to zero out. - */ -void OQS_MEM_secure_free(void *ptr, size_t len); - - - -/** - * Function to fill a key schedule given an initial key for use in ECB mode. - * - * @param key Initial Key. - * @param schedule Abstract data structure for a key schedule. - * @param for_encryption 1 if key schedule is for encryption, 0 if for decryption. - */ -void OQS_AES128_ECB_load_schedule(const uint8_t *key, void **schedule, int for_encryption); - -/** - * Function to free a key schedule. - * - * @param schedule Schedule generated with OQS_AES128_ECB_load_schedule(). - */ -void OQS_AES128_free_schedule(void *schedule); - -/** - * Function to encrypt blocks of plaintext using ECB mode. - * A schedule based on the key is generated and used internally. - * - * @param plaintext Plaintext to be encrypted. - * @param plaintext_len Length on the plaintext in bytes. Must be a multiple of 16. - * @param key Key to be used for encryption. - * @param ciphertext Pointer to a block of memory which >= in size to the plaintext block. The result will be written here. - */ -void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); - -/** - * Function to decrypt blocks of plaintext using ECB mode. - * A schedule based on the key is generated and used internally. - * - * @param ciphertext Ciphertext to be decrypted. 
- * @param ciphertext_len Length on the ciphertext in bytes. Must be a multiple of 16. - * @param key Key to be used for encryption. - * @param plaintext Pointer to a block of memory which >= in size to the ciphertext block. The result will be written here. - */ -void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); - -/** - * Same as OQS_AES128_ECB_enc() except a schedule generated by - * OQS_AES128_ECB_load_schedule() is passed rather then a key. This is faster - * if the same schedule is used for multiple encryptions since it does - * not have to be regenerated from the key. - */ -void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); - -/** - * Same as OQS_AES128_ECB_dec() except a schedule generated by - * OQS_AES128_ECB_load_schedule() is passed rather then a key. This is faster - * if the same schedule is used for multiple encryptions since it does - * not have to be regenerated from the key. - */ -void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); - -/** - * Function to fill a key schedule given an initial key for use in ECB mode. - * - * @param key Initial Key. - * @param schedule Abstract data structure for a key schedule. - * @param for_encryption 1 if key schedule is for encryption, 0 if for decryption. - */ -void OQS_AES256_ECB_load_schedule(const uint8_t *key, void **schedule, int for_encryption); - -/** - * Function to fill a key schedule given an initial key for use in CTR mode. - * - * @param key Initial Key. - * @param schedule Abstract data structure for a key schedule. - */ -void OQS_AES256_CTR_load_schedule(const uint8_t *key, void **schedule); - -/** - * Function to free a key schedule. - * - * @param schedule Schedule generated with OQS_AES256_ECB_load_schedule - * or OQS_AES256_CTR_load_schedule. - */ -void OQS_AES256_free_schedule(void *schedule); - -/** - * Function to encrypt blocks of plaintext using ECB mode. - * A schedule based on the key is generated and used internally. - * - * @param plaintext Plaintext to be encrypted. - * @param plaintext_len Length on the plaintext in bytes. Must be a multiple of 16. - * @param key Key to be used for encryption. - * @param ciphertext Pointer to a block of memory which >= in size to the plaintext block. The result will be written here. - */ -void OQS_AES256_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); - -/** - * Function to decrypt blocks of plaintext using ECB mode. - * A schedule based on the key is generated and used internally. - * - * @param ciphertext Ciphertext to be decrypted. - * @param ciphertext_len Length on the ciphertext in bytes. Must be a multiple of 16. - * @param key Key to be used for encryption. - * @param plaintext Pointer to a block of memory which >= in size to the ciphertext block. The result will be written here. - */ -void OQS_AES256_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); - -/** - * Same as OQS_AES256_ECB_enc() except a schedule generated by - * OQS_AES256_ECB_load_schedule() is passed rather then a key. This is faster - * if the same schedule is used for multiple encryptions since it does - * not have to be regenerated from the key. 
- */ -void OQS_AES256_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); - -/** - * Same as OQS_AES256_ECB_dec() except a schedule generated by - * OQS_AES256_ECB_load_schedule() is passed rather then a key. This is faster - * if the same schedule is used for multiple encryptions since it does - * not have to be regenerated from the key. - */ -void OQS_AES256_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); - -/** - * AES counter mode keystream generator. A scheduled generated by - * OQS_AES256_CTR_load_schedule() is passed rather then a key. - * - * Handles a 12- or 16-byte IV. If a 12-byte IV is given, then 4 counter - * bytes are initialized to all zeros. - * - * @param iv 12- or 16-byte initialization vector. - * @param iv_len Lengh of IV in bytes. - * @param schedule Abstract data structure for a key schedule. - * @param out Pointer to a block of memory which is big enough to contain out_len bytes; the result will be written here. - * @param out_len Length of output bytes to generate. - */ -void OQS_AES256_CTR_sch(const uint8_t *iv, size_t iv_len, const void *schedule, uint8_t *out, size_t out_len); - -#if defined(__cplusplus) -} // extern "C" -#endif - -#endif // OQS_AES_H +/** + * \file aes.h + * \brief Header defining the API for OQS AES + * + * SPDX-License-Identifier: MIT + */ + +#ifndef OQS_AES_H +#define OQS_AES_H + +#include <stdint.h> +#include <stdlib.h> + +#define AES256_KEYBYTES 32 +#define AESCTR_NONCEBYTES 12 +#define AES_BLOCKBYTES 16 + +typedef void *aes256ctx; + +#define aes256_ecb_keyexp(r, key) OQS_AES256_ECB_load_schedule((key), (r), 1); +#define aes256_ecb(out, in, nblocks, ctx) OQS_AES256_ECB_enc_sch((in), (nblocks) * AES_BLOCKBYTES, *(ctx), (out)); +#define aes256_ctr_keyexp(r, key) OQS_AES256_CTR_load_schedule((key), (r)); +#define aes256_ctr(out, outlen, iv, ctx) OQS_AES256_CTR_sch((iv), AESCTR_NONCEBYTES, *(ctx), (out), (outlen)) +#define aes256_ctx_release(ctx) OQS_AES256_free_schedule(*(ctx)); + + +/** copied from common.h **/ + +#define OQS_EXIT_IF_NULLPTR(x) \ + do { \ + if ( (x) == (void*)0 ) \ + exit(EXIT_FAILURE); \ + } while (0) + +/** copied from common.c **/ + +/** + * Zeros out `len` bytes of memory starting at `ptr`. + * + * Designed to be protected against optimizing compilers which try to remove + * "unnecessary" operations. Should be used for all buffers containing secret + * data. + * + * @param[in] ptr The start of the memory to zero out. + * @param[in] len The number of bytes to zero out. + */ +void OQS_MEM_cleanse(void *ptr, size_t len); + +/** + * Zeros out `len` bytes of memory starting at `ptr`, then frees `ptr`. + * + * Can be called with `ptr = NULL`, in which case no operation is performed. + * + * Designed to be protected against optimizing compilers which try to remove + * "unnecessary" operations. Should be used for all buffers containing secret + * data. + * + * @param[in] ptr The start of the memory to zero out and free. + * @param[in] len The number of bytes to zero out. + */ +void OQS_MEM_secure_free(void *ptr, size_t len); + + + +/** + * Function to fill a key schedule given an initial key for use in ECB mode. + * + * @param key Initial Key. + * @param schedule Abstract data structure for a key schedule. + * @param for_encryption 1 if key schedule is for encryption, 0 if for decryption. 
+ */ +void OQS_AES128_ECB_load_schedule(const uint8_t *key, void **schedule, int for_encryption); + +/** + * Function to free a key schedule. + * + * @param schedule Schedule generated with OQS_AES128_ECB_load_schedule(). + */ +void OQS_AES128_free_schedule(void *schedule); + +/** + * Function to encrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param plaintext Plaintext to be encrypted. + * @param plaintext_len Length on the plaintext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param ciphertext Pointer to a block of memory which >= in size to the plaintext block. The result will be written here. + */ +void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); + +/** + * Function to decrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param ciphertext Ciphertext to be decrypted. + * @param ciphertext_len Length on the ciphertext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param plaintext Pointer to a block of memory which >= in size to the ciphertext block. The result will be written here. + */ +void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); + +/** + * Same as OQS_AES128_ECB_enc() except a schedule generated by + * OQS_AES128_ECB_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. + */ +void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); + +/** + * Same as OQS_AES128_ECB_dec() except a schedule generated by + * OQS_AES128_ECB_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. + */ +void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); + +/** + * Function to fill a key schedule given an initial key for use in ECB mode. + * + * @param key Initial Key. + * @param schedule Abstract data structure for a key schedule. + * @param for_encryption 1 if key schedule is for encryption, 0 if for decryption. + */ +void OQS_AES256_ECB_load_schedule(const uint8_t *key, void **schedule, int for_encryption); + +/** + * Function to fill a key schedule given an initial key for use in CTR mode. + * + * @param key Initial Key. + * @param schedule Abstract data structure for a key schedule. + */ +void OQS_AES256_CTR_load_schedule(const uint8_t *key, void **schedule); + +/** + * Function to free a key schedule. + * + * @param schedule Schedule generated with OQS_AES256_ECB_load_schedule + * or OQS_AES256_CTR_load_schedule. + */ +void OQS_AES256_free_schedule(void *schedule); + +/** + * Function to encrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param plaintext Plaintext to be encrypted. + * @param plaintext_len Length on the plaintext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param ciphertext Pointer to a block of memory which >= in size to the plaintext block. The result will be written here. 
+ */ +void OQS_AES256_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext); + +/** + * Function to decrypt blocks of plaintext using ECB mode. + * A schedule based on the key is generated and used internally. + * + * @param ciphertext Ciphertext to be decrypted. + * @param ciphertext_len Length on the ciphertext in bytes. Must be a multiple of 16. + * @param key Key to be used for encryption. + * @param plaintext Pointer to a block of memory which >= in size to the ciphertext block. The result will be written here. + */ +void OQS_AES256_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext); + +/** + * Same as OQS_AES256_ECB_enc() except a schedule generated by + * OQS_AES256_ECB_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. + */ +void OQS_AES256_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext); + +/** + * Same as OQS_AES256_ECB_dec() except a schedule generated by + * OQS_AES256_ECB_load_schedule() is passed rather then a key. This is faster + * if the same schedule is used for multiple encryptions since it does + * not have to be regenerated from the key. + */ +void OQS_AES256_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext); + +/** + * AES counter mode keystream generator. A scheduled generated by + * OQS_AES256_CTR_load_schedule() is passed rather then a key. + * + * Handles a 12- or 16-byte IV. If a 12-byte IV is given, then 4 counter + * bytes are initialized to all zeros. + * + * @param iv 12- or 16-byte initialization vector. + * @param iv_len Lengh of IV in bytes. + * @param schedule Abstract data structure for a key schedule. + * @param out Pointer to a block of memory which is big enough to contain out_len bytes; the result will be written here. + * @param out_len Length of output bytes to generate. 
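/*
 * A minimal caller-side usage sketch for the CTR keystream API declared in
 * this header, assuming the program includes "aes.h" and links against the
 * aes_c.c implementation below; derive_keystream is an illustrative name,
 * not part of the library.
 */
#include <stddef.h>
#include <stdint.h>
#include "aes.h"

static void derive_keystream(const uint8_t key[32], uint8_t *out, size_t out_len) {
    uint8_t iv[AESCTR_NONCEBYTES] = {0}; /* 12-byte nonce; the 4 counter bytes start at zero */
    void *schedule = NULL;

    OQS_AES256_CTR_load_schedule(key, &schedule);
    OQS_AES256_CTR_sch(iv, sizeof(iv), schedule, out, out_len);
    OQS_AES256_free_schedule(schedule); /* zeroizes and frees the expanded key */
}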
+ */ +void OQS_AES256_CTR_sch(const uint8_t *iv, size_t iv_len, const void *schedule, uint8_t *out, size_t out_len); + +#if defined(__cplusplus) +} // extern "C" +#endif + +#endif // OQS_AES_H diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.c index 0feaed6bcf..5f6fec27ab 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.c @@ -1,99 +1,99 @@ -#include "aes256ctr.h" -#include "aes.h" -#include <stddef.h> -#include <stdint.h> -#include <string.h> - -static inline void br_enc32be(unsigned char *dst, uint32_t x) { - dst[3] = (unsigned char)x; - dst[2] = (unsigned char)(x >> 8); - dst[1] = (unsigned char)(x >> 16); - dst[0] = (unsigned char)(x >> 24); -} - -static void aes256_ctr_xof(unsigned char *out, size_t outlen, const unsigned char *iv, uint32_t ctr, const aes256ctx *ctx) { - uint8_t ivw[16]; - uint8_t buf[AES_BLOCKBYTES]; - - memcpy(ivw, iv, AESCTR_NONCEBYTES); - br_enc32be(ivw + AESCTR_NONCEBYTES, ctr); - - while (outlen > AES_BLOCKBYTES) { - aes256_ecb(out, ivw, 1, ctx); - br_enc32be(ivw + AESCTR_NONCEBYTES, ++ctr); - out += AES_BLOCKBYTES; - outlen -= AES_BLOCKBYTES; - } - if (outlen > 0) { - aes256_ecb(buf, ivw, 1, ctx); - for (size_t i = 0; i < outlen; i++) { - out[i] = buf[i]; - } - } -} - -/************************************************* -* Name: aes256_prf -* -* Description: AES256 stream generation in CTR mode using 32-bit counter, -* nonce is zero-padded to 12 bytes, counter starts at zero -* -* Arguments: - uint8_t *output: pointer to output -* - size_t outlen: length of requested output in bytes -* - const uint8_t *key: pointer to 32-byte key -* - uint8_t nonce: 1-byte nonce (will be zero-padded to 12 bytes) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_aes256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce) { - uint8_t iv[12]; - for (int i = 1; i < 12; i++) { - iv[i] = 0; - } - iv[0] = nonce; - - aes256ctx ctx; - aes256_ctr_keyexp(&ctx, key); - aes256_ctr(output, outlen, iv, &ctx); - aes256_ctx_release(&ctx); -} - -/************************************************* -* Name: aes256xof_absorb -* -* Description: AES256 CTR used as a replacement for a XOF; this function -* "absorbs" a 32-byte key and two additional bytes that are zero-padded -* to a 12-byte nonce -* -* Arguments: - aes256xof_ctx *s: pointer to state to "absorb" key and IV into -* - const uint8_t *key: pointer to 32-byte key -* - uint8_t x: first additional byte to "absorb" -* - uint8_t y: second additional byte to "absorb" -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(aes256xof_ctx *s, const uint8_t *key, uint8_t x, uint8_t y) { - aes256_ecb_keyexp(&s->sk_exp, key); - for (int i = 2; i < 12; i++) { - s->iv[i] = 0; - } - s->iv[0] = x; - s->iv[1] = y; - s->ctr = 0; -} - -/************************************************* -* Name: aes256xof_squeezeblocks -* -* Description: AES256 CTR used as a replacement for a XOF; this function -* generates 4 blocks out AES256-CTR output -* -* Arguments: - uint8_t *out: pointer to output -* - size_t nblocks: number of reqested 64-byte output blocks -* - aes256xof_ctx *s: AES "state", i.e. 
expanded key and IV -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(uint8_t *out, size_t nblocks, aes256xof_ctx *s) { - aes256_ctr_xof(out, nblocks * 64, s->iv, s->ctr, &s->sk_exp); - s->ctr += (uint32_t) (4 * nblocks); -} - -/** Free the AES ctx **/ -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(aes256xof_ctx *s) { - aes256_ctx_release(&s->sk_exp); -} +#include "aes256ctr.h" +#include "aes.h" +#include <stddef.h> +#include <stdint.h> +#include <string.h> + +static inline void br_enc32be(unsigned char *dst, uint32_t x) { + dst[3] = (unsigned char)x; + dst[2] = (unsigned char)(x >> 8); + dst[1] = (unsigned char)(x >> 16); + dst[0] = (unsigned char)(x >> 24); +} + +static void aes256_ctr_xof(unsigned char *out, size_t outlen, const unsigned char *iv, uint32_t ctr, const aes256ctx *ctx) { + uint8_t ivw[16]; + uint8_t buf[AES_BLOCKBYTES]; + + memcpy(ivw, iv, AESCTR_NONCEBYTES); + br_enc32be(ivw + AESCTR_NONCEBYTES, ctr); + + while (outlen > AES_BLOCKBYTES) { + aes256_ecb(out, ivw, 1, ctx); + br_enc32be(ivw + AESCTR_NONCEBYTES, ++ctr); + out += AES_BLOCKBYTES; + outlen -= AES_BLOCKBYTES; + } + if (outlen > 0) { + aes256_ecb(buf, ivw, 1, ctx); + for (size_t i = 0; i < outlen; i++) { + out[i] = buf[i]; + } + } +} + +/************************************************* +* Name: aes256_prf +* +* Description: AES256 stream generation in CTR mode using 32-bit counter, +* nonce is zero-padded to 12 bytes, counter starts at zero +* +* Arguments: - uint8_t *output: pointer to output +* - size_t outlen: length of requested output in bytes +* - const uint8_t *key: pointer to 32-byte key +* - uint8_t nonce: 1-byte nonce (will be zero-padded to 12 bytes) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_aes256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce) { + uint8_t iv[12]; + for (int i = 1; i < 12; i++) { + iv[i] = 0; + } + iv[0] = nonce; + + aes256ctx ctx; + aes256_ctr_keyexp(&ctx, key); + aes256_ctr(output, outlen, iv, &ctx); + aes256_ctx_release(&ctx); +} + +/************************************************* +* Name: aes256xof_absorb +* +* Description: AES256 CTR used as a replacement for a XOF; this function +* "absorbs" a 32-byte key and two additional bytes that are zero-padded +* to a 12-byte nonce +* +* Arguments: - aes256xof_ctx *s: pointer to state to "absorb" key and IV into +* - const uint8_t *key: pointer to 32-byte key +* - uint8_t x: first additional byte to "absorb" +* - uint8_t y: second additional byte to "absorb" +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(aes256xof_ctx *s, const uint8_t *key, uint8_t x, uint8_t y) { + aes256_ecb_keyexp(&s->sk_exp, key); + for (int i = 2; i < 12; i++) { + s->iv[i] = 0; + } + s->iv[0] = x; + s->iv[1] = y; + s->ctr = 0; +} + +/************************************************* +* Name: aes256xof_squeezeblocks +* +* Description: AES256 CTR used as a replacement for a XOF; this function +* generates 4 blocks out AES256-CTR output +* +* Arguments: - uint8_t *out: pointer to output +* - size_t nblocks: number of reqested 64-byte output blocks +* - aes256xof_ctx *s: AES "state", i.e. 
expanded key and IV +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(uint8_t *out, size_t nblocks, aes256xof_ctx *s) { + aes256_ctr_xof(out, nblocks * 64, s->iv, s->ctr, &s->sk_exp); + s->ctr += (uint32_t) (4 * nblocks); +} + +/** Free the AES ctx **/ +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(aes256xof_ctx *s) { + aes256_ctx_release(&s->sk_exp); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.h index 3efa256731..fd2bc6d7a9 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes256ctr.h @@ -1,20 +1,20 @@ -#ifndef AES256CTR_H -#define AES256CTR_H - -#include "aes.h" - -#include <stddef.h> -#include <stdint.h> - -typedef struct { - aes256ctx sk_exp; - uint8_t iv[12]; - uint32_t ctr; -} aes256xof_ctx; - -void PQCLEAN_KYBER51290S_CLEAN_aes256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce); -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(aes256xof_ctx *s, const uint8_t *key, uint8_t x, uint8_t y); -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(uint8_t *out, size_t nblocks, aes256xof_ctx *s); -void PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(aes256xof_ctx *s); - -#endif +#ifndef AES256CTR_H +#define AES256CTR_H + +#include "aes.h" + +#include <stddef.h> +#include <stdint.h> + +typedef struct { + aes256ctx sk_exp; + uint8_t iv[12]; + uint32_t ctr; +} aes256xof_ctx; + +void PQCLEAN_KYBER51290S_CLEAN_aes256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce); +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(aes256xof_ctx *s, const uint8_t *key, uint8_t x, uint8_t y); +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(uint8_t *out, size_t nblocks, aes256xof_ctx *s); +void PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(aes256xof_ctx *s); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes_c.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes_c.c index 009054155d..7c9450d393 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes_c.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/aes_c.c @@ -1,576 +1,576 @@ -// Simple, thoroughly commented implementation of 128-bit AES / Rijndael using C -// Chris Hulbert - chris.hulbert@gmail.com - http://splinter.com.au/blog -// References: -// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard -// http://en.wikipedia.org/wiki/Rijndael_key_schedule -// http://en.wikipedia.org/wiki/Rijndael_mix_columns -// http://en.wikipedia.org/wiki/Rijndael_S-box -// This code is public domain, or any OSI-approved license, your choice. No warranty. 
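/*
 * A usage sketch for the XOF replacement declared in aes256ctr.h above:
 * absorb a 32-byte key plus two extra nonce bytes (x, y), then squeeze
 * 64-byte blocks of AES-256-CTR output. xof_demo and its buffer size are
 * illustrative only; only the PQCLEAN_* calls come from the sources above.
 */
#include <stdint.h>
#include "aes256ctr.h"

static void xof_demo(const uint8_t key[32], uint8_t x, uint8_t y, uint8_t out[2 * 64]) {
    aes256xof_ctx state;
    PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(&state, key, x, y);
    PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(out, 2, &state); /* 2 blocks = 128 bytes */
    PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(&state);           /* frees the key schedule */
}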
-// SPDX-License-Identifier: MIT - -#include <assert.h> -#include <stdio.h> -#include <string.h> - -#include "aes.h" - -typedef unsigned char byte; - -// Here are all the lookup tables for the row shifts, rcon, s-boxes, and galois field multiplications -static const byte shift_rows_table[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; -static const byte shift_rows_table_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3}; -static const byte lookup_rcon[] = { - 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a -}; -static const byte lookup_sbox[] = { - 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, - 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, - 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, - 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, - 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, - 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, - 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, - 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, - 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, - 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, - 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, - 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, - 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, - 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, - 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, - 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 -}; -static const byte lookup_sbox_inv[] = { - 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, - 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, - 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, - 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, - 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, - 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, - 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, - 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, - 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, - 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, - 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, - 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, - 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, - 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 
0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, - 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, - 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d -}; -static const byte lookup_g2[] = { - 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, - 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, - 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, - 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, - 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, - 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, - 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, - 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, - 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, - 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, - 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, - 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, - 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, - 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, - 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, - 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 -}; -static const byte lookup_g3[] = { - 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, - 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, - 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, - 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, - 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, - 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, - 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, - 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, - 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, - 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, - 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, - 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, - 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, - 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, - 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, - 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a -}; -static const byte lookup_g9[] = { - 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 
0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, - 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, - 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, - 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, - 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, - 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, - 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, - 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, - 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, - 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, - 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, - 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, - 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, - 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, - 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, - 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 -}; -static const byte lookup_g11[] = { - 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, - 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, - 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, - 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, - 0xf6, 0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, - 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, - 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, - 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, - 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, - 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, - 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, - 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, - 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, - 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, - 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, - 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 -}; -static const byte lookup_g13[] = { - 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, - 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, - 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, - 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 
0x20, - 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, - 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, - 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, - 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, - 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, - 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, - 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, - 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, - 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, - 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, - 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, - 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 -}; -static const byte lookup_g14[] = { - 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, - 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, - 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, - 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, - 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, - 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, - 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, - 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, - 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, - 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, - 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, - 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, - 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, - 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, - 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, - 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d -}; - -// Xor's all elements in a n byte array a by b -static void xor (byte *a, const byte *b, int n) { - int i; - for (i = 0; i < n; i++) { - a[i] ^= b[i]; - } -} - -// Xor the current cipher state by a specific round key -static void xor_round_key(byte *state, const byte *keys, int round) { - xor(state, keys + round * 16, 16); -} - -// Apply the rijndael s-box to all elements in an array -// http://en.wikipedia.org/wiki/Rijndael_S-box -static void sub_bytes(byte *a, int n) { - int i; - for (i = 0; i < n; i++) { - a[i] = lookup_sbox[a[i]]; - } -} -static void sub_bytes_inv(byte *a, int n) { - int i; - for (i = 0; i < n; i++) { - a[i] = lookup_sbox_inv[a[i]]; - } -} - -// Rotate the first four bytes of the input eight bits to the left -static inline void 
rot_word(byte *a) { - byte temp = a[0]; - a[0] = a[1]; - a[1] = a[2]; - a[2] = a[3]; - a[3] = temp; -} - -// Perform the core key schedule transform on 4 bytes, as part of the key expansion process -// http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_core -static void key_schedule_core(byte *a, int i) { - byte temp = a[0]; // Rotate the output eight bits to the left - a[0] = a[1]; - a[1] = a[2]; - a[2] = a[3]; - a[3] = temp; - sub_bytes(a, 4); // Apply Rijndael's S-box on all four individual bytes in the output word - a[0] ^= lookup_rcon[i]; // On just the first (leftmost) byte of the output word, perform the rcon operation with i - // as the input, and exclusive or the rcon output with the first byte of the output word -} - -// Expand the 16-byte key to 11 round keys (176 bytes) -// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule -void OQS_AES128_ECB_load_schedule(const uint8_t *key, void **_schedule, int for_encryption) { - *_schedule = malloc(16 * 11); - OQS_EXIT_IF_NULLPTR(*_schedule); - uint8_t *schedule = (uint8_t *) *_schedule; - int bytes = 16; // The count of how many bytes we've created so far - int i = 1; // The rcon iteration value i is set to 1 - int j; // For repeating the second stage 3 times - byte t[4]; // Temporary working area known as 't' in the Wiki article - memcpy(schedule, key, 16); // The first 16 bytes of the expanded key are simply the encryption key - - while (bytes < 176) { // Until we have 176 bytes of expanded key, we do the following: - memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous four bytes in the expanded key to t - key_schedule_core(t, i); // We perform the key schedule core on t, with i as the rcon iteration value - i++; // We increment i by 1 - xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block 16 bytes before the new expanded key. 
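/* [Editorial aside, not part of this diff] A minimal usage sketch of the
 * schedule API implemented in this file: expand the key once, encrypt any
 * number of 16-byte blocks with it, then wipe and release the 176-byte
 * schedule. The helper name example_aes128_ecb_once and the all-zero key
 * are illustrative only; the OQS_* functions are the ones defined in this
 * file and assumed to be declared in its "aes.h". */
#include <stdint.h>

static void example_aes128_ecb_once(void) {
    uint8_t key[16] = {0};   /* all-zero key, purely illustrative */
    uint8_t pt[32]  = {0};   /* two 16-byte plaintext blocks */
    uint8_t ct[32];
    void *schedule = NULL;

    OQS_AES128_ECB_load_schedule(key, &schedule, 1); /* 11 round keys, 176 bytes */
    OQS_AES128_ECB_enc_sch(pt, sizeof pt, schedule, ct);
    OQS_AES128_free_schedule(schedule);              /* zeroizes, then frees */
    (void) ct;
}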
- memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key - bytes += 4; // Keep track of how many expanded key bytes we've added - - // We then do the following three times to create the next twelve bytes - for (j = 0; j < 3; j++) { - memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous 4 bytes in the expanded key to t - xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block n bytes before - memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key - bytes += 4; // Keep track of how many expanded key bytes we've added - } - } -} - -void OQS_AES128_free_schedule(void *schedule) { - if (schedule != NULL) { - OQS_MEM_secure_free(schedule, 176); - } -} - -// Expand the 16-byte key to 15 round keys (240 bytes) -// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule -void OQS_AES256_ECB_load_schedule(const uint8_t *key, void **_schedule, int for_encryption) { - *_schedule = malloc(16 * 15); - OQS_EXIT_IF_NULLPTR(*_schedule); - uint8_t *schedule = (uint8_t *) *_schedule; - int i = 0; // The count of how many iterations we've done - uint8_t t[4]; // Temporary working area - - // The first 32 bytes of the expanded key are simply the encryption key - memcpy(schedule, key, 8 * 4); - - // The remaining 240-32 bytes of the expanded key are computed in one of three ways: - for (i = 8; i < 4 * 15; i++) { - if (i % 8 == 0) { - memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t - sub_bytes(t, 4); // We apply byte-wise substitution to t - rot_word(t); // We rotate t one byte left - t[0] ^= lookup_rcon[i / 8]; // We xor in the round constant - xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before - memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key - } else if (i % 8 == 4) { - memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t - sub_bytes(t, 4); // We apply byte-wise substitution to t - xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before - memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key - } else { - memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t - xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before - memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key - } - } -} - -void OQS_AES256_CTR_load_schedule(const uint8_t *key, void **_schedule) { - OQS_AES256_ECB_load_schedule(key, _schedule, 1); -} - -/** copied from common.c **/ - -void OQS_MEM_cleanse(void *ptr, size_t len) { -#if defined(_WIN32) - SecureZeroMemory(ptr, len); -#elif defined(HAVE_MEMSET_S) - if (0U < len && memset_s(ptr, (rsize_t)len, 0, (rsize_t)len) != 0) { - abort(); - } -#else - typedef void *(*memset_t)(void *, int, size_t); - static volatile memset_t memset_func = memset; - memset_func(ptr, 0, len); -#endif -} - -void OQS_MEM_secure_free(void *ptr, size_t len) { - if (ptr != NULL) { - OQS_MEM_cleanse(ptr, len); - free(ptr); // IGNORE free-check - } -} - -void OQS_AES256_free_schedule(void *schedule) { - if (schedule != NULL) { - OQS_MEM_secure_free(schedule, 16 * 15); - } -} - -// Apply the shift rows step on the 16 byte cipher state -// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard#The_ShiftRows_step -static void shift_rows(byte *state) { - int 
i; - byte temp[16]; - memcpy(temp, state, 16); - for (i = 0; i < 16; i++) { - state[i] = temp[shift_rows_table[i]]; - } -} -static void shift_rows_inv(byte *state) { - int i; - byte temp[16]; - memcpy(temp, state, 16); - for (i = 0; i < 16; i++) { - state[i] = temp[shift_rows_table_inv[i]]; - } -} - -// Perform the mix columns matrix on one column of 4 bytes -// http://en.wikipedia.org/wiki/Rijndael_mix_columns -static void mix_col(byte *state) { - byte a0 = state[0]; - byte a1 = state[1]; - byte a2 = state[2]; - byte a3 = state[3]; - state[0] = lookup_g2[a0] ^ lookup_g3[a1] ^ a2 ^ a3; - state[1] = lookup_g2[a1] ^ lookup_g3[a2] ^ a3 ^ a0; - state[2] = lookup_g2[a2] ^ lookup_g3[a3] ^ a0 ^ a1; - state[3] = lookup_g2[a3] ^ lookup_g3[a0] ^ a1 ^ a2; -} - -// Perform the mix columns matrix on each column of the 16 bytes -static void mix_cols(byte *state) { - mix_col(state); - mix_col(state + 4); - mix_col(state + 8); - mix_col(state + 12); -} - -// Perform the inverse mix columns matrix on one column of 4 bytes -// http://en.wikipedia.org/wiki/Rijndael_mix_columns -static void mix_col_inv(byte *state) { - byte a0 = state[0]; - byte a1 = state[1]; - byte a2 = state[2]; - byte a3 = state[3]; - state[0] = lookup_g14[a0] ^ lookup_g9[a3] ^ lookup_g13[a2] ^ lookup_g11[a1]; - state[1] = lookup_g14[a1] ^ lookup_g9[a0] ^ lookup_g13[a3] ^ lookup_g11[a2]; - state[2] = lookup_g14[a2] ^ lookup_g9[a1] ^ lookup_g13[a0] ^ lookup_g11[a3]; - state[3] = lookup_g14[a3] ^ lookup_g9[a2] ^ lookup_g13[a1] ^ lookup_g11[a0]; -} - -// Perform the inverse mix columns matrix on each column of the 16 bytes -static void mix_cols_inv(byte *state) { - mix_col_inv(state); - mix_col_inv(state + 4); - mix_col_inv(state + 8); - mix_col_inv(state + 12); -} - -void oqs_aes128_enc_sch_block_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { - const uint8_t *schedule = (const uint8_t *) _schedule; - int i; // To count the rounds - - // First Round - memcpy(ciphertext, plaintext, 16); - xor_round_key(ciphertext, schedule, 0); - - // Middle rounds - for (i = 0; i < 9; i++) { - sub_bytes(ciphertext, 16); - shift_rows(ciphertext); - mix_cols(ciphertext); - xor_round_key(ciphertext, schedule, i + 1); - } - - // Final Round - sub_bytes(ciphertext, 16); - shift_rows(ciphertext); - xor_round_key(ciphertext, schedule, 10); -} - -void oqs_aes128_dec_sch_block_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { - const uint8_t *schedule = (const uint8_t *) _schedule; - int i; // To count the rounds - - // Reverse the final Round - memcpy(plaintext, ciphertext, 16); - xor_round_key(plaintext, schedule, 10); - shift_rows_inv(plaintext); - sub_bytes_inv(plaintext, 16); - - // Reverse the middle rounds - for (i = 0; i < 9; i++) { - xor_round_key(plaintext, schedule, 9 - i); - mix_cols_inv(plaintext); - shift_rows_inv(plaintext); - sub_bytes_inv(plaintext, 16); - } - - // Reverse the first Round - xor_round_key(plaintext, schedule, 0); -} - -void oqs_aes256_enc_sch_block_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { - const uint8_t *schedule = (const uint8_t *) _schedule; - int i; // To count the rounds - - // First Round - memcpy(ciphertext, plaintext, 16); - xor_round_key(ciphertext, schedule, 0); - - // Middle rounds - for (i = 0; i < 13; i++) { - sub_bytes(ciphertext, 16); - shift_rows(ciphertext); - mix_cols(ciphertext); - xor_round_key(ciphertext, schedule, i + 1); - } - - // Final Round - sub_bytes(ciphertext, 16); - shift_rows(ciphertext); - xor_round_key(ciphertext, 
schedule, 14); -} - -void oqs_aes256_dec_sch_block_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { - const uint8_t *schedule = (const uint8_t *) _schedule; - int i; // To count the rounds - - // Reverse the final Round - memcpy(plaintext, ciphertext, 16); - xor_round_key(plaintext, schedule, 14); - shift_rows_inv(plaintext); - sub_bytes_inv(plaintext, 16); - - // Reverse the middle rounds - for (i = 0; i < 13; i++) { - xor_round_key(plaintext, schedule, 13 - i); - mix_cols_inv(plaintext); - shift_rows_inv(plaintext); - sub_bytes_inv(plaintext, 16); - } - - // Reverse the first Round - xor_round_key(plaintext, schedule, 0); -} - -void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { - void *schedule = NULL; - OQS_AES128_ECB_load_schedule(key, &schedule, 1); - OQS_AES128_ECB_enc_sch(plaintext, plaintext_len, schedule, ciphertext); - OQS_AES128_free_schedule(schedule); -} - -void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { - assert(plaintext_len % 16 == 0); - for (size_t block = 0; block < plaintext_len / 16; block++) { - oqs_aes128_enc_sch_block_c(plaintext + (16 * block), schedule, ciphertext + (16 * block)); - } -} - -void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { - void *schedule = NULL; - OQS_AES128_ECB_load_schedule(key, &schedule, 0); - OQS_AES128_ECB_dec_sch(ciphertext, ciphertext_len, schedule, plaintext); - OQS_AES128_free_schedule(schedule); -} - -void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { - assert(ciphertext_len % 16 == 0); - for (size_t block = 0; block < ciphertext_len / 16; block++) { - oqs_aes128_dec_sch_block_c(ciphertext + (16 * block), schedule, plaintext + (16 * block)); - } -} - -void OQS_AES256_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { - void *schedule = NULL; - OQS_AES256_ECB_load_schedule(key, &schedule, 1); - OQS_AES256_ECB_enc_sch(plaintext, plaintext_len, schedule, ciphertext); - OQS_AES256_free_schedule(schedule); -} - -void OQS_AES256_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { - assert(plaintext_len % 16 == 0); - for (size_t block = 0; block < plaintext_len / 16; block++) { - oqs_aes256_enc_sch_block_c(plaintext + (16 * block), schedule, ciphertext + (16 * block)); - } -} - -void OQS_AES256_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { - void *schedule = NULL; - OQS_AES256_ECB_load_schedule(key, &schedule, 0); - OQS_AES256_ECB_dec_sch(ciphertext, ciphertext_len, schedule, plaintext); - OQS_AES256_free_schedule(schedule); -} - -void OQS_AES256_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { - assert(ciphertext_len % 16 == 0); - for (size_t block = 0; block < ciphertext_len / 16; block++) { - oqs_aes256_dec_sch_block_c(ciphertext + (16 * block), schedule, plaintext + (16 * block)); - } -} - -static inline uint32_t UINT32_TO_BE(const uint32_t x) { - union { - uint32_t val; - uint8_t bytes[4]; - } y; - - /* As part of the union, these bytes get read when y.val is read */ - y.bytes[0] = (x >> 24) & 0xFF; - y.bytes[1] = (x >> 16) & 0xFF; - y.bytes[2] = (x >> 8) & 0xFF; - /* cppcheck-suppress 
unreadVariable */ - y.bytes[3] = x & 0xFF; - - return y.val; -} -#define BE_TO_UINT32(n) (uint32_t)((((uint8_t *) &(n))[0] << 24) | (((uint8_t *) &(n))[1] << 16) | (((uint8_t *) &(n))[2] << 8) | (((uint8_t *) &(n))[3] << 0)) - -void OQS_AES256_CTR_sch(const uint8_t *iv, size_t iv_len, const void *schedule, uint8_t *out, size_t out_len) { - uint8_t block[16]; - uint32_t ctr; - uint32_t ctr_be; - memcpy(block, iv, 12); - if (iv_len == 12) { - ctr = 0; - } else if (iv_len == 16) { - memcpy(&ctr_be, &iv[12], 4); - - /* ctr_be gets cast to a uint8_t* before being accessed; the non-zero indices are valid */ - /* cppcheck-suppress objectIndex */ - ctr = BE_TO_UINT32(ctr_be); - } else { - exit(EXIT_FAILURE); - } - while (out_len >= 16) { - ctr_be = UINT32_TO_BE(ctr); - memcpy(&block[12], (uint8_t *) &ctr_be, 4); - oqs_aes256_enc_sch_block_c(block, schedule, out); - out += 16; - out_len -= 16; - ctr++; - } - if (out_len > 0) { - uint8_t tmp[16]; - ctr_be = UINT32_TO_BE(ctr); - memcpy(&block[12], (uint8_t *) &ctr_be, 4); - oqs_aes256_enc_sch_block_c(block, schedule, tmp); - memcpy(out, tmp, out_len); - } -} +// Simple, thoroughly commented implementation of 128-bit AES / Rijndael using C +// Chris Hulbert - chris.hulbert@gmail.com - http://splinter.com.au/blog +// References: +// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard +// http://en.wikipedia.org/wiki/Rijndael_key_schedule +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +// http://en.wikipedia.org/wiki/Rijndael_S-box +// This code is public domain, or any OSI-approved license, your choice. No warranty. +// SPDX-License-Identifier: MIT + +#include <assert.h> +#include <stdio.h> +#include <string.h> + +#include "aes.h" + +typedef unsigned char byte; + +// Here are all the lookup tables for the row shifts, rcon, s-boxes, and galois field multiplications +static const byte shift_rows_table[] = {0, 5, 10, 15, 4, 9, 14, 3, 8, 13, 2, 7, 12, 1, 6, 11}; +static const byte shift_rows_table_inv[] = {0, 13, 10, 7, 4, 1, 14, 11, 8, 5, 2, 15, 12, 9, 6, 3}; +static const byte lookup_rcon[] = { + 0x8d, 0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36, 0x6c, 0xd8, 0xab, 0x4d, 0x9a +}; +static const byte lookup_sbox[] = { + 0x63, 0x7c, 0x77, 0x7b, 0xf2, 0x6b, 0x6f, 0xc5, 0x30, 0x01, 0x67, 0x2b, 0xfe, 0xd7, 0xab, 0x76, + 0xca, 0x82, 0xc9, 0x7d, 0xfa, 0x59, 0x47, 0xf0, 0xad, 0xd4, 0xa2, 0xaf, 0x9c, 0xa4, 0x72, 0xc0, + 0xb7, 0xfd, 0x93, 0x26, 0x36, 0x3f, 0xf7, 0xcc, 0x34, 0xa5, 0xe5, 0xf1, 0x71, 0xd8, 0x31, 0x15, + 0x04, 0xc7, 0x23, 0xc3, 0x18, 0x96, 0x05, 0x9a, 0x07, 0x12, 0x80, 0xe2, 0xeb, 0x27, 0xb2, 0x75, + 0x09, 0x83, 0x2c, 0x1a, 0x1b, 0x6e, 0x5a, 0xa0, 0x52, 0x3b, 0xd6, 0xb3, 0x29, 0xe3, 0x2f, 0x84, + 0x53, 0xd1, 0x00, 0xed, 0x20, 0xfc, 0xb1, 0x5b, 0x6a, 0xcb, 0xbe, 0x39, 0x4a, 0x4c, 0x58, 0xcf, + 0xd0, 0xef, 0xaa, 0xfb, 0x43, 0x4d, 0x33, 0x85, 0x45, 0xf9, 0x02, 0x7f, 0x50, 0x3c, 0x9f, 0xa8, + 0x51, 0xa3, 0x40, 0x8f, 0x92, 0x9d, 0x38, 0xf5, 0xbc, 0xb6, 0xda, 0x21, 0x10, 0xff, 0xf3, 0xd2, + 0xcd, 0x0c, 0x13, 0xec, 0x5f, 0x97, 0x44, 0x17, 0xc4, 0xa7, 0x7e, 0x3d, 0x64, 0x5d, 0x19, 0x73, + 0x60, 0x81, 0x4f, 0xdc, 0x22, 0x2a, 0x90, 0x88, 0x46, 0xee, 0xb8, 0x14, 0xde, 0x5e, 0x0b, 0xdb, + 0xe0, 0x32, 0x3a, 0x0a, 0x49, 0x06, 0x24, 0x5c, 0xc2, 0xd3, 0xac, 0x62, 0x91, 0x95, 0xe4, 0x79, + 0xe7, 0xc8, 0x37, 0x6d, 0x8d, 0xd5, 0x4e, 0xa9, 0x6c, 0x56, 0xf4, 0xea, 0x65, 0x7a, 0xae, 0x08, + 0xba, 0x78, 0x25, 0x2e, 0x1c, 0xa6, 0xb4, 0xc6, 0xe8, 0xdd, 0x74, 0x1f, 0x4b, 0xbd, 0x8b, 0x8a, + 0x70, 0x3e, 0xb5, 0x66, 0x48, 0x03, 0xf6, 0x0e, 0x61, 0x35, 
0x57, 0xb9, 0x86, 0xc1, 0x1d, 0x9e, + 0xe1, 0xf8, 0x98, 0x11, 0x69, 0xd9, 0x8e, 0x94, 0x9b, 0x1e, 0x87, 0xe9, 0xce, 0x55, 0x28, 0xdf, + 0x8c, 0xa1, 0x89, 0x0d, 0xbf, 0xe6, 0x42, 0x68, 0x41, 0x99, 0x2d, 0x0f, 0xb0, 0x54, 0xbb, 0x16 +}; +static const byte lookup_sbox_inv[] = { + 0x52, 0x09, 0x6a, 0xd5, 0x30, 0x36, 0xa5, 0x38, 0xbf, 0x40, 0xa3, 0x9e, 0x81, 0xf3, 0xd7, 0xfb, + 0x7c, 0xe3, 0x39, 0x82, 0x9b, 0x2f, 0xff, 0x87, 0x34, 0x8e, 0x43, 0x44, 0xc4, 0xde, 0xe9, 0xcb, + 0x54, 0x7b, 0x94, 0x32, 0xa6, 0xc2, 0x23, 0x3d, 0xee, 0x4c, 0x95, 0x0b, 0x42, 0xfa, 0xc3, 0x4e, + 0x08, 0x2e, 0xa1, 0x66, 0x28, 0xd9, 0x24, 0xb2, 0x76, 0x5b, 0xa2, 0x49, 0x6d, 0x8b, 0xd1, 0x25, + 0x72, 0xf8, 0xf6, 0x64, 0x86, 0x68, 0x98, 0x16, 0xd4, 0xa4, 0x5c, 0xcc, 0x5d, 0x65, 0xb6, 0x92, + 0x6c, 0x70, 0x48, 0x50, 0xfd, 0xed, 0xb9, 0xda, 0x5e, 0x15, 0x46, 0x57, 0xa7, 0x8d, 0x9d, 0x84, + 0x90, 0xd8, 0xab, 0x00, 0x8c, 0xbc, 0xd3, 0x0a, 0xf7, 0xe4, 0x58, 0x05, 0xb8, 0xb3, 0x45, 0x06, + 0xd0, 0x2c, 0x1e, 0x8f, 0xca, 0x3f, 0x0f, 0x02, 0xc1, 0xaf, 0xbd, 0x03, 0x01, 0x13, 0x8a, 0x6b, + 0x3a, 0x91, 0x11, 0x41, 0x4f, 0x67, 0xdc, 0xea, 0x97, 0xf2, 0xcf, 0xce, 0xf0, 0xb4, 0xe6, 0x73, + 0x96, 0xac, 0x74, 0x22, 0xe7, 0xad, 0x35, 0x85, 0xe2, 0xf9, 0x37, 0xe8, 0x1c, 0x75, 0xdf, 0x6e, + 0x47, 0xf1, 0x1a, 0x71, 0x1d, 0x29, 0xc5, 0x89, 0x6f, 0xb7, 0x62, 0x0e, 0xaa, 0x18, 0xbe, 0x1b, + 0xfc, 0x56, 0x3e, 0x4b, 0xc6, 0xd2, 0x79, 0x20, 0x9a, 0xdb, 0xc0, 0xfe, 0x78, 0xcd, 0x5a, 0xf4, + 0x1f, 0xdd, 0xa8, 0x33, 0x88, 0x07, 0xc7, 0x31, 0xb1, 0x12, 0x10, 0x59, 0x27, 0x80, 0xec, 0x5f, + 0x60, 0x51, 0x7f, 0xa9, 0x19, 0xb5, 0x4a, 0x0d, 0x2d, 0xe5, 0x7a, 0x9f, 0x93, 0xc9, 0x9c, 0xef, + 0xa0, 0xe0, 0x3b, 0x4d, 0xae, 0x2a, 0xf5, 0xb0, 0xc8, 0xeb, 0xbb, 0x3c, 0x83, 0x53, 0x99, 0x61, + 0x17, 0x2b, 0x04, 0x7e, 0xba, 0x77, 0xd6, 0x26, 0xe1, 0x69, 0x14, 0x63, 0x55, 0x21, 0x0c, 0x7d +}; +static const byte lookup_g2[] = { + 0x00, 0x02, 0x04, 0x06, 0x08, 0x0a, 0x0c, 0x0e, 0x10, 0x12, 0x14, 0x16, 0x18, 0x1a, 0x1c, 0x1e, + 0x20, 0x22, 0x24, 0x26, 0x28, 0x2a, 0x2c, 0x2e, 0x30, 0x32, 0x34, 0x36, 0x38, 0x3a, 0x3c, 0x3e, + 0x40, 0x42, 0x44, 0x46, 0x48, 0x4a, 0x4c, 0x4e, 0x50, 0x52, 0x54, 0x56, 0x58, 0x5a, 0x5c, 0x5e, + 0x60, 0x62, 0x64, 0x66, 0x68, 0x6a, 0x6c, 0x6e, 0x70, 0x72, 0x74, 0x76, 0x78, 0x7a, 0x7c, 0x7e, + 0x80, 0x82, 0x84, 0x86, 0x88, 0x8a, 0x8c, 0x8e, 0x90, 0x92, 0x94, 0x96, 0x98, 0x9a, 0x9c, 0x9e, + 0xa0, 0xa2, 0xa4, 0xa6, 0xa8, 0xaa, 0xac, 0xae, 0xb0, 0xb2, 0xb4, 0xb6, 0xb8, 0xba, 0xbc, 0xbe, + 0xc0, 0xc2, 0xc4, 0xc6, 0xc8, 0xca, 0xcc, 0xce, 0xd0, 0xd2, 0xd4, 0xd6, 0xd8, 0xda, 0xdc, 0xde, + 0xe0, 0xe2, 0xe4, 0xe6, 0xe8, 0xea, 0xec, 0xee, 0xf0, 0xf2, 0xf4, 0xf6, 0xf8, 0xfa, 0xfc, 0xfe, + 0x1b, 0x19, 0x1f, 0x1d, 0x13, 0x11, 0x17, 0x15, 0x0b, 0x09, 0x0f, 0x0d, 0x03, 0x01, 0x07, 0x05, + 0x3b, 0x39, 0x3f, 0x3d, 0x33, 0x31, 0x37, 0x35, 0x2b, 0x29, 0x2f, 0x2d, 0x23, 0x21, 0x27, 0x25, + 0x5b, 0x59, 0x5f, 0x5d, 0x53, 0x51, 0x57, 0x55, 0x4b, 0x49, 0x4f, 0x4d, 0x43, 0x41, 0x47, 0x45, + 0x7b, 0x79, 0x7f, 0x7d, 0x73, 0x71, 0x77, 0x75, 0x6b, 0x69, 0x6f, 0x6d, 0x63, 0x61, 0x67, 0x65, + 0x9b, 0x99, 0x9f, 0x9d, 0x93, 0x91, 0x97, 0x95, 0x8b, 0x89, 0x8f, 0x8d, 0x83, 0x81, 0x87, 0x85, + 0xbb, 0xb9, 0xbf, 0xbd, 0xb3, 0xb1, 0xb7, 0xb5, 0xab, 0xa9, 0xaf, 0xad, 0xa3, 0xa1, 0xa7, 0xa5, + 0xdb, 0xd9, 0xdf, 0xdd, 0xd3, 0xd1, 0xd7, 0xd5, 0xcb, 0xc9, 0xcf, 0xcd, 0xc3, 0xc1, 0xc7, 0xc5, + 0xfb, 0xf9, 0xff, 0xfd, 0xf3, 0xf1, 0xf7, 0xf5, 0xeb, 0xe9, 0xef, 0xed, 0xe3, 0xe1, 0xe7, 0xe5 +}; +static const byte lookup_g3[] = { + 0x00, 0x03, 0x06, 0x05, 0x0c, 0x0f, 0x0a, 0x09, 0x18, 0x1b, 
0x1e, 0x1d, 0x14, 0x17, 0x12, 0x11, + 0x30, 0x33, 0x36, 0x35, 0x3c, 0x3f, 0x3a, 0x39, 0x28, 0x2b, 0x2e, 0x2d, 0x24, 0x27, 0x22, 0x21, + 0x60, 0x63, 0x66, 0x65, 0x6c, 0x6f, 0x6a, 0x69, 0x78, 0x7b, 0x7e, 0x7d, 0x74, 0x77, 0x72, 0x71, + 0x50, 0x53, 0x56, 0x55, 0x5c, 0x5f, 0x5a, 0x59, 0x48, 0x4b, 0x4e, 0x4d, 0x44, 0x47, 0x42, 0x41, + 0xc0, 0xc3, 0xc6, 0xc5, 0xcc, 0xcf, 0xca, 0xc9, 0xd8, 0xdb, 0xde, 0xdd, 0xd4, 0xd7, 0xd2, 0xd1, + 0xf0, 0xf3, 0xf6, 0xf5, 0xfc, 0xff, 0xfa, 0xf9, 0xe8, 0xeb, 0xee, 0xed, 0xe4, 0xe7, 0xe2, 0xe1, + 0xa0, 0xa3, 0xa6, 0xa5, 0xac, 0xaf, 0xaa, 0xa9, 0xb8, 0xbb, 0xbe, 0xbd, 0xb4, 0xb7, 0xb2, 0xb1, + 0x90, 0x93, 0x96, 0x95, 0x9c, 0x9f, 0x9a, 0x99, 0x88, 0x8b, 0x8e, 0x8d, 0x84, 0x87, 0x82, 0x81, + 0x9b, 0x98, 0x9d, 0x9e, 0x97, 0x94, 0x91, 0x92, 0x83, 0x80, 0x85, 0x86, 0x8f, 0x8c, 0x89, 0x8a, + 0xab, 0xa8, 0xad, 0xae, 0xa7, 0xa4, 0xa1, 0xa2, 0xb3, 0xb0, 0xb5, 0xb6, 0xbf, 0xbc, 0xb9, 0xba, + 0xfb, 0xf8, 0xfd, 0xfe, 0xf7, 0xf4, 0xf1, 0xf2, 0xe3, 0xe0, 0xe5, 0xe6, 0xef, 0xec, 0xe9, 0xea, + 0xcb, 0xc8, 0xcd, 0xce, 0xc7, 0xc4, 0xc1, 0xc2, 0xd3, 0xd0, 0xd5, 0xd6, 0xdf, 0xdc, 0xd9, 0xda, + 0x5b, 0x58, 0x5d, 0x5e, 0x57, 0x54, 0x51, 0x52, 0x43, 0x40, 0x45, 0x46, 0x4f, 0x4c, 0x49, 0x4a, + 0x6b, 0x68, 0x6d, 0x6e, 0x67, 0x64, 0x61, 0x62, 0x73, 0x70, 0x75, 0x76, 0x7f, 0x7c, 0x79, 0x7a, + 0x3b, 0x38, 0x3d, 0x3e, 0x37, 0x34, 0x31, 0x32, 0x23, 0x20, 0x25, 0x26, 0x2f, 0x2c, 0x29, 0x2a, + 0x0b, 0x08, 0x0d, 0x0e, 0x07, 0x04, 0x01, 0x02, 0x13, 0x10, 0x15, 0x16, 0x1f, 0x1c, 0x19, 0x1a +}; +static const byte lookup_g9[] = { + 0x00, 0x09, 0x12, 0x1b, 0x24, 0x2d, 0x36, 0x3f, 0x48, 0x41, 0x5a, 0x53, 0x6c, 0x65, 0x7e, 0x77, + 0x90, 0x99, 0x82, 0x8b, 0xb4, 0xbd, 0xa6, 0xaf, 0xd8, 0xd1, 0xca, 0xc3, 0xfc, 0xf5, 0xee, 0xe7, + 0x3b, 0x32, 0x29, 0x20, 0x1f, 0x16, 0x0d, 0x04, 0x73, 0x7a, 0x61, 0x68, 0x57, 0x5e, 0x45, 0x4c, + 0xab, 0xa2, 0xb9, 0xb0, 0x8f, 0x86, 0x9d, 0x94, 0xe3, 0xea, 0xf1, 0xf8, 0xc7, 0xce, 0xd5, 0xdc, + 0x76, 0x7f, 0x64, 0x6d, 0x52, 0x5b, 0x40, 0x49, 0x3e, 0x37, 0x2c, 0x25, 0x1a, 0x13, 0x08, 0x01, + 0xe6, 0xef, 0xf4, 0xfd, 0xc2, 0xcb, 0xd0, 0xd9, 0xae, 0xa7, 0xbc, 0xb5, 0x8a, 0x83, 0x98, 0x91, + 0x4d, 0x44, 0x5f, 0x56, 0x69, 0x60, 0x7b, 0x72, 0x05, 0x0c, 0x17, 0x1e, 0x21, 0x28, 0x33, 0x3a, + 0xdd, 0xd4, 0xcf, 0xc6, 0xf9, 0xf0, 0xeb, 0xe2, 0x95, 0x9c, 0x87, 0x8e, 0xb1, 0xb8, 0xa3, 0xaa, + 0xec, 0xe5, 0xfe, 0xf7, 0xc8, 0xc1, 0xda, 0xd3, 0xa4, 0xad, 0xb6, 0xbf, 0x80, 0x89, 0x92, 0x9b, + 0x7c, 0x75, 0x6e, 0x67, 0x58, 0x51, 0x4a, 0x43, 0x34, 0x3d, 0x26, 0x2f, 0x10, 0x19, 0x02, 0x0b, + 0xd7, 0xde, 0xc5, 0xcc, 0xf3, 0xfa, 0xe1, 0xe8, 0x9f, 0x96, 0x8d, 0x84, 0xbb, 0xb2, 0xa9, 0xa0, + 0x47, 0x4e, 0x55, 0x5c, 0x63, 0x6a, 0x71, 0x78, 0x0f, 0x06, 0x1d, 0x14, 0x2b, 0x22, 0x39, 0x30, + 0x9a, 0x93, 0x88, 0x81, 0xbe, 0xb7, 0xac, 0xa5, 0xd2, 0xdb, 0xc0, 0xc9, 0xf6, 0xff, 0xe4, 0xed, + 0x0a, 0x03, 0x18, 0x11, 0x2e, 0x27, 0x3c, 0x35, 0x42, 0x4b, 0x50, 0x59, 0x66, 0x6f, 0x74, 0x7d, + 0xa1, 0xa8, 0xb3, 0xba, 0x85, 0x8c, 0x97, 0x9e, 0xe9, 0xe0, 0xfb, 0xf2, 0xcd, 0xc4, 0xdf, 0xd6, + 0x31, 0x38, 0x23, 0x2a, 0x15, 0x1c, 0x07, 0x0e, 0x79, 0x70, 0x6b, 0x62, 0x5d, 0x54, 0x4f, 0x46 +}; +static const byte lookup_g11[] = { + 0x00, 0x0b, 0x16, 0x1d, 0x2c, 0x27, 0x3a, 0x31, 0x58, 0x53, 0x4e, 0x45, 0x74, 0x7f, 0x62, 0x69, + 0xb0, 0xbb, 0xa6, 0xad, 0x9c, 0x97, 0x8a, 0x81, 0xe8, 0xe3, 0xfe, 0xf5, 0xc4, 0xcf, 0xd2, 0xd9, + 0x7b, 0x70, 0x6d, 0x66, 0x57, 0x5c, 0x41, 0x4a, 0x23, 0x28, 0x35, 0x3e, 0x0f, 0x04, 0x19, 0x12, + 0xcb, 0xc0, 0xdd, 0xd6, 0xe7, 0xec, 0xf1, 0xfa, 0x93, 0x98, 0x85, 0x8e, 0xbf, 0xb4, 0xa9, 0xa2, + 0xf6, 
0xfd, 0xe0, 0xeb, 0xda, 0xd1, 0xcc, 0xc7, 0xae, 0xa5, 0xb8, 0xb3, 0x82, 0x89, 0x94, 0x9f, + 0x46, 0x4d, 0x50, 0x5b, 0x6a, 0x61, 0x7c, 0x77, 0x1e, 0x15, 0x08, 0x03, 0x32, 0x39, 0x24, 0x2f, + 0x8d, 0x86, 0x9b, 0x90, 0xa1, 0xaa, 0xb7, 0xbc, 0xd5, 0xde, 0xc3, 0xc8, 0xf9, 0xf2, 0xef, 0xe4, + 0x3d, 0x36, 0x2b, 0x20, 0x11, 0x1a, 0x07, 0x0c, 0x65, 0x6e, 0x73, 0x78, 0x49, 0x42, 0x5f, 0x54, + 0xf7, 0xfc, 0xe1, 0xea, 0xdb, 0xd0, 0xcd, 0xc6, 0xaf, 0xa4, 0xb9, 0xb2, 0x83, 0x88, 0x95, 0x9e, + 0x47, 0x4c, 0x51, 0x5a, 0x6b, 0x60, 0x7d, 0x76, 0x1f, 0x14, 0x09, 0x02, 0x33, 0x38, 0x25, 0x2e, + 0x8c, 0x87, 0x9a, 0x91, 0xa0, 0xab, 0xb6, 0xbd, 0xd4, 0xdf, 0xc2, 0xc9, 0xf8, 0xf3, 0xee, 0xe5, + 0x3c, 0x37, 0x2a, 0x21, 0x10, 0x1b, 0x06, 0x0d, 0x64, 0x6f, 0x72, 0x79, 0x48, 0x43, 0x5e, 0x55, + 0x01, 0x0a, 0x17, 0x1c, 0x2d, 0x26, 0x3b, 0x30, 0x59, 0x52, 0x4f, 0x44, 0x75, 0x7e, 0x63, 0x68, + 0xb1, 0xba, 0xa7, 0xac, 0x9d, 0x96, 0x8b, 0x80, 0xe9, 0xe2, 0xff, 0xf4, 0xc5, 0xce, 0xd3, 0xd8, + 0x7a, 0x71, 0x6c, 0x67, 0x56, 0x5d, 0x40, 0x4b, 0x22, 0x29, 0x34, 0x3f, 0x0e, 0x05, 0x18, 0x13, + 0xca, 0xc1, 0xdc, 0xd7, 0xe6, 0xed, 0xf0, 0xfb, 0x92, 0x99, 0x84, 0x8f, 0xbe, 0xb5, 0xa8, 0xa3 +}; +static const byte lookup_g13[] = { + 0x00, 0x0d, 0x1a, 0x17, 0x34, 0x39, 0x2e, 0x23, 0x68, 0x65, 0x72, 0x7f, 0x5c, 0x51, 0x46, 0x4b, + 0xd0, 0xdd, 0xca, 0xc7, 0xe4, 0xe9, 0xfe, 0xf3, 0xb8, 0xb5, 0xa2, 0xaf, 0x8c, 0x81, 0x96, 0x9b, + 0xbb, 0xb6, 0xa1, 0xac, 0x8f, 0x82, 0x95, 0x98, 0xd3, 0xde, 0xc9, 0xc4, 0xe7, 0xea, 0xfd, 0xf0, + 0x6b, 0x66, 0x71, 0x7c, 0x5f, 0x52, 0x45, 0x48, 0x03, 0x0e, 0x19, 0x14, 0x37, 0x3a, 0x2d, 0x20, + 0x6d, 0x60, 0x77, 0x7a, 0x59, 0x54, 0x43, 0x4e, 0x05, 0x08, 0x1f, 0x12, 0x31, 0x3c, 0x2b, 0x26, + 0xbd, 0xb0, 0xa7, 0xaa, 0x89, 0x84, 0x93, 0x9e, 0xd5, 0xd8, 0xcf, 0xc2, 0xe1, 0xec, 0xfb, 0xf6, + 0xd6, 0xdb, 0xcc, 0xc1, 0xe2, 0xef, 0xf8, 0xf5, 0xbe, 0xb3, 0xa4, 0xa9, 0x8a, 0x87, 0x90, 0x9d, + 0x06, 0x0b, 0x1c, 0x11, 0x32, 0x3f, 0x28, 0x25, 0x6e, 0x63, 0x74, 0x79, 0x5a, 0x57, 0x40, 0x4d, + 0xda, 0xd7, 0xc0, 0xcd, 0xee, 0xe3, 0xf4, 0xf9, 0xb2, 0xbf, 0xa8, 0xa5, 0x86, 0x8b, 0x9c, 0x91, + 0x0a, 0x07, 0x10, 0x1d, 0x3e, 0x33, 0x24, 0x29, 0x62, 0x6f, 0x78, 0x75, 0x56, 0x5b, 0x4c, 0x41, + 0x61, 0x6c, 0x7b, 0x76, 0x55, 0x58, 0x4f, 0x42, 0x09, 0x04, 0x13, 0x1e, 0x3d, 0x30, 0x27, 0x2a, + 0xb1, 0xbc, 0xab, 0xa6, 0x85, 0x88, 0x9f, 0x92, 0xd9, 0xd4, 0xc3, 0xce, 0xed, 0xe0, 0xf7, 0xfa, + 0xb7, 0xba, 0xad, 0xa0, 0x83, 0x8e, 0x99, 0x94, 0xdf, 0xd2, 0xc5, 0xc8, 0xeb, 0xe6, 0xf1, 0xfc, + 0x67, 0x6a, 0x7d, 0x70, 0x53, 0x5e, 0x49, 0x44, 0x0f, 0x02, 0x15, 0x18, 0x3b, 0x36, 0x21, 0x2c, + 0x0c, 0x01, 0x16, 0x1b, 0x38, 0x35, 0x22, 0x2f, 0x64, 0x69, 0x7e, 0x73, 0x50, 0x5d, 0x4a, 0x47, + 0xdc, 0xd1, 0xc6, 0xcb, 0xe8, 0xe5, 0xf2, 0xff, 0xb4, 0xb9, 0xae, 0xa3, 0x80, 0x8d, 0x9a, 0x97 +}; +static const byte lookup_g14[] = { + 0x00, 0x0e, 0x1c, 0x12, 0x38, 0x36, 0x24, 0x2a, 0x70, 0x7e, 0x6c, 0x62, 0x48, 0x46, 0x54, 0x5a, + 0xe0, 0xee, 0xfc, 0xf2, 0xd8, 0xd6, 0xc4, 0xca, 0x90, 0x9e, 0x8c, 0x82, 0xa8, 0xa6, 0xb4, 0xba, + 0xdb, 0xd5, 0xc7, 0xc9, 0xe3, 0xed, 0xff, 0xf1, 0xab, 0xa5, 0xb7, 0xb9, 0x93, 0x9d, 0x8f, 0x81, + 0x3b, 0x35, 0x27, 0x29, 0x03, 0x0d, 0x1f, 0x11, 0x4b, 0x45, 0x57, 0x59, 0x73, 0x7d, 0x6f, 0x61, + 0xad, 0xa3, 0xb1, 0xbf, 0x95, 0x9b, 0x89, 0x87, 0xdd, 0xd3, 0xc1, 0xcf, 0xe5, 0xeb, 0xf9, 0xf7, + 0x4d, 0x43, 0x51, 0x5f, 0x75, 0x7b, 0x69, 0x67, 0x3d, 0x33, 0x21, 0x2f, 0x05, 0x0b, 0x19, 0x17, + 0x76, 0x78, 0x6a, 0x64, 0x4e, 0x40, 0x52, 0x5c, 0x06, 0x08, 0x1a, 0x14, 0x3e, 0x30, 0x22, 0x2c, + 0x96, 0x98, 0x8a, 0x84, 0xae, 0xa0, 0xb2, 0xbc, 
0xe6, 0xe8, 0xfa, 0xf4, 0xde, 0xd0, 0xc2, 0xcc, + 0x41, 0x4f, 0x5d, 0x53, 0x79, 0x77, 0x65, 0x6b, 0x31, 0x3f, 0x2d, 0x23, 0x09, 0x07, 0x15, 0x1b, + 0xa1, 0xaf, 0xbd, 0xb3, 0x99, 0x97, 0x85, 0x8b, 0xd1, 0xdf, 0xcd, 0xc3, 0xe9, 0xe7, 0xf5, 0xfb, + 0x9a, 0x94, 0x86, 0x88, 0xa2, 0xac, 0xbe, 0xb0, 0xea, 0xe4, 0xf6, 0xf8, 0xd2, 0xdc, 0xce, 0xc0, + 0x7a, 0x74, 0x66, 0x68, 0x42, 0x4c, 0x5e, 0x50, 0x0a, 0x04, 0x16, 0x18, 0x32, 0x3c, 0x2e, 0x20, + 0xec, 0xe2, 0xf0, 0xfe, 0xd4, 0xda, 0xc8, 0xc6, 0x9c, 0x92, 0x80, 0x8e, 0xa4, 0xaa, 0xb8, 0xb6, + 0x0c, 0x02, 0x10, 0x1e, 0x34, 0x3a, 0x28, 0x26, 0x7c, 0x72, 0x60, 0x6e, 0x44, 0x4a, 0x58, 0x56, + 0x37, 0x39, 0x2b, 0x25, 0x0f, 0x01, 0x13, 0x1d, 0x47, 0x49, 0x5b, 0x55, 0x7f, 0x71, 0x63, 0x6d, + 0xd7, 0xd9, 0xcb, 0xc5, 0xef, 0xe1, 0xf3, 0xfd, 0xa7, 0xa9, 0xbb, 0xb5, 0x9f, 0x91, 0x83, 0x8d +}; + +// Xor's all elements in a n byte array a by b +static void xor (byte *a, const byte *b, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] ^= b[i]; + } +} + +// Xor the current cipher state by a specific round key +static void xor_round_key(byte *state, const byte *keys, int round) { + xor(state, keys + round * 16, 16); +} + +// Apply the rijndael s-box to all elements in an array +// http://en.wikipedia.org/wiki/Rijndael_S-box +static void sub_bytes(byte *a, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] = lookup_sbox[a[i]]; + } +} +static void sub_bytes_inv(byte *a, int n) { + int i; + for (i = 0; i < n; i++) { + a[i] = lookup_sbox_inv[a[i]]; + } +} + +// Rotate the first four bytes of the input eight bits to the left +static inline void rot_word(byte *a) { + byte temp = a[0]; + a[0] = a[1]; + a[1] = a[2]; + a[2] = a[3]; + a[3] = temp; +} + +// Perform the core key schedule transform on 4 bytes, as part of the key expansion process +// http://en.wikipedia.org/wiki/Rijndael_key_schedule#Key_schedule_core +static void key_schedule_core(byte *a, int i) { + byte temp = a[0]; // Rotate the output eight bits to the left + a[0] = a[1]; + a[1] = a[2]; + a[2] = a[3]; + a[3] = temp; + sub_bytes(a, 4); // Apply Rijndael's S-box on all four individual bytes in the output word + a[0] ^= lookup_rcon[i]; // On just the first (leftmost) byte of the output word, perform the rcon operation with i + // as the input, and exclusive or the rcon output with the first byte of the output word +} + +// Expand the 16-byte key to 11 round keys (176 bytes) +// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule +void OQS_AES128_ECB_load_schedule(const uint8_t *key, void **_schedule, int for_encryption) { + *_schedule = malloc(16 * 11); + OQS_EXIT_IF_NULLPTR(*_schedule); + uint8_t *schedule = (uint8_t *) *_schedule; + int bytes = 16; // The count of how many bytes we've created so far + int i = 1; // The rcon iteration value i is set to 1 + int j; // For repeating the second stage 3 times + byte t[4]; // Temporary working area known as 't' in the Wiki article + memcpy(schedule, key, 16); // The first 16 bytes of the expanded key are simply the encryption key + + while (bytes < 176) { // Until we have 176 bytes of expanded key, we do the following: + memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous four bytes in the expanded key to t + key_schedule_core(t, i); // We perform the key schedule core on t, with i as the rcon iteration value + i++; // We increment i by 1 + xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block 16 bytes before the new expanded key. 
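/* [Editorial aside, not part of this diff] The lookup_g2/g3/g9/g11/g13/g14
 * tables above are multiplication by a fixed constant in GF(2^8) modulo the
 * AES polynomial x^8 + x^4 + x^3 + x + 1 (0x11b). A standalone sketch that
 * could regenerate them; gf_xtime and gf_mul are illustrative helper names,
 * not part of the file. */
#include <stdint.h>

static uint8_t gf_xtime(uint8_t a) {          /* multiply by x in GF(2^8) */
    return (uint8_t)((a << 1) ^ ((a & 0x80) ? 0x1b : 0x00));
}

static uint8_t gf_mul(uint8_t a, uint8_t b) { /* shift-and-add (xor) multiply */
    uint8_t r = 0;
    while (b) {
        if (b & 1) {
            r ^= a;                           /* accumulate this power-of-x multiple */
        }
        a = gf_xtime(a);
        b >>= 1;
    }
    return r;
}
/* For every byte i: gf_mul(i, 2) == lookup_g2[i], gf_mul(i, 3) == lookup_g3[i],
 * ..., gf_mul(i, 14) == lookup_g14[i]. */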
+ memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key + bytes += 4; // Keep track of how many expanded key bytes we've added + + // We then do the following three times to create the next twelve bytes + for (j = 0; j < 3; j++) { + memcpy(t, schedule + bytes - 4, 4); // We assign the value of the previous 4 bytes in the expanded key to t + xor(t, schedule + bytes - 16, 4); // We exclusive-or t with the four-byte block n bytes before + memcpy(schedule + bytes, t, 4); // This becomes the next 4 bytes in the expanded key + bytes += 4; // Keep track of how many expanded key bytes we've added + } + } +} + +void OQS_AES128_free_schedule(void *schedule) { + if (schedule != NULL) { + OQS_MEM_secure_free(schedule, 176); + } +} + +// Expand the 16-byte key to 15 round keys (240 bytes) +// http://en.wikipedia.org/wiki/Rijndael_key_schedule#The_key_schedule +void OQS_AES256_ECB_load_schedule(const uint8_t *key, void **_schedule, int for_encryption) { + *_schedule = malloc(16 * 15); + OQS_EXIT_IF_NULLPTR(*_schedule); + uint8_t *schedule = (uint8_t *) *_schedule; + int i = 0; // The count of how many iterations we've done + uint8_t t[4]; // Temporary working area + + // The first 32 bytes of the expanded key are simply the encryption key + memcpy(schedule, key, 8 * 4); + + // The remaining 240-32 bytes of the expanded key are computed in one of three ways: + for (i = 8; i < 4 * 15; i++) { + if (i % 8 == 0) { + memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t + sub_bytes(t, 4); // We apply byte-wise substitution to t + rot_word(t); // We rotate t one byte left + t[0] ^= lookup_rcon[i / 8]; // We xor in the round constant + xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before + memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key + } else if (i % 8 == 4) { + memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t + sub_bytes(t, 4); // We apply byte-wise substitution to t + xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before + memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key + } else { + memcpy(t, schedule + 4 * (i - 1), 4); // We assign the value of the previous 4 bytes in the expanded key to t + xor(t, schedule + 4 * (i - 8), 4); // We xor in the four-byte block n bytes before + memcpy(schedule + 4 * i, t, 4); // This becomes the next 4 bytes in the expanded key + } + } +} + +void OQS_AES256_CTR_load_schedule(const uint8_t *key, void **_schedule) { + OQS_AES256_ECB_load_schedule(key, _schedule, 1); +} + +/** copied from common.c **/ + +void OQS_MEM_cleanse(void *ptr, size_t len) { +#if defined(_WIN32) + SecureZeroMemory(ptr, len); +#elif defined(HAVE_MEMSET_S) + if (0U < len && memset_s(ptr, (rsize_t)len, 0, (rsize_t)len) != 0) { + abort(); + } +#else + typedef void *(*memset_t)(void *, int, size_t); + static volatile memset_t memset_func = memset; + memset_func(ptr, 0, len); +#endif +} + +void OQS_MEM_secure_free(void *ptr, size_t len) { + if (ptr != NULL) { + OQS_MEM_cleanse(ptr, len); + free(ptr); // IGNORE free-check + } +} + +void OQS_AES256_free_schedule(void *schedule) { + if (schedule != NULL) { + OQS_MEM_secure_free(schedule, 16 * 15); + } +} + +// Apply the shift rows step on the 16 byte cipher state +// http://en.wikipedia.org/wiki/Advanced_Encryption_Standard#The_ShiftRows_step +static void shift_rows(byte *state) { + int 
i; + byte temp[16]; + memcpy(temp, state, 16); + for (i = 0; i < 16; i++) { + state[i] = temp[shift_rows_table[i]]; + } +} +static void shift_rows_inv(byte *state) { + int i; + byte temp[16]; + memcpy(temp, state, 16); + for (i = 0; i < 16; i++) { + state[i] = temp[shift_rows_table_inv[i]]; + } +} + +// Perform the mix columns matrix on one column of 4 bytes +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +static void mix_col(byte *state) { + byte a0 = state[0]; + byte a1 = state[1]; + byte a2 = state[2]; + byte a3 = state[3]; + state[0] = lookup_g2[a0] ^ lookup_g3[a1] ^ a2 ^ a3; + state[1] = lookup_g2[a1] ^ lookup_g3[a2] ^ a3 ^ a0; + state[2] = lookup_g2[a2] ^ lookup_g3[a3] ^ a0 ^ a1; + state[3] = lookup_g2[a3] ^ lookup_g3[a0] ^ a1 ^ a2; +} + +// Perform the mix columns matrix on each column of the 16 bytes +static void mix_cols(byte *state) { + mix_col(state); + mix_col(state + 4); + mix_col(state + 8); + mix_col(state + 12); +} + +// Perform the inverse mix columns matrix on one column of 4 bytes +// http://en.wikipedia.org/wiki/Rijndael_mix_columns +static void mix_col_inv(byte *state) { + byte a0 = state[0]; + byte a1 = state[1]; + byte a2 = state[2]; + byte a3 = state[3]; + state[0] = lookup_g14[a0] ^ lookup_g9[a3] ^ lookup_g13[a2] ^ lookup_g11[a1]; + state[1] = lookup_g14[a1] ^ lookup_g9[a0] ^ lookup_g13[a3] ^ lookup_g11[a2]; + state[2] = lookup_g14[a2] ^ lookup_g9[a1] ^ lookup_g13[a0] ^ lookup_g11[a3]; + state[3] = lookup_g14[a3] ^ lookup_g9[a2] ^ lookup_g13[a1] ^ lookup_g11[a0]; +} + +// Perform the inverse mix columns matrix on each column of the 16 bytes +static void mix_cols_inv(byte *state) { + mix_col_inv(state); + mix_col_inv(state + 4); + mix_col_inv(state + 8); + mix_col_inv(state + 12); +} + +void oqs_aes128_enc_sch_block_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // First Round + memcpy(ciphertext, plaintext, 16); + xor_round_key(ciphertext, schedule, 0); + + // Middle rounds + for (i = 0; i < 9; i++) { + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + mix_cols(ciphertext); + xor_round_key(ciphertext, schedule, i + 1); + } + + // Final Round + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + xor_round_key(ciphertext, schedule, 10); +} + +void oqs_aes128_dec_sch_block_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // Reverse the final Round + memcpy(plaintext, ciphertext, 16); + xor_round_key(plaintext, schedule, 10); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + + // Reverse the middle rounds + for (i = 0; i < 9; i++) { + xor_round_key(plaintext, schedule, 9 - i); + mix_cols_inv(plaintext); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + } + + // Reverse the first Round + xor_round_key(plaintext, schedule, 0); +} + +void oqs_aes256_enc_sch_block_c(const uint8_t *plaintext, const void *_schedule, uint8_t *ciphertext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // First Round + memcpy(ciphertext, plaintext, 16); + xor_round_key(ciphertext, schedule, 0); + + // Middle rounds + for (i = 0; i < 13; i++) { + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + mix_cols(ciphertext); + xor_round_key(ciphertext, schedule, i + 1); + } + + // Final Round + sub_bytes(ciphertext, 16); + shift_rows(ciphertext); + xor_round_key(ciphertext, 
schedule, 14); +} + +void oqs_aes256_dec_sch_block_c(const uint8_t *ciphertext, const void *_schedule, uint8_t *plaintext) { + const uint8_t *schedule = (const uint8_t *) _schedule; + int i; // To count the rounds + + // Reverse the final Round + memcpy(plaintext, ciphertext, 16); + xor_round_key(plaintext, schedule, 14); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + + // Reverse the middle rounds + for (i = 0; i < 13; i++) { + xor_round_key(plaintext, schedule, 13 - i); + mix_cols_inv(plaintext); + shift_rows_inv(plaintext); + sub_bytes_inv(plaintext, 16); + } + + // Reverse the first Round + xor_round_key(plaintext, schedule, 0); +} + +void OQS_AES128_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { + void *schedule = NULL; + OQS_AES128_ECB_load_schedule(key, &schedule, 1); + OQS_AES128_ECB_enc_sch(plaintext, plaintext_len, schedule, ciphertext); + OQS_AES128_free_schedule(schedule); +} + +void OQS_AES128_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { + assert(plaintext_len % 16 == 0); + for (size_t block = 0; block < plaintext_len / 16; block++) { + oqs_aes128_enc_sch_block_c(plaintext + (16 * block), schedule, ciphertext + (16 * block)); + } +} + +void OQS_AES128_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { + void *schedule = NULL; + OQS_AES128_ECB_load_schedule(key, &schedule, 0); + OQS_AES128_ECB_dec_sch(ciphertext, ciphertext_len, schedule, plaintext); + OQS_AES128_free_schedule(schedule); +} + +void OQS_AES128_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { + assert(ciphertext_len % 16 == 0); + for (size_t block = 0; block < ciphertext_len / 16; block++) { + oqs_aes128_dec_sch_block_c(ciphertext + (16 * block), schedule, plaintext + (16 * block)); + } +} + +void OQS_AES256_ECB_enc(const uint8_t *plaintext, const size_t plaintext_len, const uint8_t *key, uint8_t *ciphertext) { + void *schedule = NULL; + OQS_AES256_ECB_load_schedule(key, &schedule, 1); + OQS_AES256_ECB_enc_sch(plaintext, plaintext_len, schedule, ciphertext); + OQS_AES256_free_schedule(schedule); +} + +void OQS_AES256_ECB_enc_sch(const uint8_t *plaintext, const size_t plaintext_len, const void *schedule, uint8_t *ciphertext) { + assert(plaintext_len % 16 == 0); + for (size_t block = 0; block < plaintext_len / 16; block++) { + oqs_aes256_enc_sch_block_c(plaintext + (16 * block), schedule, ciphertext + (16 * block)); + } +} + +void OQS_AES256_ECB_dec(const uint8_t *ciphertext, const size_t ciphertext_len, const uint8_t *key, uint8_t *plaintext) { + void *schedule = NULL; + OQS_AES256_ECB_load_schedule(key, &schedule, 0); + OQS_AES256_ECB_dec_sch(ciphertext, ciphertext_len, schedule, plaintext); + OQS_AES256_free_schedule(schedule); +} + +void OQS_AES256_ECB_dec_sch(const uint8_t *ciphertext, const size_t ciphertext_len, const void *schedule, uint8_t *plaintext) { + assert(ciphertext_len % 16 == 0); + for (size_t block = 0; block < ciphertext_len / 16; block++) { + oqs_aes256_dec_sch_block_c(ciphertext + (16 * block), schedule, plaintext + (16 * block)); + } +} + +static inline uint32_t UINT32_TO_BE(const uint32_t x) { + union { + uint32_t val; + uint8_t bytes[4]; + } y; + + /* As part of the union, these bytes get read when y.val is read */ + y.bytes[0] = (x >> 24) & 0xFF; + y.bytes[1] = (x >> 16) & 0xFF; + y.bytes[2] = (x >> 8) & 0xFF; + /* cppcheck-suppress 
unreadVariable */ + y.bytes[3] = x & 0xFF; + + return y.val; +} +#define BE_TO_UINT32(n) (uint32_t)((((uint8_t *) &(n))[0] << 24) | (((uint8_t *) &(n))[1] << 16) | (((uint8_t *) &(n))[2] << 8) | (((uint8_t *) &(n))[3] << 0)) + +void OQS_AES256_CTR_sch(const uint8_t *iv, size_t iv_len, const void *schedule, uint8_t *out, size_t out_len) { + uint8_t block[16]; + uint32_t ctr; + uint32_t ctr_be; + memcpy(block, iv, 12); + if (iv_len == 12) { + ctr = 0; + } else if (iv_len == 16) { + memcpy(&ctr_be, &iv[12], 4); + + /* ctr_be gets cast to a uint8_t* before being accessed; the non-zero indices are valid */ + /* cppcheck-suppress objectIndex */ + ctr = BE_TO_UINT32(ctr_be); + } else { + exit(EXIT_FAILURE); + } + while (out_len >= 16) { + ctr_be = UINT32_TO_BE(ctr); + memcpy(&block[12], (uint8_t *) &ctr_be, 4); + oqs_aes256_enc_sch_block_c(block, schedule, out); + out += 16; + out_len -= 16; + ctr++; + } + if (out_len > 0) { + uint8_t tmp[16]; + ctr_be = UINT32_TO_BE(ctr); + memcpy(&block[12], (uint8_t *) &ctr_be, 4); + oqs_aes256_enc_sch_block_c(block, schedule, tmp); + memcpy(out, tmp, out_len); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.c index 89c62b933a..367fd21584 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.c @@ -1,51 +1,51 @@ -#include "cbd.h" -#include "params.h" - -#include <stddef.h> -#include <stdint.h> - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t *x) { - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: cbd -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter KYBER_ETA -* specialized for KYBER_ETA=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_cbd(poly *r, const uint8_t *buf) { - int16_t a, b; - - for (size_t i = 0; i < KYBER_N / 8; i++) { - uint32_t t = load32_littleendian(buf + 4 * i); - uint32_t d = t & 0x55555555; - d += (t >> 1) & 0x55555555; - - for (size_t j = 0; j < 8; j++) { - a = (d >> 4 * j) & 0x3; - b = (d >> (4 * j + 2)) & 0x3; - r->coeffs[8 * i + j] = a - b; - } - } -} +#include "cbd.h" +#include "params.h" + +#include <stddef.h> +#include <stdint.h> + +/************************************************* +* Name: load32_littleendian +* +* Description: load bytes into a 32-bit integer +* in little-endian order +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x +**************************************************/ +static uint32_t load32_littleendian(const uint8_t *x) { + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* +* Name: cbd +* +* Description: Given 
an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter KYBER_ETA +* specialized for KYBER_ETA=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_cbd(poly *r, const uint8_t *buf) { + int16_t a, b; + + for (size_t i = 0; i < KYBER_N / 8; i++) { + uint32_t t = load32_littleendian(buf + 4 * i); + uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (size_t j = 0; j < 8; j++) { + a = (d >> 4 * j) & 0x3; + b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = a - b; + } + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.h index a3f4c21d28..e4f660937a 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/cbd.h @@ -1,8 +1,8 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void PQCLEAN_KYBER51290S_CLEAN_cbd(poly *r, const uint8_t *buf); - -#endif +#ifndef CBD_H +#define CBD_H + +#include "poly.h" + +void PQCLEAN_KYBER51290S_CLEAN_cbd(poly *r, const uint8_t *buf); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.c index c37548326d..b2834d9215 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.c @@ -1,301 +1,301 @@ -#include "indcpa.h" -#include "ntt.h" -#include "params.h" -#include "poly.h" -#include "polyvec.h" -#include "../s2n_pq_random.h" -#include "utils/s2n_safety.h" -#include "symmetric.h" - -#include <stdint.h> - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. 
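*              [Editorial note, not part of this diff: concretely, r is
*              KYBER_POLYVECBYTES + KYBER_SYMBYTES bytes long, i.e. the
*              byte-serialized vector pk followed by the KYBER_SYMBYTES-byte
*              public seed that regenerates the matrix A.]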
-* -* Arguments: uint8_t *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t *r, polyvec *pk, const uint8_t *seed) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(r, pk); - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - r[i + KYBER_POLYVECBYTES] = seed[i]; - } -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, uint8_t *seed, const uint8_t *packedpk) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(pk, packedpk); - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - seed[i] = packedpk[i + KYBER_POLYVECBYTES]; - } -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - const polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t *r, polyvec *sk) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; -* inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t *packedsk) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* const poly *pk: pointer to the input vector of polynomials b -* const uint8_t *seed: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t *r, polyvec *b, poly *v) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(r, b); - PQCLEAN_KYBER51290S_CLEAN_poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t *c) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(b, c); - PQCLEAN_KYBER51290S_CLEAN_poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform 
random integers mod q -* -* Arguments: - int16_t *r: pointer to output buffer -* - size_t len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniform random bytes) -* - size_t buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static size_t rej_uniform(int16_t *r, size_t len, const uint8_t *buf, size_t buflen) { - size_t ctr, pos; - - ctr = pos = 0; - while (ctr < len && pos + 2 <= buflen) { - uint16_t val = (uint16_t)(buf[pos] | ((uint16_t)buf[pos + 1] << 8)); - pos += 2; - - if (val < 19 * KYBER_Q) { - val -= (uint16_t)((val >> 12) * KYBER_Q); // Barrett reduction - r[ctr++] = (int16_t)val; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#define MAXNBLOCKS ((530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* 530 is expected number of required bytes */ -static void gen_matrix(polyvec *a, const uint8_t *seed, int transposed) { - size_t ctr; - uint8_t i, j; - uint8_t buf[XOF_BLOCKBYTES * MAXNBLOCKS + 1]; - xof_state state; - - for (i = 0; i < KYBER_K; i++) { - for (j = 0; j < KYBER_K; j++) { - if (transposed) { - xof_absorb(&state, seed, i, j); - } else { - xof_absorb(&state, seed, j, i); - } - - xof_squeezeblocks(buf, MAXNBLOCKS, &state); - ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, MAXNBLOCKS * XOF_BLOCKBYTES); - - while (ctr < KYBER_N) { - xof_squeezeblocks(buf, 1, &state); - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, XOF_BLOCKBYTES); - } - xof_ctx_release(&state); - } - } -} - -/************************************************* -* Name: indcpa_keypair -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key (of length KYBER_INDCPA_SECRETKEYBYTES bytes) -**************************************************/ -int PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair(uint8_t *pk, uint8_t *sk) { - polyvec a[KYBER_K], e, pkpv, skpv; - uint8_t buf[2 * KYBER_SYMBYTES]; - uint8_t *publicseed = buf; - uint8_t *noiseseed = buf + KYBER_SYMBYTES; - uint8_t nonce = 0; - - GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); - hash_g(buf, buf, KYBER_SYMBYTES); - - gen_a(a, publicseed); - - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(skpv.vec + i, noiseseed, nonce++); - } - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(e.vec + i, noiseseed, nonce++); - } - - PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&skpv); - PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&e); - - // matrix-vector multiplication - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); - PQCLEAN_KYBER51290S_CLEAN_poly_frommont(&pkpv.vec[i]); - } - - 
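    /* [Editorial note, not part of this diff] At this point pkpv holds A*s,
     * accumulated coefficient-wise in the NTT domain (poly_frommont above
     * converts the products out of the Montgomery domain). The next two calls
     * add the error vector e and reduce, so the packed public key is
     * t = A*s + e together with the public seed, while the secret key is s,
     * kept in NTT form. */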
PQCLEAN_KYBER51290S_CLEAN_polyvec_add(&pkpv, &pkpv, &e); - PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(&pkpv); - - pack_sk(sk, &skpv); - pack_pk(pk, &pkpv, publicseed); - return 0; -} - -/************************************************* -* Name: indcpa_enc -* -* Description: Encryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - uint8_t *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) -* - const uint8_t *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) -* - const uint8_t *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - const uint8_t *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(uint8_t *c, - const uint8_t *m, - const uint8_t *pk, - const uint8_t *coins) { - polyvec sp, pkpv, ep, at[KYBER_K], bp; - poly v, k, epp; - uint8_t seed[KYBER_SYMBYTES]; - uint8_t nonce = 0; - - unpack_pk(&pkpv, seed, pk); - PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(&k, m); - gen_at(at, seed); - - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(sp.vec + i, coins, nonce++); - } - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(ep.vec + i, coins, nonce++); - } - PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(&epp, coins, nonce++); - - PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&sp); - - // matrix-vector multiplication - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); - } - - PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&v, &pkpv, &sp); - - PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(&bp); - PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&v); - - PQCLEAN_KYBER51290S_CLEAN_polyvec_add(&bp, &bp, &ep); - PQCLEAN_KYBER51290S_CLEAN_poly_add(&v, &v, &epp); - PQCLEAN_KYBER51290S_CLEAN_poly_add(&v, &v, &k); - PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(&bp); - PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&v); - - pack_ciphertext(c, &bp, &v); -} - -/************************************************* -* Name: indcpa_dec -* -* Description: Decryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. 
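*              [Editorial note, not part of this diff: the routine below
*              computes mp = v - s^T * u, with the ciphertext vector u
*              unpacked into bp and the secret s into skpv; poly_tomsg then
*              maps each coefficient to one message bit, 1 if the coefficient
*              is closer to q/2 than to 0, else 0.]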
-* -* Arguments: - uint8_t *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) -* - const uint8_t *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) -* - const uint8_t *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_indcpa_dec(uint8_t *m, - const uint8_t *c, - const uint8_t *sk) { - polyvec bp, skpv; - poly v, mp; - - unpack_ciphertext(&bp, &v, c); - unpack_sk(&skpv, sk); - - PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&bp); - PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&mp, &skpv, &bp); - PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&mp); - - PQCLEAN_KYBER51290S_CLEAN_poly_sub(&mp, &v, &mp); - PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&mp); - - PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(m, &mp); -} +#include "indcpa.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "../s2n_pq_random.h" +#include "utils/s2n_safety.h" +#include "symmetric.h" + +#include <stdint.h> + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk +* and the public seed used to generate the matrix A. +* +* Arguments: uint8_t *r: pointer to the output serialized public key +* const poly *pk: pointer to the input public-key polynomial +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t *r, polyvec *pk, const uint8_t *seed) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(r, pk); + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + r[i + KYBER_POLYVECBYTES] = seed[i]; + } +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, uint8_t *seed, const uint8_t *packedpk) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(pk, packedpk); + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + seed[i] = packedpk[i + KYBER_POLYVECBYTES]; + } +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key +* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - const polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t *r, polyvec *sk) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; +* inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t *packedsk) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized 
polynomial v +* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* const poly *pk: pointer to the input vector of polynomials b +* const uint8_t *seed: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t *r, polyvec *b, poly *v) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(r, b); + PQCLEAN_KYBER51290S_CLEAN_poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t *c) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(b, c); + PQCLEAN_KYBER51290S_CLEAN_poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* Arguments: - int16_t *r: pointer to output buffer +* - size_t len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniform random bytes) +* - size_t buflen: length of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static size_t rej_uniform(int16_t *r, size_t len, const uint8_t *buf, size_t buflen) { + size_t ctr, pos; + + ctr = pos = 0; + while (ctr < len && pos + 2 <= buflen) { + uint16_t val = (uint16_t)(buf[pos] | ((uint16_t)buf[pos + 1] << 8)); + pos += 2; + + if (val < 19 * KYBER_Q) { + val -= (uint16_t)((val >> 12) * KYBER_Q); // Barrett reduction + r[ctr++] = (int16_t)val; + } + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. 
Performs rejection sampling on output of +* a XOF +* +* Arguments: - polyvec *a: pointer to ouptput matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#define MAXNBLOCKS ((530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* 530 is expected number of required bytes */ +static void gen_matrix(polyvec *a, const uint8_t *seed, int transposed) { + size_t ctr; + uint8_t i, j; + uint8_t buf[XOF_BLOCKBYTES * MAXNBLOCKS + 1]; + xof_state state; + + for (i = 0; i < KYBER_K; i++) { + for (j = 0; j < KYBER_K; j++) { + if (transposed) { + xof_absorb(&state, seed, i, j); + } else { + xof_absorb(&state, seed, j, i); + } + + xof_squeezeblocks(buf, MAXNBLOCKS, &state); + ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, MAXNBLOCKS * XOF_BLOCKBYTES); + + while (ctr < KYBER_N) { + xof_squeezeblocks(buf, 1, &state); + ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, XOF_BLOCKBYTES); + } + xof_ctx_release(&state); + } + } +} + +/************************************************* +* Name: indcpa_keypair +* +* Description: Generates public and private key for the CPA-secure +* public-key encryption scheme underlying Kyber +* +* Arguments: - uint8_t *pk: pointer to output public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +**************************************************/ +int PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair(uint8_t *pk, uint8_t *sk) { + polyvec a[KYBER_K], e, pkpv, skpv; + uint8_t buf[2 * KYBER_SYMBYTES]; + uint8_t *publicseed = buf; + uint8_t *noiseseed = buf + KYBER_SYMBYTES; + uint8_t nonce = 0; + + GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); + hash_g(buf, buf, KYBER_SYMBYTES); + + gen_a(a, publicseed); + + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(skpv.vec + i, noiseseed, nonce++); + } + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(e.vec + i, noiseseed, nonce++); + } + + PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&skpv); + PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&e); + + // matrix-vector multiplication + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); + PQCLEAN_KYBER51290S_CLEAN_poly_frommont(&pkpv.vec[i]); + } + + PQCLEAN_KYBER51290S_CLEAN_polyvec_add(&pkpv, &pkpv, &e); + PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(&pkpv); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, publicseed); + return 0; +} + +/************************************************* +* Name: indcpa_enc +* +* Description: Encryption function of the CPA-secure +* public-key encryption scheme underlying Kyber. 
+* +* Arguments: - uint8_t *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) +* - const uint8_t *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) +* - const uint8_t *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - const uint8_t *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) +* to deterministically generate all randomness +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(uint8_t *c, + const uint8_t *m, + const uint8_t *pk, + const uint8_t *coins) { + polyvec sp, pkpv, ep, at[KYBER_K], bp; + poly v, k, epp; + uint8_t seed[KYBER_SYMBYTES]; + uint8_t nonce = 0; + + unpack_pk(&pkpv, seed, pk); + PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(&k, m); + gen_at(at, seed); + + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(sp.vec + i, coins, nonce++); + } + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(ep.vec + i, coins, nonce++); + } + PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(&epp, coins, nonce++); + + PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&sp); + + // matrix-vector multiplication + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); + } + + PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&v, &pkpv, &sp); + + PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(&bp); + PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&v); + + PQCLEAN_KYBER51290S_CLEAN_polyvec_add(&bp, &bp, &ep); + PQCLEAN_KYBER51290S_CLEAN_poly_add(&v, &v, &epp); + PQCLEAN_KYBER51290S_CLEAN_poly_add(&v, &v, &k); + PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(&bp); + PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&v); + + pack_ciphertext(c, &bp, &v); +} + +/************************************************* +* Name: indcpa_dec +* +* Description: Decryption function of the CPA-secure +* public-key encryption scheme underlying Kyber. 
+* +* Arguments: - uint8_t *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) +* - const uint8_t *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) +* - const uint8_t *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_indcpa_dec(uint8_t *m, + const uint8_t *c, + const uint8_t *sk) { + polyvec bp, skpv; + poly v, mp; + + unpack_ciphertext(&bp, &v, c); + unpack_sk(&skpv, sk); + + PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(&bp); + PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(&mp, &skpv, &bp); + PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&mp); + + PQCLEAN_KYBER51290S_CLEAN_poly_sub(&mp, &v, &mp); + PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&mp); + + PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(m, &mp); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.h index 802e72f9e2..7ed72ca534 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/indcpa.h @@ -1,21 +1,21 @@ -#ifndef INDCPA_H -#define INDCPA_H - -#include <stdint.h> - -int PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair( - uint8_t *pk, - uint8_t *sk); - -void PQCLEAN_KYBER51290S_CLEAN_indcpa_enc( - uint8_t *c, - const uint8_t *m, - const uint8_t *pk, - const uint8_t *coins); - -void PQCLEAN_KYBER51290S_CLEAN_indcpa_dec( - uint8_t *m, - const uint8_t *c, - const uint8_t *sk); - -#endif +#ifndef INDCPA_H +#define INDCPA_H + +#include <stdint.h> + +int PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair( + uint8_t *pk, + uint8_t *sk); + +void PQCLEAN_KYBER51290S_CLEAN_indcpa_enc( + uint8_t *c, + const uint8_t *m, + const uint8_t *pk, + const uint8_t *coins); + +void PQCLEAN_KYBER51290S_CLEAN_indcpa_dec( + uint8_t *m, + const uint8_t *c, + const uint8_t *sk); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/kyber_90s_r2_kem.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/kyber_90s_r2_kem.c index ff28dd0351..75ed9ad22c 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/kyber_90s_r2_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/kyber_90s_r2_kem.c @@ -1,102 +1,102 @@ -#include "indcpa.h" -#include "params.h" -#include "symmetric.h" -#include "verify.h" - -#include "../s2n_pq_random.h" -#include "utils/s2n_safety.h" -#include "tls/s2n_kem.h" - -#include <stdlib.h> - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int kyber_512_90s_r2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { - size_t i; - PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair(pk, sk); - for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++) { - sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i]; - } - hash_h(sk + KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - GUARD_AS_POSIX(s2n_get_random_bytes(sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES)); /* Value z for pseudo-random output on reject */ - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates 
cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to output cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) -* - const uint8_t *pk: pointer to input public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int kyber_512_90s_r2_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { - uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ - uint8_t buf[2 * KYBER_SYMBYTES]; - - GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); - hash_h(buf, buf, KYBER_SYMBYTES); /* Don't release system RNG output */ - - hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ - hash_g(kr, buf, 2 * KYBER_SYMBYTES); - - PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(ct, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ - - hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ - kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) -* - const uint8_t *ct: pointer to input cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. 
-**************************************************/ -int kyber_512_90s_r2_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { - size_t i; - uint8_t fail; - uint8_t cmp[KYBER_CIPHERTEXTBYTES]; - uint8_t buf[2 * KYBER_SYMBYTES]; - uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ - const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; - - PQCLEAN_KYBER51290S_CLEAN_indcpa_dec(buf, ct, sk); - - for (i = 0; i < KYBER_SYMBYTES; i++) { /* Multitarget countermeasure for coins + contributory KEM */ - buf[KYBER_SYMBYTES + i] = sk[KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES + i]; /* Save hash by storing H(pk) in sk */ - } - hash_g(kr, buf, 2 * KYBER_SYMBYTES); - - PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(cmp, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ - - fail = PQCLEAN_KYBER51290S_CLEAN_verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ - - PQCLEAN_KYBER51290S_CLEAN_cmov(kr, sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */ - - kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ - return 0; -} +#include "indcpa.h" +#include "params.h" +#include "symmetric.h" +#include "verify.h" + +#include "../s2n_pq_random.h" +#include "utils/s2n_safety.h" +#include "tls/s2n_kem.h" + +#include <stdlib.h> + +/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int kyber_512_90s_r2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { + size_t i; + PQCLEAN_KYBER51290S_CLEAN_indcpa_keypair(pk, sk); + for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++) { + sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i]; + } + hash_h(sk + KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + GUARD_AS_POSIX(s2n_get_random_bytes(sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES)); /* Value z for pseudo-random output on reject */ + return 0; +} + +/************************************************* +* Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) +* - const uint8_t *pk: pointer to input public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int kyber_512_90s_r2_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { + uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ + uint8_t buf[2 * KYBER_SYMBYTES]; + + GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); + hash_h(buf, buf, KYBER_SYMBYTES); /* Don't release system RNG output */ + + hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ + hash_g(kr, buf, 2 * KYBER_SYMBYTES); + + PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(ct, buf, pk, kr + KYBER_SYMBYTES); /* 
coins are in kr+KYBER_SYMBYTES */ + + hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ + kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) +* - const uint8_t *ct: pointer to input cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. +**************************************************/ +int kyber_512_90s_r2_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { + size_t i; + uint8_t fail; + uint8_t cmp[KYBER_CIPHERTEXTBYTES]; + uint8_t buf[2 * KYBER_SYMBYTES]; + uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ + const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; + + PQCLEAN_KYBER51290S_CLEAN_indcpa_dec(buf, ct, sk); + + for (i = 0; i < KYBER_SYMBYTES; i++) { /* Multitarget countermeasure for coins + contributory KEM */ + buf[KYBER_SYMBYTES + i] = sk[KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES + i]; /* Save hash by storing H(pk) in sk */ + } + hash_g(kr, buf, 2 * KYBER_SYMBYTES); + + PQCLEAN_KYBER51290S_CLEAN_indcpa_enc(cmp, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ + + fail = PQCLEAN_KYBER51290S_CLEAN_verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ + + PQCLEAN_KYBER51290S_CLEAN_cmov(kr, sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */ + + kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ + return 0; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.c index 28ee4c9562..395a63075c 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.c @@ -1,155 +1,155 @@ -#include "ntt.h" -#include "params.h" -#include "reduce.h" - -#include <stddef.h> -#include <stdint.h> - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - 
zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; -} - -*/ -const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628 -}; - -const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441 -}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce((int32_t)a * b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_ntt(int16_t poly[256]) { - size_t j, k = 1; - int16_t t, zeta; - - for (size_t len = 128; len >= 2; len >>= 1) { - for (size_t start = 0; start < 256; start = j + len) { - zeta = PQCLEAN_KYBER51290S_CLEAN_zetas[k++]; - for (j = start; j < start + len; ++j) { - t = fqmul(zeta, poly[j + len]); - poly[j + len] = poly[j] - t; - poly[j] = poly[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_invntt(int16_t poly[256]) { - size_t j, k = 0; - int16_t t, zeta; - - for (size_t len = 2; len <= 128; len <<= 1) { - for (size_t start = 0; start < 256; start = j + len) { - zeta = PQCLEAN_KYBER51290S_CLEAN_zetas_inv[k++]; - for (j = start; j < start + 
len; ++j) { - t = poly[j]; - poly[j] = PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(t + poly[j + len]); - poly[j + len] = t - poly[j + len]; - poly[j + len] = fqmul(zeta, poly[j + len]); - } - } - } - - for (j = 0; j < 256; ++j) { - poly[j] = fqmul(poly[j], PQCLEAN_KYBER51290S_CLEAN_zetas_inv[127]); - } -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} +#include "ntt.h" +#include "params.h" +#include "reduce.h" + +#include <stddef.h> +#include <stdint.h> + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint16_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; + + +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} + +void init_ntt() { + unsigned int i, j, k; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i = 1; i < 128; ++i) + tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); + + for(i = 0; i < 128; ++i) + zetas[i] = tmp[tree[i]]; + + k = 0; + for(i = 64; i >= 1; i >>= 1) + for(j = i; j < 2*i; ++j) + zetas_inv[k++] = -tmp[128 - tree[j]]; + + zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; +} + +*/ +const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas[128] = { + 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, + 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, + 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, + 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, + 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, + 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, + 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, + 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628 +}; + +const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas_inv[128] = { + 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, + 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, + 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, + 1676, 1755, 
460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, + 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, + 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, + 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, + 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441 +}; + + +/************************************************* +* Name: fqmul +* +* Description: Multiplication followed by Montgomery reduction +* +* Arguments: - int16_t a: first factor +* - int16_t b: second factor +* +* Returns 16-bit integer congruent to a*b*R^{-1} mod q +**************************************************/ +static int16_t fqmul(int16_t a, int16_t b) { + return PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce((int32_t)a * b); +} + +/************************************************* +* Name: ntt +* +* Description: Inplace number-theoretic transform (NTT) in Rq +* input is in standard order, output is in bitreversed order +* +* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_ntt(int16_t poly[256]) { + size_t j, k = 1; + int16_t t, zeta; + + for (size_t len = 128; len >= 2; len >>= 1) { + for (size_t start = 0; start < 256; start = j + len) { + zeta = PQCLEAN_KYBER51290S_CLEAN_zetas[k++]; + for (j = start; j < start + len; ++j) { + t = fqmul(zeta, poly[j + len]); + poly[j + len] = poly[j] - t; + poly[j] = poly[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt +* +* Description: Inplace inverse number-theoretic transform in Rq +* input is in bitreversed order, output is in standard order +* +* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_invntt(int16_t poly[256]) { + size_t j, k = 0; + int16_t t, zeta; + + for (size_t len = 2; len <= 128; len <<= 1) { + for (size_t start = 0; start < 256; start = j + len) { + zeta = PQCLEAN_KYBER51290S_CLEAN_zetas_inv[k++]; + for (j = start; j < start + len; ++j) { + t = poly[j]; + poly[j] = PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(t + poly[j + len]); + poly[j + len] = t - poly[j + len]; + poly[j + len] = fqmul(zeta, poly[j + len]); + } + } + } + + for (j = 0; j < 256; ++j) { + poly[j] = fqmul(poly[j], PQCLEAN_KYBER51290S_CLEAN_zetas_inv[127]); + } +} + +/************************************************* +* Name: basemul +* +* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) +* used for multiplication of elements in Rq in NTT domain +* +* Arguments: - int16_t r[2]: pointer to the output polynomial +* - const int16_t a[2]: pointer to the first factor +* - const int16_t b[2]: pointer to the second factor +* - int16_t zeta: integer defining the reduction polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { + r[0] = fqmul(a[1], b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a[0], b[0]); + + r[1] = fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.h index 720bee975a..c30f733795 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.h +++ 
b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/ntt.h @@ -1,13 +1,13 @@ -#ifndef NTT_H -#define NTT_H - -#include <stdint.h> - -extern const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas[128]; -extern const int16_t PQCLEAN_KYBER51290S_CLEAN_zetasinv[128]; - -void PQCLEAN_KYBER51290S_CLEAN_ntt(int16_t *poly); -void PQCLEAN_KYBER51290S_CLEAN_invntt(int16_t *poly); -void PQCLEAN_KYBER51290S_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif +#ifndef NTT_H +#define NTT_H + +#include <stdint.h> + +extern const int16_t PQCLEAN_KYBER51290S_CLEAN_zetas[128]; +extern const int16_t PQCLEAN_KYBER51290S_CLEAN_zetasinv[128]; + +void PQCLEAN_KYBER51290S_CLEAN_ntt(int16_t *poly); +void PQCLEAN_KYBER51290S_CLEAN_invntt(int16_t *poly); +void PQCLEAN_KYBER51290S_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/params.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/params.h index d086d4c694..a0ff58a397 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/params.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/params.h @@ -1,32 +1,32 @@ -#ifndef PARAMS_H -#define PARAMS_H - - -/* Don't change parameters below this line */ - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_ETA 2 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - - -#define KYBER_K 2 -#define KYBER_POLYCOMPRESSEDBYTES 96 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) - -#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES - -#endif +#ifndef PARAMS_H +#define PARAMS_H + + +/* Don't change parameters below this line */ + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_ETA 2 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + + +#define KYBER_K 2 +#define KYBER_POLYCOMPRESSEDBYTES 96 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) + +#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ +#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.c index f7e8ef87e8..1d0694f113 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.c +++ 
b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.c @@ -1,277 +1,277 @@ -#include "cbd.h" -#include "ntt.h" -#include "params.h" -#include "poly.h" -#include "reduce.h" -#include "symmetric.h" - -#include <stdint.h> -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_compress(uint8_t *r, poly *a) { - uint8_t t[8]; - size_t k = 0; - - PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_N; i += 8) { - for (size_t j = 0; j < 8; j++) { - t[j] = ((((uint32_t)a->coeffs[i + j] << 3) + KYBER_Q / 2) / KYBER_Q) & 7; - } - - r[k] = (uint8_t)( t[0] | (t[1] << 3) | (t[2] << 6)); - r[k + 1] = (uint8_t)((t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7)); - r[k + 2] = (uint8_t)((t[5] >> 1) | (t[6] << 2) | (t[7] << 5)); - k += 3; - } -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_decompress(poly *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_N; i += 8) { - r->coeffs[i + 0] = (int16_t)( (((a[0] & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 1] = (int16_t)(((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 2] = (int16_t)(((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 3] = (int16_t)(((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 4] = (int16_t)(((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 5] = (int16_t)(((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 6] = (int16_t)(((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 7] = (int16_t)(((((a[2] >> 5)) * KYBER_Q) + 4) >> 3); - a += 3; - } -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(uint8_t *r, poly *a) { - PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_N / 2; i++) { - int16_t t0 = a->coeffs[2 * i]; - int16_t t1 = a->coeffs[2 * i + 1]; - r[3 * i] = t0 & 0xff; - r[3 * i + 1] = (uint8_t)((t0 >> 8) | ((t1 & 0xf) << 4)); - r[3 * i + 2] = (uint8_t)(t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array (of KYBER_POLYBYTES bytes) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(poly *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_N / 2; i++) { - r->coeffs[2 * i] = (int16_t)(a[3 * i] | ((uint16_t)a[3 * i + 1] & 0x0f) << 8); - r->coeffs[2 * i + 1] = (int16_t)(a[3 * i + 1] >> 4 | 
((uint16_t)a[3 * i + 2] & 0xff) << 4); - } -} - -/************************************************* -* Name: poly_getnoise -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce) { - uint8_t buf[KYBER_ETA * KYBER_N / 4]; - - prf(buf, KYBER_ETA * KYBER_N / 4, seed, nonce); - PQCLEAN_KYBER51290S_CLEAN_cbd(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_ntt(poly *r) { - PQCLEAN_KYBER51290S_CLEAN_ntt(r->coeffs); - PQCLEAN_KYBER51290S_CLEAN_poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_invntt(poly *r) { - PQCLEAN_KYBER51290S_CLEAN_invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N / 4; ++i) { - PQCLEAN_KYBER51290S_CLEAN_basemul( - r->coeffs + 4 * i, - a->coeffs + 4 * i, - b->coeffs + 4 * i, - PQCLEAN_KYBER51290S_CLEAN_zetas[64 + i]); - PQCLEAN_KYBER51290S_CLEAN_basemul( - r->coeffs + 4 * i + 2, - a->coeffs + 4 * i + 2, - b->coeffs + 4 * i + 2, - -PQCLEAN_KYBER51290S_CLEAN_zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_frommont(poly *r) { - const int16_t f = (1ULL << 32) % KYBER_Q; - - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce( - (int32_t)r->coeffs[i] * f); - } -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_reduce(poly *r) { - for (size_t i = 0; i < KYBER_N; i++) { - 
r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_csubq(poly *r) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_csubq(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_add(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_sub(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]) { - uint16_t mask; - - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - for (size_t j = 0; j < 8; j++) { - mask = -((msg[i] >> j) & 1); - r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a) { - uint16_t t; - - PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - msg[i] = 0; - for (size_t j = 0; j < 8; j++) { - t = (((a->coeffs[8 * i + j] << 1) + KYBER_Q / 2) / KYBER_Q) & 1; - msg[i] |= t << j; - } - } -} +#include "cbd.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" +#include "symmetric.h" + +#include <stdint.h> +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_compress(uint8_t *r, poly *a) { + uint8_t t[8]; + size_t k = 0; + + PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_N; i += 8) { + for (size_t j = 0; j < 8; 
j++) { + t[j] = ((((uint32_t)a->coeffs[i + j] << 3) + KYBER_Q / 2) / KYBER_Q) & 7; + } + + r[k] = (uint8_t)( t[0] | (t[1] << 3) | (t[2] << 6)); + r[k + 1] = (uint8_t)((t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7)); + r[k + 2] = (uint8_t)((t[5] >> 1) | (t[6] << 2) | (t[7] << 5)); + k += 3; + } +} + +/************************************************* +* Name: poly_decompress +* +* Description: De-serialization and subsequent decompression of a polynomial; +* approximate inverse of poly_compress +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_decompress(poly *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_N; i += 8) { + r->coeffs[i + 0] = (int16_t)( (((a[0] & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 1] = (int16_t)(((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 2] = (int16_t)(((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 3] = (int16_t)(((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 4] = (int16_t)(((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 5] = (int16_t)(((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 6] = (int16_t)(((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 7] = (int16_t)(((((a[2] >> 5)) * KYBER_Q) + 4) >> 3); + a += 3; + } +} + +/************************************************* +* Name: poly_tobytes +* +* Description: Serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(uint8_t *r, poly *a) { + PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_N / 2; i++) { + int16_t t0 = a->coeffs[2 * i]; + int16_t t1 = a->coeffs[2 * i + 1]; + r[3 * i] = t0 & 0xff; + r[3 * i + 1] = (uint8_t)((t0 >> 8) | ((t1 & 0xf) << 4)); + r[3 * i + 2] = (uint8_t)(t1 >> 4); + } +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array (of KYBER_POLYBYTES bytes) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(poly *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_N / 2; i++) { + r->coeffs[2 * i] = (int16_t)(a[3 * i] | ((uint16_t)a[3 * i + 1] & 0x0f) << 8); + r->coeffs[2 * i + 1] = (int16_t)(a[3 * i + 1] >> 4 | ((uint16_t)a[3 * i + 2] & 0xff) << 4); + } +} + +/************************************************* +* Name: poly_getnoise +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce) { + uint8_t buf[KYBER_ETA * KYBER_N / 4]; + + prf(buf, KYBER_ETA * KYBER_N / 4, seed, nonce); + PQCLEAN_KYBER51290S_CLEAN_cbd(r, buf); +} + 
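The poly_getnoise() routine above expands (seed, nonce) with the PRF and then hands the buffer to PQCLEAN_KYBER51290S_CLEAN_cbd(), which is defined in cbd.c and is not part of this diff. A minimal, self-contained sketch of that centered-binomial mapping for KYBER_ETA = 2 is shown below, assuming the standard bit-pair construction; the names cbd_eta2_sketch and bit_pair_sum are illustrative only and are not identifiers from this tree.

    #include <stdint.h>
    #include <stddef.h>

    /* Sum of the two lowest bits of x (eta = 2). */
    static int16_t bit_pair_sum(uint32_t x) {
        return (int16_t)((x & 1) + ((x >> 1) & 1));
    }

    /* Centered-binomial sampling sketch for eta = 2:
     * every 4 bytes of PRF output yield 8 coefficients, each formed as
     * (sum of 2 bits) - (sum of 2 bits), so coefficients lie in [-2, 2]. */
    static void cbd_eta2_sketch(int16_t coeffs[256] /* KYBER_N */,
                                const uint8_t buf[128] /* KYBER_ETA*KYBER_N/4 */) {
        for (size_t i = 0; i < 256 / 8; i++) {
            /* Load 4 little-endian bytes of PRF output. */
            uint32_t t = (uint32_t)buf[4 * i]
                       | ((uint32_t)buf[4 * i + 1] << 8)
                       | ((uint32_t)buf[4 * i + 2] << 16)
                       | ((uint32_t)buf[4 * i + 3] << 24);
            for (size_t j = 0; j < 8; j++) {
                int16_t a = bit_pair_sum(t >> (4 * j));     /* first bit pair  */
                int16_t b = bit_pair_sum(t >> (4 * j + 2)); /* second bit pair */
                coeffs[8 * i + j] = (int16_t)(a - b);
            }
        }
    }

Each coefficient ends up in [-2, 2], i.e. a centered binomial with parameter eta = 2 as fixed by KYBER_ETA in params.h.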
+/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in normal order, output in bitreversed order +* +* Arguments: - uint16_t *r: pointer to in/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_ntt(poly *r) { + PQCLEAN_KYBER51290S_CLEAN_ntt(r->coeffs); + PQCLEAN_KYBER51290S_CLEAN_poly_reduce(r); +} + +/************************************************* +* Name: poly_invntt +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in bitreversed order, output in normal order +* +* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_invntt(poly *r) { + PQCLEAN_KYBER51290S_CLEAN_invntt(r->coeffs); +} + +/************************************************* +* Name: poly_basemul +* +* Description: Multiplication of two polynomials in NTT domain +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N / 4; ++i) { + PQCLEAN_KYBER51290S_CLEAN_basemul( + r->coeffs + 4 * i, + a->coeffs + 4 * i, + b->coeffs + 4 * i, + PQCLEAN_KYBER51290S_CLEAN_zetas[64 + i]); + PQCLEAN_KYBER51290S_CLEAN_basemul( + r->coeffs + 4 * i + 2, + a->coeffs + 4 * i + 2, + b->coeffs + 4 * i + 2, + -PQCLEAN_KYBER51290S_CLEAN_zetas[64 + i]); + } +} + +/************************************************* +* Name: poly_frommont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from Montgomery domain to normal domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_frommont(poly *r) { + const int16_t f = (1ULL << 32) % KYBER_Q; + + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce( + (int32_t)r->coeffs[i] * f); + } +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_reduce(poly *r) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(r->coeffs[i]); + } +} + +/************************************************* +* Name: poly_csubq +* +* Description: Applies conditional subtraction of q to each coefficient of a polynomial +* for details of conditional subtraction of q see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_csubq(poly *r) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER51290S_CLEAN_csubq(r->coeffs[i]); + } +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: 
pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_add(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + } +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract two polynomials +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_sub(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + } +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]) { + uint16_t mask; + + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + for (size_t j = 0; j < 8; j++) { + mask = -((msg[i] >> j) & 1); + r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2); + } + } +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message +* +* Arguments: - uint8_t *msg: pointer to output message +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a) { + uint16_t t; + + PQCLEAN_KYBER51290S_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + msg[i] = 0; + for (size_t j = 0; j < 8; j++) { + t = (((a->coeffs[8 * i + j] << 1) + KYBER_Q / 2) / KYBER_Q) & 1; + msg[i] |= t << j; + } + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.h index fbab1da702..bed9a1dcd1 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/poly.h @@ -1,37 +1,37 @@ -#ifndef POLY_H -#define POLY_H - -#include "params.h" - -#include <stdint.h> -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] - */ -typedef struct { - int16_t coeffs[KYBER_N]; -} poly; - -void PQCLEAN_KYBER51290S_CLEAN_poly_compress(uint8_t *r, poly *a); -void PQCLEAN_KYBER51290S_CLEAN_poly_decompress(poly *r, const uint8_t *a); - -void PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(uint8_t *r, poly *a); -void PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(poly *r, const uint8_t *a); - -void PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]); -void PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a); - -void PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce); - -void PQCLEAN_KYBER51290S_CLEAN_poly_ntt(poly *r); -void PQCLEAN_KYBER51290S_CLEAN_poly_invntt(poly *r); -void PQCLEAN_KYBER51290S_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b); -void PQCLEAN_KYBER51290S_CLEAN_poly_frommont(poly *r); - -void PQCLEAN_KYBER51290S_CLEAN_poly_reduce(poly *r); -void PQCLEAN_KYBER51290S_CLEAN_poly_csubq(poly *r); - -void PQCLEAN_KYBER51290S_CLEAN_poly_add(poly *r, const poly *a, const poly *b); -void PQCLEAN_KYBER51290S_CLEAN_poly_sub(poly *r, const poly *a, const poly *b); - -#endif +#ifndef POLY_H +#define POLY_H + +#include "params.h" + +#include <stdint.h> +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] + */ +typedef struct { + int16_t coeffs[KYBER_N]; +} poly; + +void PQCLEAN_KYBER51290S_CLEAN_poly_compress(uint8_t *r, poly *a); +void PQCLEAN_KYBER51290S_CLEAN_poly_decompress(poly *r, const uint8_t *a); + +void PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(uint8_t *r, poly *a); +void PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(poly *r, const uint8_t *a); + +void PQCLEAN_KYBER51290S_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]); +void PQCLEAN_KYBER51290S_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a); + +void PQCLEAN_KYBER51290S_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce); + +void PQCLEAN_KYBER51290S_CLEAN_poly_ntt(poly *r); +void PQCLEAN_KYBER51290S_CLEAN_poly_invntt(poly *r); +void PQCLEAN_KYBER51290S_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b); +void PQCLEAN_KYBER51290S_CLEAN_poly_frommont(poly *r); + +void PQCLEAN_KYBER51290S_CLEAN_poly_reduce(poly *r); +void PQCLEAN_KYBER51290S_CLEAN_poly_csubq(poly *r); + +void PQCLEAN_KYBER51290S_CLEAN_poly_add(poly *r, const poly *a, const poly *b); +void PQCLEAN_KYBER51290S_CLEAN_poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.c index 8d90e7e0c7..36cfc56751 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.c @@ -1,175 +1,175 @@ -#include "polyvec.h" - -#include "poly.h" - -#include <stddef.h> -#include <stdint.h> -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(uint8_t *r, polyvec *a) { - PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(a); - - uint16_t t[4]; - for (size_t i = 0; i < KYBER_K; i++) { - for (size_t j = 0; j < KYBER_N / 4; j++) { - for (size_t k = 0; 
k < 4; k++) { - t[k] = ((((uint32_t)a->vec[i].coeffs[4 * j + k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; - } - - r[5 * j + 0] = (uint8_t)t[0]; - r[5 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] & 0x3f) << 2)); - r[5 * j + 2] = (uint8_t)((t[1] >> 6) | ((t[2] & 0x0f) << 4)); - r[5 * j + 3] = (uint8_t)((t[2] >> 4) | ((t[3] & 0x03) << 6)); - r[5 * j + 4] = (uint8_t)((t[3] >> 2)); - } - r += 320; - } -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - uint8_t *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_K; i++) { - for (size_t j = 0; j < KYBER_N / 4; j++) { - r->vec[i].coeffs[4 * j + 0] = (int16_t)( (((a[5 * j + 0] | (((uint32_t)a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 1] = (int16_t)(((((a[5 * j + 1] >> 2) | (((uint32_t)a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 2] = (int16_t)(((((a[5 * j + 2] >> 4) | (((uint32_t)a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 3] = (int16_t)(((((a[5 * j + 3] >> 6) | (((uint32_t)a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10); - } - a += 320; - } -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]); - } -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES); - } -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_ntt(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&r->vec[i]); - } -} - 
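
The polyvec_compress / polyvec_decompress pair above maps each coefficient x in [0, q) to round(1024*x/q) mod 1024 and packs four such 10-bit values into five bytes; decompression is only an approximate inverse. Below is a minimal standalone sketch of that pack/unpack arithmetic, assuming KYBER_Q = 3329 (the round-2 parameter); the names, test values and main() driver are illustrative only, not part of the library.

#include <stdint.h>
#include <stdio.h>

#define Q 3329                                /* assumed KYBER_Q (round 2) */

int main(void) {
    uint16_t x[4] = {0, 1, 1664, 2500};       /* coefficients in [0, Q) */
    uint16_t t[4], u[4];
    uint8_t  b[5];

    /* compress: t = round(1024 * x / Q) mod 1024 */
    for (int k = 0; k < 4; k++)
        t[k] = (uint16_t)(((((uint32_t)x[k] << 10) + Q / 2) / Q) & 0x3ff);

    /* pack four 10-bit values into five bytes */
    b[0] = (uint8_t)t[0];
    b[1] = (uint8_t)((t[0] >> 8) | ((t[1] & 0x3f) << 2));
    b[2] = (uint8_t)((t[1] >> 6) | ((t[2] & 0x0f) << 4));
    b[3] = (uint8_t)((t[2] >> 4) | ((t[3] & 0x03) << 6));
    b[4] = (uint8_t)(t[3] >> 2);

    /* unpack the five bytes back into four 10-bit values */
    u[0] = (uint16_t)(b[0] | (((uint16_t)b[1] & 0x03) << 8));
    u[1] = (uint16_t)((b[1] >> 2) | (((uint16_t)b[2] & 0x0f) << 6));
    u[2] = (uint16_t)((b[2] >> 4) | (((uint16_t)b[3] & 0x3f) << 4));
    u[3] = (uint16_t)((b[3] >> 6) | ((uint16_t)b[4] << 2));

    /* decompress: y = round(Q * u / 1024); recovers x up to a small rounding error (mod Q) */
    for (int k = 0; k < 4; k++) {
        uint16_t y = (uint16_t)((((uint32_t)u[k] * Q) + 512) >> 10);
        printf("%4u -> t=%4u -> %4u\n", x[k], t[k], y);
    }
    return 0;
}
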
-/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) { - poly t; - - PQCLEAN_KYBER51290S_CLEAN_poly_basemul(r, &a->vec[0], &b->vec[0]); - for (size_t i = 1; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_basemul(&t, &a->vec[i], &b->vec[i]); - PQCLEAN_KYBER51290S_CLEAN_poly_add(r, r, &t); - } - - PQCLEAN_KYBER51290S_CLEAN_poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_csubq(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER51290S_CLEAN_poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); - } -} +#include "polyvec.h" + +#include "poly.h" + +#include <stddef.h> +#include <stdint.h> +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(uint8_t *r, polyvec *a) { + PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(a); + + uint16_t t[4]; + for (size_t i = 0; i < KYBER_K; i++) { + for (size_t j = 0; j < KYBER_N / 4; j++) { + for (size_t k = 0; k < 4; k++) { + t[k] = ((((uint32_t)a->vec[i].coeffs[4 * j + k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + } + + r[5 * j + 0] = (uint8_t)t[0]; + r[5 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] & 0x3f) << 2)); + r[5 * j + 2] = (uint8_t)((t[1] >> 6) | ((t[2] & 0x0f) << 4)); + r[5 * j + 3] = (uint8_t)((t[2] >> 4) | ((t[3] & 0x03) << 6)); + r[5 * j + 4] = (uint8_t)((t[3] >> 
2)); + } + r += 320; + } +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - uint8_t *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_K; i++) { + for (size_t j = 0; j < KYBER_N / 4; j++) { + r->vec[i].coeffs[4 * j + 0] = (int16_t)( (((a[5 * j + 0] | (((uint32_t)a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 1] = (int16_t)(((((a[5 * j + 1] >> 2) | (((uint32_t)a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 2] = (int16_t)(((((a[5 * j + 2] >> 4) | (((uint32_t)a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 3] = (int16_t)(((((a[5 * j + 3] >> 6) | (((uint32_t)a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10); + } + a += 320; + } +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]); + } +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - uint8_t *r: pointer to output byte array +* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES); + } +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_ntt(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_invntt +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_invntt(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_pointwise_acc +* +* Description: Pointwise multiply elements of a and b and accumulate into r +* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials 
+**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) { + poly t; + + PQCLEAN_KYBER51290S_CLEAN_poly_basemul(r, &a->vec[0], &b->vec[0]); + for (size_t i = 1; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_basemul(&t, &a->vec[i], &b->vec[i]); + PQCLEAN_KYBER51290S_CLEAN_poly_add(r, r, &t); + } + + PQCLEAN_KYBER51290S_CLEAN_poly_reduce(r); +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_reduce(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_csubq +* +* Description: Applies conditional subtraction of q to each coefficient +* of each element of a vector of polynomials +* for details of conditional subtraction of q see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_csubq(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER51290S_CLEAN_poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.h index abf3fb9bfc..bb4f45f594 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/polyvec.h @@ -1,29 +1,29 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -#include <stdint.h> - -typedef struct { - poly vec[KYBER_K]; -} polyvec; - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(uint8_t *r, polyvec *a); -void PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a); - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a); -void PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a); - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(polyvec *r); -void PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(polyvec *r); - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(polyvec *r); -void PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(polyvec *r); - -void PQCLEAN_KYBER51290S_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif +#ifndef POLYVEC_H +#define POLYVEC_H + +#include "params.h" +#include "poly.h" + +#include <stdint.h> + +typedef struct { + poly vec[KYBER_K]; +} polyvec; + +void 
PQCLEAN_KYBER51290S_CLEAN_polyvec_compress(uint8_t *r, polyvec *a); +void PQCLEAN_KYBER51290S_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a); + +void PQCLEAN_KYBER51290S_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a); +void PQCLEAN_KYBER51290S_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a); + +void PQCLEAN_KYBER51290S_CLEAN_polyvec_ntt(polyvec *r); +void PQCLEAN_KYBER51290S_CLEAN_polyvec_invntt(polyvec *r); + +void PQCLEAN_KYBER51290S_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); + +void PQCLEAN_KYBER51290S_CLEAN_polyvec_reduce(polyvec *r); +void PQCLEAN_KYBER51290S_CLEAN_polyvec_csubq(polyvec *r); + +void PQCLEAN_KYBER51290S_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.c index 2447fef117..378a091bea 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.c @@ -1,61 +1,61 @@ -#include "reduce.h" - -#include "params.h" - -#include <stdint.h> -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. -**************************************************/ -int16_t PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce(int32_t a) { - int32_t t; - int16_t u; - - u = (int16_t)(a * (int64_t)QINV); - t = (int32_t)u * KYBER_Q; - t = a - t; - t >>= 16; - return (int16_t)t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26) / KYBER_Q + 1; - - t = v * a; - t >>= 26; - t *= KYBER_Q; - return a - (int16_t)t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t a: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t PQCLEAN_KYBER51290S_CLEAN_csubq(int16_t a) { - a -= KYBER_Q; - a += (a >> 15) & KYBER_Q; - return a; -} +#include "reduce.h" + +#include "params.h" + +#include <stdint.h> +/************************************************* +* Name: montgomery_reduce +* +* Description: Montgomery reduction; given a 32-bit integer a, computes +* 16-bit integer congruent to a * R^-1 mod q, +* where R=2^16 +* +* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} +* +* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. 
+**************************************************/ +int16_t PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce(int32_t a) { + int32_t t; + int16_t u; + + u = (int16_t)(a * (int64_t)QINV); + t = (int32_t)u * KYBER_Q; + t = a - t; + t >>= 16; + return (int16_t)t; +} + +/************************************************* +* Name: barrett_reduce +* +* Description: Barrett reduction; given a 16-bit integer a, computes +* 16-bit integer congruent to a mod q in {0,...,q} +* +* Arguments: - int16_t a: input integer to be reduced +* +* Returns: integer in {0,...,q} congruent to a modulo q. +**************************************************/ +int16_t PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(int16_t a) { + int32_t t; + const int32_t v = (1U << 26) / KYBER_Q + 1; + + t = v * a; + t >>= 26; + t *= KYBER_Q; + return a - (int16_t)t; +} + +/************************************************* +* Name: csubq +* +* Description: Conditionallly subtract q +* +* Arguments: - int16_t a: input integer +* +* Returns: a - q if a >= q, else a +**************************************************/ +int16_t PQCLEAN_KYBER51290S_CLEAN_csubq(int16_t a) { + a -= KYBER_Q; + a += (a >> 15) & KYBER_Q; + return a; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.h index f9a9b76213..4f6f892811 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/reduce.h @@ -1,15 +1,15 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include <stdint.h> - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce(int32_t a); - -int16_t PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(int16_t a); - -int16_t PQCLEAN_KYBER51290S_CLEAN_csubq(int16_t a); - -#endif +#ifndef REDUCE_H +#define REDUCE_H + +#include <stdint.h> + +#define MONT 2285 // 2^16 % Q +#define QINV 62209 // q^(-1) mod 2^16 + +int16_t PQCLEAN_KYBER51290S_CLEAN_montgomery_reduce(int32_t a); + +int16_t PQCLEAN_KYBER51290S_CLEAN_barrett_reduce(int16_t a); + +int16_t PQCLEAN_KYBER51290S_CLEAN_csubq(int16_t a); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2.h index 05d16074d4..5ddbf2d735 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2.h @@ -1,254 +1,254 @@ -// SPDX-License-Identifier: MIT - -#ifndef SHA2_H -#define SHA2_H - -#include <stddef.h> -#include <stdint.h> - -#define PQC_SHA256CTX_BYTES 40 -#define PQC_SHA512CTX_BYTES 72 - -typedef struct { - uint8_t *ctx; -} sha256ctx; -#define sha256_inc_init oqs_sha2_sha256_inc_init -#define sha256_inc_ctx_clone oqs_sha2_sha256_inc_ctx_clone -#define sha256_inc_ctx_release oqs_sha2_sha256_inc_ctx_release -#define sha256_inc_blocks oqs_sha2_sha256_inc_blocks -#define sha256_inc_finalize oqs_sha2_sha256_inc_finalize -#define sha256 OQS_SHA2_sha256 - -typedef struct { - uint8_t *ctx; -} sha512ctx; -#define sha512_inc_init oqs_sha2_sha512_inc_init -#define sha512_inc_ctx_clone oqs_sha2_sha512_inc_ctx_clone -#define sha512_inc_ctx_release oqs_sha2_sha512_inc_ctx_release -#define sha512_inc_blocks oqs_sha2_sha512_inc_blocks -#define sha512_inc_finalize oqs_sha2_sha512_inc_finalize -#define sha512 OQS_SHA2_sha512 - -/** - * \brief Process a message with SHA-256 and return the hash code in the output byte array. 
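
For reference, the three helpers in reduce.c above can be exercised in isolation; the following self-contained sketch mirrors their arithmetic and checks it against the naive % operator. It assumes KYBER_Q = 3329 (consistent with MONT = 2285 = 2^16 mod q in reduce.h) and reuses QINV = 62209 from the same header; the test values and the main() driver are illustrative only.

#include <stdint.h>
#include <stdio.h>

#define Q    3329    /* assumed KYBER_Q; 2^16 mod 3329 = 2285 = MONT */
#define QINV 62209   /* q^(-1) mod 2^16, as in reduce.h */

/* Montgomery reduction: returns r with r * 2^16 == a (mod Q), |r| < Q */
static int16_t montgomery_reduce(int32_t a) {
    int16_t u = (int16_t)(a * (int64_t)QINV);  /* a * q^-1 mod 2^16 */
    int32_t t = (int32_t)u * Q;                /* a - t is now divisible by 2^16 */
    return (int16_t)((a - t) >> 16);
}

/* Barrett reduction: returns a mod Q in {0,...,Q} for 16-bit a */
static int16_t barrett_reduce(int16_t a) {
    const int32_t v = (1 << 26) / Q + 1;       /* precomputed 2^26 / Q, rounded up */
    int32_t t = (v * a) >> 26;                 /* approximate quotient a / Q */
    return (int16_t)(a - t * Q);
}

/* Branch-free conditional subtraction of Q */
static int16_t csubq(int16_t a) {
    a -= Q;
    a += (a >> 15) & Q;                        /* add Q back iff the subtraction went negative */
    return a;
}

int main(void) {
    int32_t a = 123456;
    int16_t m = montgomery_reduce(a);
    /* m * 2^16 and a are congruent mod Q, so their difference is a multiple of Q */
    printf("montgomery_reduce(%d) = %d, (m*2^16 - a) mod Q = %lld\n",
           a, m, (long long)((((int64_t)m << 16) - a) % Q));

    int16_t b = 30000;
    printf("barrett_reduce(%d) = %d (naive %% gives %d)\n", b, barrett_reduce(b), b % Q);
    printf("csubq(Q) = %d, csubq(39) = %d\n", csubq(Q), csubq(39));
    return 0;
}
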
- * - * \warning The output array must be at least 32 bytes in length. - * - * \param output The output byte array - * \param input The message input byte array - * \param inplen The number of message bytes to process - */ -void OQS_SHA2_sha256(uint8_t *output, const uint8_t *input, size_t inplen); - -/** Data structure for the state of the SHA-256 incremental hashing API. */ -typedef struct { - /** Internal state */ - void *ctx; -} OQS_SHA2_sha256_ctx; - -/** - * \brief Allocate and initialize the state for the SHA-256 incremental hashing API. - * - * \warning The state must be released by OQS_SHA2_sha256_inc_finalize - * or OQS_SHA2_sha256_inc_ctx_release. - * - * \param state Pointer to the state - */ -void OQS_SHA2_sha256_inc_init(OQS_SHA2_sha256_ctx *state); - -/** - * \brief Duplicate state for the SHA-256 incremental hashing API. - * - * \warning dest must be allocated by the caller. Caller is responsible - * for releasing dest by calling either OQS_SHA3_sha3_256_inc_finalize or - * OQS_SHA3_sha3_256_inc_ctx_release. - * - * \param dest The function state to copy into; must be initialized - * \param src The function state to copy; must be initialized - */ -void OQS_SHA2_sha256_inc_ctx_clone(OQS_SHA2_sha256_ctx *dest, const OQS_SHA2_sha256_ctx *src); - -/** - * \brief Process blocks with SHA-256 and update the state. - * - * \warning The state must be initialized by OQS_SHA2_sha256_inc_init or OQS_SHA2_sha256_inc_ctx_clone. - * - * \param state The state to update - * \param in Message input byte array - * \param inblocks The number of blocks of message bytes to process - */ -void OQS_SHA2_sha256_inc_blocks(OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inblocks); - -/** - * \brief Process more message bytes with SHA-256 and return the hash code in the output byte array. - * - * \warning The output array must be at least 32 bytes in length. The state is - * deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha256_inc_init again. - * - * \param out The output byte array - * \param state The state - * \param in Additional message input byte array - * \param inlen The number of additional message bytes to process - */ -void OQS_SHA2_sha256_inc_finalize(uint8_t *out, OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inlen); - -/** - * \brief Destroy state. - * - * \warning The state is deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha256_inc_init again. - * - * \param state The state - */ -void OQS_SHA2_sha256_inc_ctx_release(OQS_SHA2_sha256_ctx *state); - -/** - * \brief Process a message with SHA-384 and return the hash code in the output byte array. - * - * \warning The output array must be at least 48 bytes in length. - * - * \param output The output byte array - * \param input The message input byte array - * \param inplen The number of message bytes to process - */ -void OQS_SHA2_sha384(uint8_t *output, const uint8_t *input, size_t inplen); - -/** Data structure for the state of the SHA-384 incremental hashing API. */ -typedef struct { - /** Internal state. */ - void *ctx; -} OQS_SHA2_sha384_ctx; - -/** - * \brief Allocate and initialize the state for the SHA-384 incremental hashing API. - * - * \warning The state must be released by OQS_SHA2_sha384_inc_finalize - * or OQS_SHA2_sha384_inc_ctx_release. 
- * - * \param state Pointer to the state - */ -void OQS_SHA2_sha384_inc_init(OQS_SHA2_sha384_ctx *state); - -/** - * \brief Duplicate state for the SHA-384 incremental hashing API. - * - * \warning dest must be allocated by the caller. Caller is responsible - * for releasing dest by calling either OQS_SHA3_sha3_384_inc_finalize or - * OQS_SHA3_sha3_384_inc_ctx_release. - * - * \param dest The function state to copy into; must be initialized - * \param src The function state to copy; must be initialized - */ -void OQS_SHA2_sha384_inc_ctx_clone(OQS_SHA2_sha384_ctx *dest, const OQS_SHA2_sha384_ctx *src); - -/** - * \brief Process blocks with SHA-384 and update the state. - * - * \warning The state must be initialized by OQS_SHA2_sha384_inc_init or OQS_SHA2_sha384_inc_ctx_clone. - * - * \param state The state to update - * \param in Message input byte array - * \param inblocks The number of blocks of message bytes to process - */ -void OQS_SHA2_sha384_inc_blocks(OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inblocks); - -/** - * \brief Process more message bytes with SHA-384 and return the hash code in the output byte array. - * - * \warning The output array must be at least 48 bytes in length. The state is - * deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha384_inc_init again. - * - * \param out The output byte array - * \param state The state - * \param in Additional message input byte array - * \param inlen The number of additional message bytes to process - */ -void OQS_SHA2_sha384_inc_finalize(uint8_t *out, OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inlen); - -/** - * \brief Destroy state. - * - * \warning The state is deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha384_inc_init again. - * - * \param state The state - */ -void OQS_SHA2_sha384_inc_ctx_release(OQS_SHA2_sha384_ctx *state); - -/** - * \brief Process a message with SHA-512 and return the hash code in the output byte array. - * - * \warning The output array must be at least 64 bytes in length. - * - * \param output The output byte array - * \param input The message input byte array - * \param inplen The number of message bytes to process - */ -void OQS_SHA2_sha512(uint8_t *output, const uint8_t *input, size_t inplen); - -/** Data structure for the state of the SHA-512 incremental hashing API. */ -typedef struct { - /** Internal state. */ - void *ctx; -} OQS_SHA2_sha512_ctx; - -/** - * \brief Allocate and initialize the state for the SHA-512 incremental hashing API. - * - * \warning The state must be released by OQS_SHA2_sha512_inc_finalize - * or OQS_SHA2_sha512_inc_ctx_release. - * - * \param state Pointer to the state - */ -void OQS_SHA2_sha512_inc_init(OQS_SHA2_sha512_ctx *state); - -/** - * \brief Duplicate state for the SHA-512 incremental hashing API. - * - * \warning dest must be allocated by the caller. Caller is responsible - * for releasing dest by calling either OQS_SHA3_sha3_512_inc_finalize or - * OQS_SHA3_sha3_512_inc_ctx_release. - * - * \param dest The function state to copy into; must be initialized - * \param src The function state to copy; must be initialized - */ -void OQS_SHA2_sha512_inc_ctx_clone(OQS_SHA2_sha512_ctx *dest, const OQS_SHA2_sha512_ctx *src); - -/** - * \brief Process blocks with SHA-512 and update the state. - * - * \warning The state must be initialized by OQS_SHA2_sha512_inc_init or OQS_SHA2_sha512_inc_ctx_clone. 
- * - * \param state The state to update - * \param in Message input byte array - * \param inblocks The number of blocks of message bytes to process - */ -void OQS_SHA2_sha512_inc_blocks(OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inblocks); - -/** - * \brief Process more message bytes with SHA-512 and return the hash code in the output byte array. - * - * \warning The output array must be at least 64 bytes in length. The state is - * deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha512_inc_init again. - * - * \param out The output byte array - * \param state The state - * \param in Additional message input byte array - * \param inlen The number of additional message bytes to process - */ -void OQS_SHA2_sha512_inc_finalize(uint8_t *out, OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inlen); - -/** - * \brief Destroy state. - * - * \warning The state is deallocated by this function and can not be used again after calling - * this function without calling OQS_SHA2_sha512_inc_init again. - * - * \param state The state - */ -void OQS_SHA2_sha512_inc_ctx_release(OQS_SHA2_sha512_ctx *state); - -#endif +// SPDX-License-Identifier: MIT + +#ifndef SHA2_H +#define SHA2_H + +#include <stddef.h> +#include <stdint.h> + +#define PQC_SHA256CTX_BYTES 40 +#define PQC_SHA512CTX_BYTES 72 + +typedef struct { + uint8_t *ctx; +} sha256ctx; +#define sha256_inc_init oqs_sha2_sha256_inc_init +#define sha256_inc_ctx_clone oqs_sha2_sha256_inc_ctx_clone +#define sha256_inc_ctx_release oqs_sha2_sha256_inc_ctx_release +#define sha256_inc_blocks oqs_sha2_sha256_inc_blocks +#define sha256_inc_finalize oqs_sha2_sha256_inc_finalize +#define sha256 OQS_SHA2_sha256 + +typedef struct { + uint8_t *ctx; +} sha512ctx; +#define sha512_inc_init oqs_sha2_sha512_inc_init +#define sha512_inc_ctx_clone oqs_sha2_sha512_inc_ctx_clone +#define sha512_inc_ctx_release oqs_sha2_sha512_inc_ctx_release +#define sha512_inc_blocks oqs_sha2_sha512_inc_blocks +#define sha512_inc_finalize oqs_sha2_sha512_inc_finalize +#define sha512 OQS_SHA2_sha512 + +/** + * \brief Process a message with SHA-256 and return the hash code in the output byte array. + * + * \warning The output array must be at least 32 bytes in length. + * + * \param output The output byte array + * \param input The message input byte array + * \param inplen The number of message bytes to process + */ +void OQS_SHA2_sha256(uint8_t *output, const uint8_t *input, size_t inplen); + +/** Data structure for the state of the SHA-256 incremental hashing API. */ +typedef struct { + /** Internal state */ + void *ctx; +} OQS_SHA2_sha256_ctx; + +/** + * \brief Allocate and initialize the state for the SHA-256 incremental hashing API. + * + * \warning The state must be released by OQS_SHA2_sha256_inc_finalize + * or OQS_SHA2_sha256_inc_ctx_release. + * + * \param state Pointer to the state + */ +void OQS_SHA2_sha256_inc_init(OQS_SHA2_sha256_ctx *state); + +/** + * \brief Duplicate state for the SHA-256 incremental hashing API. + * + * \warning dest must be allocated by the caller. Caller is responsible + * for releasing dest by calling either OQS_SHA3_sha3_256_inc_finalize or + * OQS_SHA3_sha3_256_inc_ctx_release. + * + * \param dest The function state to copy into; must be initialized + * \param src The function state to copy; must be initialized + */ +void OQS_SHA2_sha256_inc_ctx_clone(OQS_SHA2_sha256_ctx *dest, const OQS_SHA2_sha256_ctx *src); + +/** + * \brief Process blocks with SHA-256 and update the state. 
+ * + * \warning The state must be initialized by OQS_SHA2_sha256_inc_init or OQS_SHA2_sha256_inc_ctx_clone. + * + * \param state The state to update + * \param in Message input byte array + * \param inblocks The number of blocks of message bytes to process + */ +void OQS_SHA2_sha256_inc_blocks(OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inblocks); + +/** + * \brief Process more message bytes with SHA-256 and return the hash code in the output byte array. + * + * \warning The output array must be at least 32 bytes in length. The state is + * deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha256_inc_init again. + * + * \param out The output byte array + * \param state The state + * \param in Additional message input byte array + * \param inlen The number of additional message bytes to process + */ +void OQS_SHA2_sha256_inc_finalize(uint8_t *out, OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inlen); + +/** + * \brief Destroy state. + * + * \warning The state is deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha256_inc_init again. + * + * \param state The state + */ +void OQS_SHA2_sha256_inc_ctx_release(OQS_SHA2_sha256_ctx *state); + +/** + * \brief Process a message with SHA-384 and return the hash code in the output byte array. + * + * \warning The output array must be at least 48 bytes in length. + * + * \param output The output byte array + * \param input The message input byte array + * \param inplen The number of message bytes to process + */ +void OQS_SHA2_sha384(uint8_t *output, const uint8_t *input, size_t inplen); + +/** Data structure for the state of the SHA-384 incremental hashing API. */ +typedef struct { + /** Internal state. */ + void *ctx; +} OQS_SHA2_sha384_ctx; + +/** + * \brief Allocate and initialize the state for the SHA-384 incremental hashing API. + * + * \warning The state must be released by OQS_SHA2_sha384_inc_finalize + * or OQS_SHA2_sha384_inc_ctx_release. + * + * \param state Pointer to the state + */ +void OQS_SHA2_sha384_inc_init(OQS_SHA2_sha384_ctx *state); + +/** + * \brief Duplicate state for the SHA-384 incremental hashing API. + * + * \warning dest must be allocated by the caller. Caller is responsible + * for releasing dest by calling either OQS_SHA3_sha3_384_inc_finalize or + * OQS_SHA3_sha3_384_inc_ctx_release. + * + * \param dest The function state to copy into; must be initialized + * \param src The function state to copy; must be initialized + */ +void OQS_SHA2_sha384_inc_ctx_clone(OQS_SHA2_sha384_ctx *dest, const OQS_SHA2_sha384_ctx *src); + +/** + * \brief Process blocks with SHA-384 and update the state. + * + * \warning The state must be initialized by OQS_SHA2_sha384_inc_init or OQS_SHA2_sha384_inc_ctx_clone. + * + * \param state The state to update + * \param in Message input byte array + * \param inblocks The number of blocks of message bytes to process + */ +void OQS_SHA2_sha384_inc_blocks(OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inblocks); + +/** + * \brief Process more message bytes with SHA-384 and return the hash code in the output byte array. + * + * \warning The output array must be at least 48 bytes in length. The state is + * deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha384_inc_init again. 
+ * + * \param out The output byte array + * \param state The state + * \param in Additional message input byte array + * \param inlen The number of additional message bytes to process + */ +void OQS_SHA2_sha384_inc_finalize(uint8_t *out, OQS_SHA2_sha384_ctx *state, const uint8_t *in, size_t inlen); + +/** + * \brief Destroy state. + * + * \warning The state is deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha384_inc_init again. + * + * \param state The state + */ +void OQS_SHA2_sha384_inc_ctx_release(OQS_SHA2_sha384_ctx *state); + +/** + * \brief Process a message with SHA-512 and return the hash code in the output byte array. + * + * \warning The output array must be at least 64 bytes in length. + * + * \param output The output byte array + * \param input The message input byte array + * \param inplen The number of message bytes to process + */ +void OQS_SHA2_sha512(uint8_t *output, const uint8_t *input, size_t inplen); + +/** Data structure for the state of the SHA-512 incremental hashing API. */ +typedef struct { + /** Internal state. */ + void *ctx; +} OQS_SHA2_sha512_ctx; + +/** + * \brief Allocate and initialize the state for the SHA-512 incremental hashing API. + * + * \warning The state must be released by OQS_SHA2_sha512_inc_finalize + * or OQS_SHA2_sha512_inc_ctx_release. + * + * \param state Pointer to the state + */ +void OQS_SHA2_sha512_inc_init(OQS_SHA2_sha512_ctx *state); + +/** + * \brief Duplicate state for the SHA-512 incremental hashing API. + * + * \warning dest must be allocated by the caller. Caller is responsible + * for releasing dest by calling either OQS_SHA3_sha3_512_inc_finalize or + * OQS_SHA3_sha3_512_inc_ctx_release. + * + * \param dest The function state to copy into; must be initialized + * \param src The function state to copy; must be initialized + */ +void OQS_SHA2_sha512_inc_ctx_clone(OQS_SHA2_sha512_ctx *dest, const OQS_SHA2_sha512_ctx *src); + +/** + * \brief Process blocks with SHA-512 and update the state. + * + * \warning The state must be initialized by OQS_SHA2_sha512_inc_init or OQS_SHA2_sha512_inc_ctx_clone. + * + * \param state The state to update + * \param in Message input byte array + * \param inblocks The number of blocks of message bytes to process + */ +void OQS_SHA2_sha512_inc_blocks(OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inblocks); + +/** + * \brief Process more message bytes with SHA-512 and return the hash code in the output byte array. + * + * \warning The output array must be at least 64 bytes in length. The state is + * deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha512_inc_init again. + * + * \param out The output byte array + * \param state The state + * \param in Additional message input byte array + * \param inlen The number of additional message bytes to process + */ +void OQS_SHA2_sha512_inc_finalize(uint8_t *out, OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inlen); + +/** + * \brief Destroy state. + * + * \warning The state is deallocated by this function and can not be used again after calling + * this function without calling OQS_SHA2_sha512_inc_init again. 
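
The incremental API declared above is used by initializing a state, feeding any number of whole 64-byte blocks (128-byte blocks for SHA-384/512), and finalizing with the remaining tail, which also releases the state. A short usage sketch, assuming it is linked against an implementation of these declarations such as the sha2_c.c below; the message contents are arbitrary test data.

#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "sha2.h"

int main(void) {
    uint8_t msg[128];
    memset(msg, 0xAB, sizeof msg);            /* arbitrary test data */

    /* One-shot hash of the whole message. */
    uint8_t d1[32];
    OQS_SHA2_sha256(d1, msg, sizeof msg);

    /* Incremental: one whole 64-byte block, then the 64-byte tail.
     * inc_finalize writes the digest and releases the state. */
    uint8_t d2[32];
    OQS_SHA2_sha256_ctx st;
    OQS_SHA2_sha256_inc_init(&st);
    OQS_SHA2_sha256_inc_blocks(&st, msg, 1);              /* first 64 bytes */
    OQS_SHA2_sha256_inc_finalize(d2, &st, msg + 64, 64);  /* remaining 64 bytes */

    printf("digests match: %s\n", memcmp(d1, d2, sizeof d1) == 0 ? "yes" : "no");
    return 0;
}
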
+ * + * \param state The state + */ +void OQS_SHA2_sha512_inc_ctx_release(OQS_SHA2_sha512_ctx *state); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2_c.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2_c.c index 557ebcb861..254633ca5d 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2_c.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/sha2_c.c @@ -1,706 +1,706 @@ -// SPDX-License-Identifier: Public domain - -#include "sha2.h" -#include <stdio.h> - -/* Based on the public domain implementation in - * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html - * by D. J. Bernstein */ - -#include <stddef.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -static uint32_t load_bigendian_32(const uint8_t *x) { - return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) | - (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24); -} - -static uint64_t load_bigendian_64(const uint8_t *x) { - return (uint64_t)(x[7]) | (((uint64_t)(x[6])) << 8) | - (((uint64_t)(x[5])) << 16) | (((uint64_t)(x[4])) << 24) | - (((uint64_t)(x[3])) << 32) | (((uint64_t)(x[2])) << 40) | - (((uint64_t)(x[1])) << 48) | (((uint64_t)(x[0])) << 56); -} - -static void store_bigendian_32(uint8_t *x, uint64_t u) { - x[3] = (uint8_t) u; - u >>= 8; - x[2] = (uint8_t) u; - u >>= 8; - x[1] = (uint8_t) u; - u >>= 8; - x[0] = (uint8_t) u; -} - -static void store_bigendian_64(uint8_t *x, uint64_t u) { - x[7] = (uint8_t) u; - u >>= 8; - x[6] = (uint8_t) u; - u >>= 8; - x[5] = (uint8_t) u; - u >>= 8; - x[4] = (uint8_t) u; - u >>= 8; - x[3] = (uint8_t) u; - u >>= 8; - x[2] = (uint8_t) u; - u >>= 8; - x[1] = (uint8_t) u; - u >>= 8; - x[0] = (uint8_t) u; -} - -#define SHR(x, c) ((x) >> (c)) -#define ROTR_32(x, c) (((x) >> (c)) | ((x) << (32 - (c)))) -#define ROTR_64(x, c) (((x) >> (c)) | ((x) << (64 - (c)))) - -#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) -#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) - -#define Sigma0_32(x) (ROTR_32(x, 2) ^ ROTR_32(x,13) ^ ROTR_32(x,22)) -#define Sigma1_32(x) (ROTR_32(x, 6) ^ ROTR_32(x,11) ^ ROTR_32(x,25)) -#define sigma0_32(x) (ROTR_32(x, 7) ^ ROTR_32(x,18) ^ SHR(x, 3)) -#define sigma1_32(x) (ROTR_32(x,17) ^ ROTR_32(x,19) ^ SHR(x,10)) - -#define Sigma0_64(x) (ROTR_64(x, 28) ^ ROTR_64(x, 34) ^ ROTR_64(x, 39)) -#define Sigma1_64(x) (ROTR_64(x, 14) ^ ROTR_64(x, 18) ^ ROTR_64(x, 41)) -#define sigma0_64(x) (ROTR_64(x, 1) ^ ROTR_64(x, 8) ^ SHR(x, 7)) -#define sigma1_64(x) (ROTR_64(x, 19) ^ ROTR_64(x, 61) ^ SHR(x, 6)) - -#define M_32(w0, w14, w9, w1) w0 = sigma1_32(w14) + (w9) + sigma0_32(w1) + (w0); -#define M_64(w0, w14, w9, w1) w0 = sigma1_64(w14) + (w9) + sigma0_64(w1) + (w0); - -#define EXPAND_32 \ - M_32(w0, w14, w9, w1) \ - M_32(w1, w15, w10, w2) \ - M_32(w2, w0, w11, w3) \ - M_32(w3, w1, w12, w4) \ - M_32(w4, w2, w13, w5) \ - M_32(w5, w3, w14, w6) \ - M_32(w6, w4, w15, w7) \ - M_32(w7, w5, w0, w8) \ - M_32(w8, w6, w1, w9) \ - M_32(w9, w7, w2, w10) \ - M_32(w10, w8, w3, w11) \ - M_32(w11, w9, w4, w12) \ - M_32(w12, w10, w5, w13) \ - M_32(w13, w11, w6, w14) \ - M_32(w14, w12, w7, w15) \ - M_32(w15, w13, w8, w0) - -#define EXPAND_64 \ - M_64(w0, w14, w9, w1) \ - M_64(w1, w15, w10, w2) \ - M_64(w2, w0, w11, w3) \ - M_64(w3, w1, w12, w4) \ - M_64(w4, w2, w13, w5) \ - M_64(w5, w3, w14, w6) \ - M_64(w6, w4, w15, w7) \ - M_64(w7, w5, w0, w8) \ - M_64(w8, w6, w1, w9) \ - M_64(w9, w7, w2, w10) \ - M_64(w10, w8, w3, w11) \ - M_64(w11, w9, w4, w12) \ - M_64(w12, w10, w5, w13) \ - M_64(w13, w11, w6, w14) \ - 
M_64(w14, w12, w7, w15) \ - M_64(w15, w13, w8, w0) - -#define F_32(w, k) \ - T1 = h + Sigma1_32(e) + Ch(e, f, g) + (k) + (w); \ - T2 = Sigma0_32(a) + Maj(a, b, c); \ - h = g; \ - g = f; \ - f = e; \ - e = d + T1; \ - d = c; \ - c = b; \ - b = a; \ - a = T1 + T2; - -#define F_64(w, k) \ - T1 = h + Sigma1_64(e) + Ch(e, f, g) + (k) + (w); \ - T2 = Sigma0_64(a) + Maj(a, b, c); \ - h = g; \ - g = f; \ - f = e; \ - e = d + T1; \ - d = c; \ - c = b; \ - b = a; \ - a = T1 + T2; - -static size_t crypto_hashblocks_sha256(uint8_t *statebytes, - const uint8_t *in, size_t inlen) { - uint32_t state[8]; - uint32_t a; - uint32_t b; - uint32_t c; - uint32_t d; - uint32_t e; - uint32_t f; - uint32_t g; - uint32_t h; - - a = load_bigendian_32(statebytes + 0); - state[0] = a; - b = load_bigendian_32(statebytes + 4); - state[1] = b; - c = load_bigendian_32(statebytes + 8); - state[2] = c; - d = load_bigendian_32(statebytes + 12); - state[3] = d; - e = load_bigendian_32(statebytes + 16); - state[4] = e; - f = load_bigendian_32(statebytes + 20); - state[5] = f; - g = load_bigendian_32(statebytes + 24); - state[6] = g; - h = load_bigendian_32(statebytes + 28); - state[7] = h; - - while (inlen >= 64) { - uint32_t w0 = load_bigendian_32(in + 0); - uint32_t w1 = load_bigendian_32(in + 4); - uint32_t w2 = load_bigendian_32(in + 8); - uint32_t w3 = load_bigendian_32(in + 12); - uint32_t w4 = load_bigendian_32(in + 16); - uint32_t w5 = load_bigendian_32(in + 20); - uint32_t w6 = load_bigendian_32(in + 24); - uint32_t w7 = load_bigendian_32(in + 28); - uint32_t w8 = load_bigendian_32(in + 32); - uint32_t w9 = load_bigendian_32(in + 36); - uint32_t w10 = load_bigendian_32(in + 40); - uint32_t w11 = load_bigendian_32(in + 44); - uint32_t w12 = load_bigendian_32(in + 48); - uint32_t w13 = load_bigendian_32(in + 52); - uint32_t w14 = load_bigendian_32(in + 56); - uint32_t w15 = load_bigendian_32(in + 60); - - uint32_t T1; - uint32_t T2; - - F_32(w0, 0x428a2f98) - F_32(w1, 0x71374491) - F_32(w2, 0xb5c0fbcf) - F_32(w3, 0xe9b5dba5) - F_32(w4, 0x3956c25b) - F_32(w5, 0x59f111f1) - F_32(w6, 0x923f82a4) - F_32(w7, 0xab1c5ed5) - F_32(w8, 0xd807aa98) - F_32(w9, 0x12835b01) - F_32(w10, 0x243185be) - F_32(w11, 0x550c7dc3) - F_32(w12, 0x72be5d74) - F_32(w13, 0x80deb1fe) - F_32(w14, 0x9bdc06a7) - F_32(w15, 0xc19bf174) - - EXPAND_32 - - F_32(w0, 0xe49b69c1) - F_32(w1, 0xefbe4786) - F_32(w2, 0x0fc19dc6) - F_32(w3, 0x240ca1cc) - F_32(w4, 0x2de92c6f) - F_32(w5, 0x4a7484aa) - F_32(w6, 0x5cb0a9dc) - F_32(w7, 0x76f988da) - F_32(w8, 0x983e5152) - F_32(w9, 0xa831c66d) - F_32(w10, 0xb00327c8) - F_32(w11, 0xbf597fc7) - F_32(w12, 0xc6e00bf3) - F_32(w13, 0xd5a79147) - F_32(w14, 0x06ca6351) - F_32(w15, 0x14292967) - - EXPAND_32 - - F_32(w0, 0x27b70a85) - F_32(w1, 0x2e1b2138) - F_32(w2, 0x4d2c6dfc) - F_32(w3, 0x53380d13) - F_32(w4, 0x650a7354) - F_32(w5, 0x766a0abb) - F_32(w6, 0x81c2c92e) - F_32(w7, 0x92722c85) - F_32(w8, 0xa2bfe8a1) - F_32(w9, 0xa81a664b) - F_32(w10, 0xc24b8b70) - F_32(w11, 0xc76c51a3) - F_32(w12, 0xd192e819) - F_32(w13, 0xd6990624) - F_32(w14, 0xf40e3585) - F_32(w15, 0x106aa070) - - EXPAND_32 - - F_32(w0, 0x19a4c116) - F_32(w1, 0x1e376c08) - F_32(w2, 0x2748774c) - F_32(w3, 0x34b0bcb5) - F_32(w4, 0x391c0cb3) - F_32(w5, 0x4ed8aa4a) - F_32(w6, 0x5b9cca4f) - F_32(w7, 0x682e6ff3) - F_32(w8, 0x748f82ee) - F_32(w9, 0x78a5636f) - F_32(w10, 0x84c87814) - F_32(w11, 0x8cc70208) - F_32(w12, 0x90befffa) - F_32(w13, 0xa4506ceb) - F_32(w14, 0xbef9a3f7) - F_32(w15, 0xc67178f2) - - a += state[0]; - b += state[1]; - c += state[2]; - d += state[3]; - 
e += state[4]; - f += state[5]; - g += state[6]; - h += state[7]; - - state[0] = a; - state[1] = b; - state[2] = c; - state[3] = d; - state[4] = e; - state[5] = f; - state[6] = g; - state[7] = h; - - in += 64; - inlen -= 64; - } - - store_bigendian_32(statebytes + 0, state[0]); - store_bigendian_32(statebytes + 4, state[1]); - store_bigendian_32(statebytes + 8, state[2]); - store_bigendian_32(statebytes + 12, state[3]); - store_bigendian_32(statebytes + 16, state[4]); - store_bigendian_32(statebytes + 20, state[5]); - store_bigendian_32(statebytes + 24, state[6]); - store_bigendian_32(statebytes + 28, state[7]); - - return inlen; -} - -static size_t crypto_hashblocks_sha512(uint8_t *statebytes, - const uint8_t *in, size_t inlen) { - uint64_t state[8]; - uint64_t a; - uint64_t b; - uint64_t c; - uint64_t d; - uint64_t e; - uint64_t f; - uint64_t g; - uint64_t h; - - a = load_bigendian_64(statebytes + 0); - state[0] = a; - b = load_bigendian_64(statebytes + 8); - state[1] = b; - c = load_bigendian_64(statebytes + 16); - state[2] = c; - d = load_bigendian_64(statebytes + 24); - state[3] = d; - e = load_bigendian_64(statebytes + 32); - state[4] = e; - f = load_bigendian_64(statebytes + 40); - state[5] = f; - g = load_bigendian_64(statebytes + 48); - state[6] = g; - h = load_bigendian_64(statebytes + 56); - state[7] = h; - - while (inlen >= 128) { - uint64_t w0 = load_bigendian_64(in + 0); - uint64_t w1 = load_bigendian_64(in + 8); - uint64_t w2 = load_bigendian_64(in + 16); - uint64_t w3 = load_bigendian_64(in + 24); - uint64_t w4 = load_bigendian_64(in + 32); - uint64_t w5 = load_bigendian_64(in + 40); - uint64_t w6 = load_bigendian_64(in + 48); - uint64_t w7 = load_bigendian_64(in + 56); - uint64_t w8 = load_bigendian_64(in + 64); - uint64_t w9 = load_bigendian_64(in + 72); - uint64_t w10 = load_bigendian_64(in + 80); - uint64_t w11 = load_bigendian_64(in + 88); - uint64_t w12 = load_bigendian_64(in + 96); - uint64_t w13 = load_bigendian_64(in + 104); - uint64_t w14 = load_bigendian_64(in + 112); - uint64_t w15 = load_bigendian_64(in + 120); - - uint64_t T1; - uint64_t T2; - - F_64(w0, 0x428a2f98d728ae22ULL) - F_64(w1, 0x7137449123ef65cdULL) - F_64(w2, 0xb5c0fbcfec4d3b2fULL) - F_64(w3, 0xe9b5dba58189dbbcULL) - F_64(w4, 0x3956c25bf348b538ULL) - F_64(w5, 0x59f111f1b605d019ULL) - F_64(w6, 0x923f82a4af194f9bULL) - F_64(w7, 0xab1c5ed5da6d8118ULL) - F_64(w8, 0xd807aa98a3030242ULL) - F_64(w9, 0x12835b0145706fbeULL) - F_64(w10, 0x243185be4ee4b28cULL) - F_64(w11, 0x550c7dc3d5ffb4e2ULL) - F_64(w12, 0x72be5d74f27b896fULL) - F_64(w13, 0x80deb1fe3b1696b1ULL) - F_64(w14, 0x9bdc06a725c71235ULL) - F_64(w15, 0xc19bf174cf692694ULL) - - EXPAND_64 - - F_64(w0, 0xe49b69c19ef14ad2ULL) - F_64(w1, 0xefbe4786384f25e3ULL) - F_64(w2, 0x0fc19dc68b8cd5b5ULL) - F_64(w3, 0x240ca1cc77ac9c65ULL) - F_64(w4, 0x2de92c6f592b0275ULL) - F_64(w5, 0x4a7484aa6ea6e483ULL) - F_64(w6, 0x5cb0a9dcbd41fbd4ULL) - F_64(w7, 0x76f988da831153b5ULL) - F_64(w8, 0x983e5152ee66dfabULL) - F_64(w9, 0xa831c66d2db43210ULL) - F_64(w10, 0xb00327c898fb213fULL) - F_64(w11, 0xbf597fc7beef0ee4ULL) - F_64(w12, 0xc6e00bf33da88fc2ULL) - F_64(w13, 0xd5a79147930aa725ULL) - F_64(w14, 0x06ca6351e003826fULL) - F_64(w15, 0x142929670a0e6e70ULL) - - EXPAND_64 - - F_64(w0, 0x27b70a8546d22ffcULL) - F_64(w1, 0x2e1b21385c26c926ULL) - F_64(w2, 0x4d2c6dfc5ac42aedULL) - F_64(w3, 0x53380d139d95b3dfULL) - F_64(w4, 0x650a73548baf63deULL) - F_64(w5, 0x766a0abb3c77b2a8ULL) - F_64(w6, 0x81c2c92e47edaee6ULL) - F_64(w7, 0x92722c851482353bULL) - F_64(w8, 0xa2bfe8a14cf10364ULL) - 
F_64(w9, 0xa81a664bbc423001ULL) - F_64(w10, 0xc24b8b70d0f89791ULL) - F_64(w11, 0xc76c51a30654be30ULL) - F_64(w12, 0xd192e819d6ef5218ULL) - F_64(w13, 0xd69906245565a910ULL) - F_64(w14, 0xf40e35855771202aULL) - F_64(w15, 0x106aa07032bbd1b8ULL) - - EXPAND_64 - - F_64(w0, 0x19a4c116b8d2d0c8ULL) - F_64(w1, 0x1e376c085141ab53ULL) - F_64(w2, 0x2748774cdf8eeb99ULL) - F_64(w3, 0x34b0bcb5e19b48a8ULL) - F_64(w4, 0x391c0cb3c5c95a63ULL) - F_64(w5, 0x4ed8aa4ae3418acbULL) - F_64(w6, 0x5b9cca4f7763e373ULL) - F_64(w7, 0x682e6ff3d6b2b8a3ULL) - F_64(w8, 0x748f82ee5defb2fcULL) - F_64(w9, 0x78a5636f43172f60ULL) - F_64(w10, 0x84c87814a1f0ab72ULL) - F_64(w11, 0x8cc702081a6439ecULL) - F_64(w12, 0x90befffa23631e28ULL) - F_64(w13, 0xa4506cebde82bde9ULL) - F_64(w14, 0xbef9a3f7b2c67915ULL) - F_64(w15, 0xc67178f2e372532bULL) - - EXPAND_64 - - F_64(w0, 0xca273eceea26619cULL) - F_64(w1, 0xd186b8c721c0c207ULL) - F_64(w2, 0xeada7dd6cde0eb1eULL) - F_64(w3, 0xf57d4f7fee6ed178ULL) - F_64(w4, 0x06f067aa72176fbaULL) - F_64(w5, 0x0a637dc5a2c898a6ULL) - F_64(w6, 0x113f9804bef90daeULL) - F_64(w7, 0x1b710b35131c471bULL) - F_64(w8, 0x28db77f523047d84ULL) - F_64(w9, 0x32caab7b40c72493ULL) - F_64(w10, 0x3c9ebe0a15c9bebcULL) - F_64(w11, 0x431d67c49c100d4cULL) - F_64(w12, 0x4cc5d4becb3e42b6ULL) - F_64(w13, 0x597f299cfc657e2aULL) - F_64(w14, 0x5fcb6fab3ad6faecULL) - F_64(w15, 0x6c44198c4a475817ULL) - - a += state[0]; - b += state[1]; - c += state[2]; - d += state[3]; - e += state[4]; - f += state[5]; - g += state[6]; - h += state[7]; - - state[0] = a; - state[1] = b; - state[2] = c; - state[3] = d; - state[4] = e; - state[5] = f; - state[6] = g; - state[7] = h; - - in += 128; - inlen -= 128; - } - - store_bigendian_64(statebytes + 0, state[0]); - store_bigendian_64(statebytes + 8, state[1]); - store_bigendian_64(statebytes + 16, state[2]); - store_bigendian_64(statebytes + 24, state[3]); - store_bigendian_64(statebytes + 32, state[4]); - store_bigendian_64(statebytes + 40, state[5]); - store_bigendian_64(statebytes + 48, state[6]); - store_bigendian_64(statebytes + 56, state[7]); - - return inlen; -} - -static const uint8_t iv_256[32] = { - 0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85, - 0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a, - 0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c, - 0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19 -}; - -static const uint8_t iv_512[64] = { - 0x6a, 0x09, 0xe6, 0x67, 0xf3, 0xbc, 0xc9, 0x08, 0xbb, 0x67, 0xae, - 0x85, 0x84, 0xca, 0xa7, 0x3b, 0x3c, 0x6e, 0xf3, 0x72, 0xfe, 0x94, - 0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51, - 0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c, - 0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd, - 0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79 -}; - -void sha256_inc_init(sha256ctx *state) { - state->ctx = malloc(PQC_SHA256CTX_BYTES); - if (state->ctx == NULL) { - exit(111); - } - for (size_t i = 0; i < 32; ++i) { - state->ctx[i] = iv_256[i]; - } - for (size_t i = 32; i < 40; ++i) { - state->ctx[i] = 0; - } -} - -void sha512_inc_init(sha512ctx *state) { - state->ctx = malloc(PQC_SHA512CTX_BYTES); - if (state->ctx == NULL) { - exit(111); - } - for (size_t i = 0; i < 64; ++i) { - state->ctx[i] = iv_512[i]; - } - for (size_t i = 64; i < 72; ++i) { - state->ctx[i] = 0; - } -} - -void sha256_inc_ctx_clone(sha256ctx *stateout, const sha256ctx *statein) { - stateout->ctx = malloc(PQC_SHA256CTX_BYTES); - if (stateout->ctx == NULL) { - exit(111); - } - memcpy(stateout->ctx, statein->ctx, PQC_SHA256CTX_BYTES); -} - -void 
sha512_inc_ctx_clone(sha512ctx *stateout, const sha512ctx *statein) { - stateout->ctx = malloc(PQC_SHA512CTX_BYTES); - if (stateout->ctx == NULL) { - exit(111); - } - memcpy(stateout->ctx, statein->ctx, PQC_SHA512CTX_BYTES); -} - -/* Destroy the hash state. */ -void sha256_inc_ctx_release(sha256ctx *state) { - free(state->ctx); // IGNORE free-check -} - -/* Destroy the hash state. */ -void sha512_inc_ctx_release(sha512ctx *state) { - free(state->ctx); // IGNORE free-check -} - -void sha256_inc_blocks(sha256ctx *state, const uint8_t *in, size_t inblocks) { - uint64_t bytes = load_bigendian_64(state->ctx + 32); - - crypto_hashblocks_sha256(state->ctx, in, 64 * inblocks); - bytes += 64 * inblocks; - - store_bigendian_64(state->ctx + 32, bytes); -} - -void sha512_inc_blocks(sha512ctx *state, const uint8_t *in, size_t inblocks) { - uint64_t bytes = load_bigendian_64(state->ctx + 64); - - crypto_hashblocks_sha512(state->ctx, in, 128 * inblocks); - bytes += 128 * inblocks; - - store_bigendian_64(state->ctx + 64, bytes); -} - -void sha256_inc_finalize(uint8_t *out, sha256ctx *state, const uint8_t *in, size_t inlen) { - uint8_t padded[128]; - uint64_t bytes = load_bigendian_64(state->ctx + 32) + inlen; - - crypto_hashblocks_sha256(state->ctx, in, inlen); - in += inlen; - inlen &= 63; - in -= inlen; - - for (size_t i = 0; i < inlen; ++i) { - padded[i] = in[i]; - } - padded[inlen] = 0x80; - - if (inlen < 56) { - for (size_t i = inlen + 1; i < 56; ++i) { - padded[i] = 0; - } - padded[56] = (uint8_t) (bytes >> 53); - padded[57] = (uint8_t) (bytes >> 45); - padded[58] = (uint8_t) (bytes >> 37); - padded[59] = (uint8_t) (bytes >> 29); - padded[60] = (uint8_t) (bytes >> 21); - padded[61] = (uint8_t) (bytes >> 13); - padded[62] = (uint8_t) (bytes >> 5); - padded[63] = (uint8_t) (bytes << 3); - crypto_hashblocks_sha256(state->ctx, padded, 64); - } else { - for (size_t i = inlen + 1; i < 120; ++i) { - padded[i] = 0; - } - padded[120] = (uint8_t) (bytes >> 53); - padded[121] = (uint8_t) (bytes >> 45); - padded[122] = (uint8_t) (bytes >> 37); - padded[123] = (uint8_t) (bytes >> 29); - padded[124] = (uint8_t) (bytes >> 21); - padded[125] = (uint8_t) (bytes >> 13); - padded[126] = (uint8_t) (bytes >> 5); - padded[127] = (uint8_t) (bytes << 3); - crypto_hashblocks_sha256(state->ctx, padded, 128); - } - - for (size_t i = 0; i < 32; ++i) { - out[i] = state->ctx[i]; - } - sha256_inc_ctx_release(state); -} - -void sha512_inc_finalize(uint8_t *out, sha512ctx *state, const uint8_t *in, size_t inlen) { - uint8_t padded[256]; - uint64_t bytes = load_bigendian_64(state->ctx + 64) + inlen; - - crypto_hashblocks_sha512(state->ctx, in, inlen); - in += inlen; - inlen &= 127; - in -= inlen; - - for (size_t i = 0; i < inlen; ++i) { - padded[i] = in[i]; - } - padded[inlen] = 0x80; - - if (inlen < 112) { - for (size_t i = inlen + 1; i < 119; ++i) { - padded[i] = 0; - } - padded[119] = (uint8_t) (bytes >> 61); - padded[120] = (uint8_t) (bytes >> 53); - padded[121] = (uint8_t) (bytes >> 45); - padded[122] = (uint8_t) (bytes >> 37); - padded[123] = (uint8_t) (bytes >> 29); - padded[124] = (uint8_t) (bytes >> 21); - padded[125] = (uint8_t) (bytes >> 13); - padded[126] = (uint8_t) (bytes >> 5); - padded[127] = (uint8_t) (bytes << 3); - crypto_hashblocks_sha512(state->ctx, padded, 128); - } else { - for (size_t i = inlen + 1; i < 247; ++i) { - padded[i] = 0; - } - padded[247] = (uint8_t) (bytes >> 61); - padded[248] = (uint8_t) (bytes >> 53); - padded[249] = (uint8_t) (bytes >> 45); - padded[250] = (uint8_t) (bytes >> 37); - 
padded[251] = (uint8_t) (bytes >> 29); - padded[252] = (uint8_t) (bytes >> 21); - padded[253] = (uint8_t) (bytes >> 13); - padded[254] = (uint8_t) (bytes >> 5); - padded[255] = (uint8_t) (bytes << 3); - crypto_hashblocks_sha512(state->ctx, padded, 256); - } - - for (size_t i = 0; i < 64; ++i) { - out[i] = state->ctx[i]; - } - sha512_inc_ctx_release(state); -} - -void sha256(uint8_t *out, const uint8_t *in, size_t inlen) { - sha256ctx state; - - sha256_inc_init(&state); - sha256_inc_finalize(out, &state, in, inlen); -} - -void sha512(uint8_t *out, const uint8_t *in, size_t inlen) { - sha512ctx state; - - sha512_inc_init(&state); - sha512_inc_finalize(out, &state, in, inlen); -} - -void OQS_SHA2_sha256_inc_init(OQS_SHA2_sha256_ctx *state) { - oqs_sha2_sha256_inc_init((sha256ctx *) state); -} - -void OQS_SHA2_sha256_inc_ctx_clone(OQS_SHA2_sha256_ctx *dest, const OQS_SHA2_sha256_ctx *src) { - oqs_sha2_sha256_inc_ctx_clone((sha256ctx *) dest, (const sha256ctx *) src); -} - -void OQS_SHA2_sha256_inc_ctx_release(OQS_SHA2_sha256_ctx *state) { - oqs_sha2_sha256_inc_ctx_release((sha256ctx *) state); -} - -void OQS_SHA2_sha256_inc_blocks(OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inblocks) { - oqs_sha2_sha256_inc_blocks((sha256ctx *) state, in, inblocks); -} - -void OQS_SHA2_sha256_inc_finalize(uint8_t *out, OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inlen) { - oqs_sha2_sha256_inc_finalize(out, (sha256ctx *) state, in, inlen); -} - -void OQS_SHA2_sha512_inc_init(OQS_SHA2_sha512_ctx *state) { - oqs_sha2_sha512_inc_init((sha512ctx *) state); -} - -void OQS_SHA2_sha512_inc_ctx_clone(OQS_SHA2_sha512_ctx *dest, const OQS_SHA2_sha512_ctx *src) { - oqs_sha2_sha512_inc_ctx_clone((sha512ctx *) dest, (const sha512ctx *) src); -} - -void OQS_SHA2_sha512_inc_ctx_release(OQS_SHA2_sha512_ctx *state) { - oqs_sha2_sha512_inc_ctx_release((sha512ctx *) state); -} - -void OQS_SHA2_sha512_inc_blocks(OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inblocks) { - oqs_sha2_sha512_inc_blocks((sha512ctx *) state, in, inblocks); -} - -void OQS_SHA2_sha512_inc_finalize(uint8_t *out, OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inlen) { - oqs_sha2_sha512_inc_finalize(out, (sha512ctx *) state, in, inlen); -} +// SPDX-License-Identifier: Public domain + +#include "sha2.h" +#include <stdio.h> + +/* Based on the public domain implementation in + * crypto_hash/sha512/ref/ from http://bench.cr.yp.to/supercop.html + * by D. J. 
Bernstein */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +static uint32_t load_bigendian_32(const uint8_t *x) { + return (uint32_t)(x[3]) | (((uint32_t)(x[2])) << 8) | + (((uint32_t)(x[1])) << 16) | (((uint32_t)(x[0])) << 24); +} + +static uint64_t load_bigendian_64(const uint8_t *x) { + return (uint64_t)(x[7]) | (((uint64_t)(x[6])) << 8) | + (((uint64_t)(x[5])) << 16) | (((uint64_t)(x[4])) << 24) | + (((uint64_t)(x[3])) << 32) | (((uint64_t)(x[2])) << 40) | + (((uint64_t)(x[1])) << 48) | (((uint64_t)(x[0])) << 56); +} + +static void store_bigendian_32(uint8_t *x, uint64_t u) { + x[3] = (uint8_t) u; + u >>= 8; + x[2] = (uint8_t) u; + u >>= 8; + x[1] = (uint8_t) u; + u >>= 8; + x[0] = (uint8_t) u; +} + +static void store_bigendian_64(uint8_t *x, uint64_t u) { + x[7] = (uint8_t) u; + u >>= 8; + x[6] = (uint8_t) u; + u >>= 8; + x[5] = (uint8_t) u; + u >>= 8; + x[4] = (uint8_t) u; + u >>= 8; + x[3] = (uint8_t) u; + u >>= 8; + x[2] = (uint8_t) u; + u >>= 8; + x[1] = (uint8_t) u; + u >>= 8; + x[0] = (uint8_t) u; +} + +#define SHR(x, c) ((x) >> (c)) +#define ROTR_32(x, c) (((x) >> (c)) | ((x) << (32 - (c)))) +#define ROTR_64(x, c) (((x) >> (c)) | ((x) << (64 - (c)))) + +#define Ch(x, y, z) (((x) & (y)) ^ (~(x) & (z))) +#define Maj(x, y, z) (((x) & (y)) ^ ((x) & (z)) ^ ((y) & (z))) + +#define Sigma0_32(x) (ROTR_32(x, 2) ^ ROTR_32(x,13) ^ ROTR_32(x,22)) +#define Sigma1_32(x) (ROTR_32(x, 6) ^ ROTR_32(x,11) ^ ROTR_32(x,25)) +#define sigma0_32(x) (ROTR_32(x, 7) ^ ROTR_32(x,18) ^ SHR(x, 3)) +#define sigma1_32(x) (ROTR_32(x,17) ^ ROTR_32(x,19) ^ SHR(x,10)) + +#define Sigma0_64(x) (ROTR_64(x, 28) ^ ROTR_64(x, 34) ^ ROTR_64(x, 39)) +#define Sigma1_64(x) (ROTR_64(x, 14) ^ ROTR_64(x, 18) ^ ROTR_64(x, 41)) +#define sigma0_64(x) (ROTR_64(x, 1) ^ ROTR_64(x, 8) ^ SHR(x, 7)) +#define sigma1_64(x) (ROTR_64(x, 19) ^ ROTR_64(x, 61) ^ SHR(x, 6)) + +#define M_32(w0, w14, w9, w1) w0 = sigma1_32(w14) + (w9) + sigma0_32(w1) + (w0); +#define M_64(w0, w14, w9, w1) w0 = sigma1_64(w14) + (w9) + sigma0_64(w1) + (w0); + +#define EXPAND_32 \ + M_32(w0, w14, w9, w1) \ + M_32(w1, w15, w10, w2) \ + M_32(w2, w0, w11, w3) \ + M_32(w3, w1, w12, w4) \ + M_32(w4, w2, w13, w5) \ + M_32(w5, w3, w14, w6) \ + M_32(w6, w4, w15, w7) \ + M_32(w7, w5, w0, w8) \ + M_32(w8, w6, w1, w9) \ + M_32(w9, w7, w2, w10) \ + M_32(w10, w8, w3, w11) \ + M_32(w11, w9, w4, w12) \ + M_32(w12, w10, w5, w13) \ + M_32(w13, w11, w6, w14) \ + M_32(w14, w12, w7, w15) \ + M_32(w15, w13, w8, w0) + +#define EXPAND_64 \ + M_64(w0, w14, w9, w1) \ + M_64(w1, w15, w10, w2) \ + M_64(w2, w0, w11, w3) \ + M_64(w3, w1, w12, w4) \ + M_64(w4, w2, w13, w5) \ + M_64(w5, w3, w14, w6) \ + M_64(w6, w4, w15, w7) \ + M_64(w7, w5, w0, w8) \ + M_64(w8, w6, w1, w9) \ + M_64(w9, w7, w2, w10) \ + M_64(w10, w8, w3, w11) \ + M_64(w11, w9, w4, w12) \ + M_64(w12, w10, w5, w13) \ + M_64(w13, w11, w6, w14) \ + M_64(w14, w12, w7, w15) \ + M_64(w15, w13, w8, w0) + +#define F_32(w, k) \ + T1 = h + Sigma1_32(e) + Ch(e, f, g) + (k) + (w); \ + T2 = Sigma0_32(a) + Maj(a, b, c); \ + h = g; \ + g = f; \ + f = e; \ + e = d + T1; \ + d = c; \ + c = b; \ + b = a; \ + a = T1 + T2; + +#define F_64(w, k) \ + T1 = h + Sigma1_64(e) + Ch(e, f, g) + (k) + (w); \ + T2 = Sigma0_64(a) + Maj(a, b, c); \ + h = g; \ + g = f; \ + f = e; \ + e = d + T1; \ + d = c; \ + c = b; \ + b = a; \ + a = T1 + T2; + +static size_t crypto_hashblocks_sha256(uint8_t *statebytes, + const uint8_t *in, size_t inlen) { + uint32_t state[8]; + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; + 
uint32_t e; + uint32_t f; + uint32_t g; + uint32_t h; + + a = load_bigendian_32(statebytes + 0); + state[0] = a; + b = load_bigendian_32(statebytes + 4); + state[1] = b; + c = load_bigendian_32(statebytes + 8); + state[2] = c; + d = load_bigendian_32(statebytes + 12); + state[3] = d; + e = load_bigendian_32(statebytes + 16); + state[4] = e; + f = load_bigendian_32(statebytes + 20); + state[5] = f; + g = load_bigendian_32(statebytes + 24); + state[6] = g; + h = load_bigendian_32(statebytes + 28); + state[7] = h; + + while (inlen >= 64) { + uint32_t w0 = load_bigendian_32(in + 0); + uint32_t w1 = load_bigendian_32(in + 4); + uint32_t w2 = load_bigendian_32(in + 8); + uint32_t w3 = load_bigendian_32(in + 12); + uint32_t w4 = load_bigendian_32(in + 16); + uint32_t w5 = load_bigendian_32(in + 20); + uint32_t w6 = load_bigendian_32(in + 24); + uint32_t w7 = load_bigendian_32(in + 28); + uint32_t w8 = load_bigendian_32(in + 32); + uint32_t w9 = load_bigendian_32(in + 36); + uint32_t w10 = load_bigendian_32(in + 40); + uint32_t w11 = load_bigendian_32(in + 44); + uint32_t w12 = load_bigendian_32(in + 48); + uint32_t w13 = load_bigendian_32(in + 52); + uint32_t w14 = load_bigendian_32(in + 56); + uint32_t w15 = load_bigendian_32(in + 60); + + uint32_t T1; + uint32_t T2; + + F_32(w0, 0x428a2f98) + F_32(w1, 0x71374491) + F_32(w2, 0xb5c0fbcf) + F_32(w3, 0xe9b5dba5) + F_32(w4, 0x3956c25b) + F_32(w5, 0x59f111f1) + F_32(w6, 0x923f82a4) + F_32(w7, 0xab1c5ed5) + F_32(w8, 0xd807aa98) + F_32(w9, 0x12835b01) + F_32(w10, 0x243185be) + F_32(w11, 0x550c7dc3) + F_32(w12, 0x72be5d74) + F_32(w13, 0x80deb1fe) + F_32(w14, 0x9bdc06a7) + F_32(w15, 0xc19bf174) + + EXPAND_32 + + F_32(w0, 0xe49b69c1) + F_32(w1, 0xefbe4786) + F_32(w2, 0x0fc19dc6) + F_32(w3, 0x240ca1cc) + F_32(w4, 0x2de92c6f) + F_32(w5, 0x4a7484aa) + F_32(w6, 0x5cb0a9dc) + F_32(w7, 0x76f988da) + F_32(w8, 0x983e5152) + F_32(w9, 0xa831c66d) + F_32(w10, 0xb00327c8) + F_32(w11, 0xbf597fc7) + F_32(w12, 0xc6e00bf3) + F_32(w13, 0xd5a79147) + F_32(w14, 0x06ca6351) + F_32(w15, 0x14292967) + + EXPAND_32 + + F_32(w0, 0x27b70a85) + F_32(w1, 0x2e1b2138) + F_32(w2, 0x4d2c6dfc) + F_32(w3, 0x53380d13) + F_32(w4, 0x650a7354) + F_32(w5, 0x766a0abb) + F_32(w6, 0x81c2c92e) + F_32(w7, 0x92722c85) + F_32(w8, 0xa2bfe8a1) + F_32(w9, 0xa81a664b) + F_32(w10, 0xc24b8b70) + F_32(w11, 0xc76c51a3) + F_32(w12, 0xd192e819) + F_32(w13, 0xd6990624) + F_32(w14, 0xf40e3585) + F_32(w15, 0x106aa070) + + EXPAND_32 + + F_32(w0, 0x19a4c116) + F_32(w1, 0x1e376c08) + F_32(w2, 0x2748774c) + F_32(w3, 0x34b0bcb5) + F_32(w4, 0x391c0cb3) + F_32(w5, 0x4ed8aa4a) + F_32(w6, 0x5b9cca4f) + F_32(w7, 0x682e6ff3) + F_32(w8, 0x748f82ee) + F_32(w9, 0x78a5636f) + F_32(w10, 0x84c87814) + F_32(w11, 0x8cc70208) + F_32(w12, 0x90befffa) + F_32(w13, 0xa4506ceb) + F_32(w14, 0xbef9a3f7) + F_32(w15, 0xc67178f2) + + a += state[0]; + b += state[1]; + c += state[2]; + d += state[3]; + e += state[4]; + f += state[5]; + g += state[6]; + h += state[7]; + + state[0] = a; + state[1] = b; + state[2] = c; + state[3] = d; + state[4] = e; + state[5] = f; + state[6] = g; + state[7] = h; + + in += 64; + inlen -= 64; + } + + store_bigendian_32(statebytes + 0, state[0]); + store_bigendian_32(statebytes + 4, state[1]); + store_bigendian_32(statebytes + 8, state[2]); + store_bigendian_32(statebytes + 12, state[3]); + store_bigendian_32(statebytes + 16, state[4]); + store_bigendian_32(statebytes + 20, state[5]); + store_bigendian_32(statebytes + 24, state[6]); + store_bigendian_32(statebytes + 28, state[7]); + + return inlen; +} + +static 
size_t crypto_hashblocks_sha512(uint8_t *statebytes, + const uint8_t *in, size_t inlen) { + uint64_t state[8]; + uint64_t a; + uint64_t b; + uint64_t c; + uint64_t d; + uint64_t e; + uint64_t f; + uint64_t g; + uint64_t h; + + a = load_bigendian_64(statebytes + 0); + state[0] = a; + b = load_bigendian_64(statebytes + 8); + state[1] = b; + c = load_bigendian_64(statebytes + 16); + state[2] = c; + d = load_bigendian_64(statebytes + 24); + state[3] = d; + e = load_bigendian_64(statebytes + 32); + state[4] = e; + f = load_bigendian_64(statebytes + 40); + state[5] = f; + g = load_bigendian_64(statebytes + 48); + state[6] = g; + h = load_bigendian_64(statebytes + 56); + state[7] = h; + + while (inlen >= 128) { + uint64_t w0 = load_bigendian_64(in + 0); + uint64_t w1 = load_bigendian_64(in + 8); + uint64_t w2 = load_bigendian_64(in + 16); + uint64_t w3 = load_bigendian_64(in + 24); + uint64_t w4 = load_bigendian_64(in + 32); + uint64_t w5 = load_bigendian_64(in + 40); + uint64_t w6 = load_bigendian_64(in + 48); + uint64_t w7 = load_bigendian_64(in + 56); + uint64_t w8 = load_bigendian_64(in + 64); + uint64_t w9 = load_bigendian_64(in + 72); + uint64_t w10 = load_bigendian_64(in + 80); + uint64_t w11 = load_bigendian_64(in + 88); + uint64_t w12 = load_bigendian_64(in + 96); + uint64_t w13 = load_bigendian_64(in + 104); + uint64_t w14 = load_bigendian_64(in + 112); + uint64_t w15 = load_bigendian_64(in + 120); + + uint64_t T1; + uint64_t T2; + + F_64(w0, 0x428a2f98d728ae22ULL) + F_64(w1, 0x7137449123ef65cdULL) + F_64(w2, 0xb5c0fbcfec4d3b2fULL) + F_64(w3, 0xe9b5dba58189dbbcULL) + F_64(w4, 0x3956c25bf348b538ULL) + F_64(w5, 0x59f111f1b605d019ULL) + F_64(w6, 0x923f82a4af194f9bULL) + F_64(w7, 0xab1c5ed5da6d8118ULL) + F_64(w8, 0xd807aa98a3030242ULL) + F_64(w9, 0x12835b0145706fbeULL) + F_64(w10, 0x243185be4ee4b28cULL) + F_64(w11, 0x550c7dc3d5ffb4e2ULL) + F_64(w12, 0x72be5d74f27b896fULL) + F_64(w13, 0x80deb1fe3b1696b1ULL) + F_64(w14, 0x9bdc06a725c71235ULL) + F_64(w15, 0xc19bf174cf692694ULL) + + EXPAND_64 + + F_64(w0, 0xe49b69c19ef14ad2ULL) + F_64(w1, 0xefbe4786384f25e3ULL) + F_64(w2, 0x0fc19dc68b8cd5b5ULL) + F_64(w3, 0x240ca1cc77ac9c65ULL) + F_64(w4, 0x2de92c6f592b0275ULL) + F_64(w5, 0x4a7484aa6ea6e483ULL) + F_64(w6, 0x5cb0a9dcbd41fbd4ULL) + F_64(w7, 0x76f988da831153b5ULL) + F_64(w8, 0x983e5152ee66dfabULL) + F_64(w9, 0xa831c66d2db43210ULL) + F_64(w10, 0xb00327c898fb213fULL) + F_64(w11, 0xbf597fc7beef0ee4ULL) + F_64(w12, 0xc6e00bf33da88fc2ULL) + F_64(w13, 0xd5a79147930aa725ULL) + F_64(w14, 0x06ca6351e003826fULL) + F_64(w15, 0x142929670a0e6e70ULL) + + EXPAND_64 + + F_64(w0, 0x27b70a8546d22ffcULL) + F_64(w1, 0x2e1b21385c26c926ULL) + F_64(w2, 0x4d2c6dfc5ac42aedULL) + F_64(w3, 0x53380d139d95b3dfULL) + F_64(w4, 0x650a73548baf63deULL) + F_64(w5, 0x766a0abb3c77b2a8ULL) + F_64(w6, 0x81c2c92e47edaee6ULL) + F_64(w7, 0x92722c851482353bULL) + F_64(w8, 0xa2bfe8a14cf10364ULL) + F_64(w9, 0xa81a664bbc423001ULL) + F_64(w10, 0xc24b8b70d0f89791ULL) + F_64(w11, 0xc76c51a30654be30ULL) + F_64(w12, 0xd192e819d6ef5218ULL) + F_64(w13, 0xd69906245565a910ULL) + F_64(w14, 0xf40e35855771202aULL) + F_64(w15, 0x106aa07032bbd1b8ULL) + + EXPAND_64 + + F_64(w0, 0x19a4c116b8d2d0c8ULL) + F_64(w1, 0x1e376c085141ab53ULL) + F_64(w2, 0x2748774cdf8eeb99ULL) + F_64(w3, 0x34b0bcb5e19b48a8ULL) + F_64(w4, 0x391c0cb3c5c95a63ULL) + F_64(w5, 0x4ed8aa4ae3418acbULL) + F_64(w6, 0x5b9cca4f7763e373ULL) + F_64(w7, 0x682e6ff3d6b2b8a3ULL) + F_64(w8, 0x748f82ee5defb2fcULL) + F_64(w9, 0x78a5636f43172f60ULL) + F_64(w10, 0x84c87814a1f0ab72ULL) + F_64(w11, 
0x8cc702081a6439ecULL) + F_64(w12, 0x90befffa23631e28ULL) + F_64(w13, 0xa4506cebde82bde9ULL) + F_64(w14, 0xbef9a3f7b2c67915ULL) + F_64(w15, 0xc67178f2e372532bULL) + + EXPAND_64 + + F_64(w0, 0xca273eceea26619cULL) + F_64(w1, 0xd186b8c721c0c207ULL) + F_64(w2, 0xeada7dd6cde0eb1eULL) + F_64(w3, 0xf57d4f7fee6ed178ULL) + F_64(w4, 0x06f067aa72176fbaULL) + F_64(w5, 0x0a637dc5a2c898a6ULL) + F_64(w6, 0x113f9804bef90daeULL) + F_64(w7, 0x1b710b35131c471bULL) + F_64(w8, 0x28db77f523047d84ULL) + F_64(w9, 0x32caab7b40c72493ULL) + F_64(w10, 0x3c9ebe0a15c9bebcULL) + F_64(w11, 0x431d67c49c100d4cULL) + F_64(w12, 0x4cc5d4becb3e42b6ULL) + F_64(w13, 0x597f299cfc657e2aULL) + F_64(w14, 0x5fcb6fab3ad6faecULL) + F_64(w15, 0x6c44198c4a475817ULL) + + a += state[0]; + b += state[1]; + c += state[2]; + d += state[3]; + e += state[4]; + f += state[5]; + g += state[6]; + h += state[7]; + + state[0] = a; + state[1] = b; + state[2] = c; + state[3] = d; + state[4] = e; + state[5] = f; + state[6] = g; + state[7] = h; + + in += 128; + inlen -= 128; + } + + store_bigendian_64(statebytes + 0, state[0]); + store_bigendian_64(statebytes + 8, state[1]); + store_bigendian_64(statebytes + 16, state[2]); + store_bigendian_64(statebytes + 24, state[3]); + store_bigendian_64(statebytes + 32, state[4]); + store_bigendian_64(statebytes + 40, state[5]); + store_bigendian_64(statebytes + 48, state[6]); + store_bigendian_64(statebytes + 56, state[7]); + + return inlen; +} + +static const uint8_t iv_256[32] = { + 0x6a, 0x09, 0xe6, 0x67, 0xbb, 0x67, 0xae, 0x85, + 0x3c, 0x6e, 0xf3, 0x72, 0xa5, 0x4f, 0xf5, 0x3a, + 0x51, 0x0e, 0x52, 0x7f, 0x9b, 0x05, 0x68, 0x8c, + 0x1f, 0x83, 0xd9, 0xab, 0x5b, 0xe0, 0xcd, 0x19 +}; + +static const uint8_t iv_512[64] = { + 0x6a, 0x09, 0xe6, 0x67, 0xf3, 0xbc, 0xc9, 0x08, 0xbb, 0x67, 0xae, + 0x85, 0x84, 0xca, 0xa7, 0x3b, 0x3c, 0x6e, 0xf3, 0x72, 0xfe, 0x94, + 0xf8, 0x2b, 0xa5, 0x4f, 0xf5, 0x3a, 0x5f, 0x1d, 0x36, 0xf1, 0x51, + 0x0e, 0x52, 0x7f, 0xad, 0xe6, 0x82, 0xd1, 0x9b, 0x05, 0x68, 0x8c, + 0x2b, 0x3e, 0x6c, 0x1f, 0x1f, 0x83, 0xd9, 0xab, 0xfb, 0x41, 0xbd, + 0x6b, 0x5b, 0xe0, 0xcd, 0x19, 0x13, 0x7e, 0x21, 0x79 +}; + +void sha256_inc_init(sha256ctx *state) { + state->ctx = malloc(PQC_SHA256CTX_BYTES); + if (state->ctx == NULL) { + exit(111); + } + for (size_t i = 0; i < 32; ++i) { + state->ctx[i] = iv_256[i]; + } + for (size_t i = 32; i < 40; ++i) { + state->ctx[i] = 0; + } +} + +void sha512_inc_init(sha512ctx *state) { + state->ctx = malloc(PQC_SHA512CTX_BYTES); + if (state->ctx == NULL) { + exit(111); + } + for (size_t i = 0; i < 64; ++i) { + state->ctx[i] = iv_512[i]; + } + for (size_t i = 64; i < 72; ++i) { + state->ctx[i] = 0; + } +} + +void sha256_inc_ctx_clone(sha256ctx *stateout, const sha256ctx *statein) { + stateout->ctx = malloc(PQC_SHA256CTX_BYTES); + if (stateout->ctx == NULL) { + exit(111); + } + memcpy(stateout->ctx, statein->ctx, PQC_SHA256CTX_BYTES); +} + +void sha512_inc_ctx_clone(sha512ctx *stateout, const sha512ctx *statein) { + stateout->ctx = malloc(PQC_SHA512CTX_BYTES); + if (stateout->ctx == NULL) { + exit(111); + } + memcpy(stateout->ctx, statein->ctx, PQC_SHA512CTX_BYTES); +} + +/* Destroy the hash state. */ +void sha256_inc_ctx_release(sha256ctx *state) { + free(state->ctx); // IGNORE free-check +} + +/* Destroy the hash state. 
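 *
 * A minimal lifecycle sketch for the incremental API above (msg and len are
 * illustrative placeholders, not names from this file): init allocates the
 * context, blocks consumes whole 128-byte blocks, and finalize pads, writes
 * the digest, and already calls sha512_inc_ctx_release(), so the context must
 * not be released again afterwards.
 *
 *   sha512ctx st;
 *   uint8_t digest[64];
 *   sha512_inc_init(&st);                    // allocates st.ctx, loads iv_512
 *   sha512_inc_blocks(&st, msg, len / 128);  // whole 128-byte blocks only
 *   sha512_inc_finalize(digest, &st, msg + 128 * (len / 128), len % 128);
 *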
*/ +void sha512_inc_ctx_release(sha512ctx *state) { + free(state->ctx); // IGNORE free-check +} + +void sha256_inc_blocks(sha256ctx *state, const uint8_t *in, size_t inblocks) { + uint64_t bytes = load_bigendian_64(state->ctx + 32); + + crypto_hashblocks_sha256(state->ctx, in, 64 * inblocks); + bytes += 64 * inblocks; + + store_bigendian_64(state->ctx + 32, bytes); +} + +void sha512_inc_blocks(sha512ctx *state, const uint8_t *in, size_t inblocks) { + uint64_t bytes = load_bigendian_64(state->ctx + 64); + + crypto_hashblocks_sha512(state->ctx, in, 128 * inblocks); + bytes += 128 * inblocks; + + store_bigendian_64(state->ctx + 64, bytes); +} + +void sha256_inc_finalize(uint8_t *out, sha256ctx *state, const uint8_t *in, size_t inlen) { + uint8_t padded[128]; + uint64_t bytes = load_bigendian_64(state->ctx + 32) + inlen; + + crypto_hashblocks_sha256(state->ctx, in, inlen); + in += inlen; + inlen &= 63; + in -= inlen; + + for (size_t i = 0; i < inlen; ++i) { + padded[i] = in[i]; + } + padded[inlen] = 0x80; + + if (inlen < 56) { + for (size_t i = inlen + 1; i < 56; ++i) { + padded[i] = 0; + } + padded[56] = (uint8_t) (bytes >> 53); + padded[57] = (uint8_t) (bytes >> 45); + padded[58] = (uint8_t) (bytes >> 37); + padded[59] = (uint8_t) (bytes >> 29); + padded[60] = (uint8_t) (bytes >> 21); + padded[61] = (uint8_t) (bytes >> 13); + padded[62] = (uint8_t) (bytes >> 5); + padded[63] = (uint8_t) (bytes << 3); + crypto_hashblocks_sha256(state->ctx, padded, 64); + } else { + for (size_t i = inlen + 1; i < 120; ++i) { + padded[i] = 0; + } + padded[120] = (uint8_t) (bytes >> 53); + padded[121] = (uint8_t) (bytes >> 45); + padded[122] = (uint8_t) (bytes >> 37); + padded[123] = (uint8_t) (bytes >> 29); + padded[124] = (uint8_t) (bytes >> 21); + padded[125] = (uint8_t) (bytes >> 13); + padded[126] = (uint8_t) (bytes >> 5); + padded[127] = (uint8_t) (bytes << 3); + crypto_hashblocks_sha256(state->ctx, padded, 128); + } + + for (size_t i = 0; i < 32; ++i) { + out[i] = state->ctx[i]; + } + sha256_inc_ctx_release(state); +} + +void sha512_inc_finalize(uint8_t *out, sha512ctx *state, const uint8_t *in, size_t inlen) { + uint8_t padded[256]; + uint64_t bytes = load_bigendian_64(state->ctx + 64) + inlen; + + crypto_hashblocks_sha512(state->ctx, in, inlen); + in += inlen; + inlen &= 127; + in -= inlen; + + for (size_t i = 0; i < inlen; ++i) { + padded[i] = in[i]; + } + padded[inlen] = 0x80; + + if (inlen < 112) { + for (size_t i = inlen + 1; i < 119; ++i) { + padded[i] = 0; + } + padded[119] = (uint8_t) (bytes >> 61); + padded[120] = (uint8_t) (bytes >> 53); + padded[121] = (uint8_t) (bytes >> 45); + padded[122] = (uint8_t) (bytes >> 37); + padded[123] = (uint8_t) (bytes >> 29); + padded[124] = (uint8_t) (bytes >> 21); + padded[125] = (uint8_t) (bytes >> 13); + padded[126] = (uint8_t) (bytes >> 5); + padded[127] = (uint8_t) (bytes << 3); + crypto_hashblocks_sha512(state->ctx, padded, 128); + } else { + for (size_t i = inlen + 1; i < 247; ++i) { + padded[i] = 0; + } + padded[247] = (uint8_t) (bytes >> 61); + padded[248] = (uint8_t) (bytes >> 53); + padded[249] = (uint8_t) (bytes >> 45); + padded[250] = (uint8_t) (bytes >> 37); + padded[251] = (uint8_t) (bytes >> 29); + padded[252] = (uint8_t) (bytes >> 21); + padded[253] = (uint8_t) (bytes >> 13); + padded[254] = (uint8_t) (bytes >> 5); + padded[255] = (uint8_t) (bytes << 3); + crypto_hashblocks_sha512(state->ctx, padded, 256); + } + + for (size_t i = 0; i < 64; ++i) { + out[i] = state->ctx[i]; + } + sha512_inc_ctx_release(state); +} + +void sha256(uint8_t *out, 
const uint8_t *in, size_t inlen) { + sha256ctx state; + + sha256_inc_init(&state); + sha256_inc_finalize(out, &state, in, inlen); +} + +void sha512(uint8_t *out, const uint8_t *in, size_t inlen) { + sha512ctx state; + + sha512_inc_init(&state); + sha512_inc_finalize(out, &state, in, inlen); +} + +void OQS_SHA2_sha256_inc_init(OQS_SHA2_sha256_ctx *state) { + oqs_sha2_sha256_inc_init((sha256ctx *) state); +} + +void OQS_SHA2_sha256_inc_ctx_clone(OQS_SHA2_sha256_ctx *dest, const OQS_SHA2_sha256_ctx *src) { + oqs_sha2_sha256_inc_ctx_clone((sha256ctx *) dest, (const sha256ctx *) src); +} + +void OQS_SHA2_sha256_inc_ctx_release(OQS_SHA2_sha256_ctx *state) { + oqs_sha2_sha256_inc_ctx_release((sha256ctx *) state); +} + +void OQS_SHA2_sha256_inc_blocks(OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inblocks) { + oqs_sha2_sha256_inc_blocks((sha256ctx *) state, in, inblocks); +} + +void OQS_SHA2_sha256_inc_finalize(uint8_t *out, OQS_SHA2_sha256_ctx *state, const uint8_t *in, size_t inlen) { + oqs_sha2_sha256_inc_finalize(out, (sha256ctx *) state, in, inlen); +} + +void OQS_SHA2_sha512_inc_init(OQS_SHA2_sha512_ctx *state) { + oqs_sha2_sha512_inc_init((sha512ctx *) state); +} + +void OQS_SHA2_sha512_inc_ctx_clone(OQS_SHA2_sha512_ctx *dest, const OQS_SHA2_sha512_ctx *src) { + oqs_sha2_sha512_inc_ctx_clone((sha512ctx *) dest, (const sha512ctx *) src); +} + +void OQS_SHA2_sha512_inc_ctx_release(OQS_SHA2_sha512_ctx *state) { + oqs_sha2_sha512_inc_ctx_release((sha512ctx *) state); +} + +void OQS_SHA2_sha512_inc_blocks(OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inblocks) { + oqs_sha2_sha512_inc_blocks((sha512ctx *) state, in, inblocks); +} + +void OQS_SHA2_sha512_inc_finalize(uint8_t *out, OQS_SHA2_sha512_ctx *state, const uint8_t *in, size_t inlen) { + oqs_sha2_sha512_inc_finalize(out, (sha512ctx *) state, in, inlen); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/symmetric.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/symmetric.h index e1bc27433a..900bd47d9f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/symmetric.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/symmetric.h @@ -1,23 +1,23 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include "params.h" - - -#include "aes256ctr.h" -#include "sha2.h" - -#define hash_h(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define xof_ctx_release(STATE) PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) PQCLEAN_KYBER51290S_CLEAN_aes256_prf(OUT, OUTBYTES, KEY, NONCE) -#define kdf(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) - -#define XOF_BLOCKBYTES 64 - -typedef aes256xof_ctx xof_state; - - -#endif /* SYMMETRIC_H */ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include "params.h" + + +#include "aes256ctr.h" +#include "sha2.h" + +#define hash_h(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha512(OUT, IN, INBYTES) +#define xof_absorb(STATE, IN, X, Y) PQCLEAN_KYBER51290S_CLEAN_aes256xof_absorb(STATE, IN, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) PQCLEAN_KYBER51290S_CLEAN_aes256xof_squeezeblocks(OUT, OUTBLOCKS, STATE) +#define xof_ctx_release(STATE) PQCLEAN_KYBER51290S_CLEAN_aes256xof_ctx_release(STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) 
PQCLEAN_KYBER51290S_CLEAN_aes256_prf(OUT, OUTBYTES, KEY, NONCE) +#define kdf(OUT, IN, INBYTES) sha256(OUT, IN, INBYTES) + +#define XOF_BLOCKBYTES 64 + +typedef aes256xof_ctx xof_state; + + +#endif /* SYMMETRIC_H */ diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.c index 35867a9920..c8110f6820 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.c @@ -1,50 +1,50 @@ -#include "verify.h" - -#include <stddef.h> -#include <stdint.h> - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint8_t PQCLEAN_KYBER51290S_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len) { - uint64_t r; - size_t i; - r = 0; - - for (i = 0; i < len; i++) { - r |= a[i] ^ b[i]; - } - - r = (-r) >> 63; - return (uint8_t)r; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void PQCLEAN_KYBER51290S_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) { - size_t i; - - b = -b; - for (i = 0; i < len; i++) { - r[i] ^= b & (x[i] ^ r[i]); - } -} +#include "verify.h" + +#include <stddef.h> +#include <stdint.h> + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +uint8_t PQCLEAN_KYBER51290S_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len) { + uint64_t r; + size_t i; + r = 0; + + for (i = 0; i < len; i++) { + r |= a[i] ^ b[i]; + } + + r = (-r) >> 63; + return (uint8_t)r; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify x if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
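*
* Illustrative only (ct, ct_cmp, shared_secret, reject_value and the lengths are
* hypothetical names, not taken from this file): in an FO-style decapsulation the
* two routines are typically combined so that a mismatching re-encrypted
* ciphertext swaps in a rejection value without secret-dependent branching:
*
*   uint8_t fail = PQCLEAN_KYBER51290S_CLEAN_verify(ct, ct_cmp, ct_len);
*   PQCLEAN_KYBER51290S_CLEAN_cmov(shared_secret, reject_value, ss_len, fail);
*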
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void PQCLEAN_KYBER51290S_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) { + size_t i; + + b = -b; + for (i = 0; i < len; i++) { + r[i] ^= b & (x[i] ^ r[i]); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.h index 7ece5735a8..d0dc100410 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_90s_r2/verify.h @@ -1,11 +1,11 @@ -#ifndef VERIFY_H -#define VERIFY_H - -#include <stddef.h> -#include <stdint.h> - -uint8_t PQCLEAN_KYBER51290S_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len); - -void PQCLEAN_KYBER51290S_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#endif +#ifndef VERIFY_H +#define VERIFY_H + +#include <stddef.h> +#include <stdint.h> + +uint8_t PQCLEAN_KYBER51290S_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len); + +void PQCLEAN_KYBER51290S_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.c index b4fc010ca9..4e86526aaa 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.c @@ -1,51 +1,51 @@ -#include "cbd.h" -#include "params.h" - -#include <stddef.h> -#include <stdint.h> - -/************************************************* -* Name: load32_littleendian -* -* Description: load bytes into a 32-bit integer -* in little-endian order -* -* Arguments: - const uint8_t *x: pointer to input byte array -* -* Returns 32-bit unsigned integer loaded from x -**************************************************/ -static uint32_t load32_littleendian(const uint8_t *x) { - uint32_t r; - r = (uint32_t)x[0]; - r |= (uint32_t)x[1] << 8; - r |= (uint32_t)x[2] << 16; - r |= (uint32_t)x[3] << 24; - return r; -} - -/************************************************* -* Name: cbd -* -* Description: Given an array of uniformly random bytes, compute -* polynomial with coefficients distributed according to -* a centered binomial distribution with parameter KYBER_ETA -* specialized for KYBER_ETA=2 -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *buf: pointer to input byte array -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_cbd(poly *r, const uint8_t *buf) { - int16_t a, b; - - for (size_t i = 0; i < KYBER_N / 8; i++) { - uint32_t t = load32_littleendian(buf + 4 * i); - uint32_t d = t & 0x55555555; - d += (t >> 1) & 0x55555555; - - for (size_t j = 0; j < 8; j++) { - a = (d >> 4 * j) & 0x3; - b = (d >> (4 * j + 2)) & 0x3; - r->coeffs[8 * i + j] = a - b; - } - } -} +#include "cbd.h" +#include "params.h" + +#include <stddef.h> +#include <stdint.h> + +/************************************************* +* Name: load32_littleendian +* +* Description: load bytes into a 32-bit integer +* in little-endian order +* +* Arguments: - const uint8_t *x: pointer to input byte array +* +* Returns 32-bit unsigned integer loaded from x +**************************************************/ +static uint32_t load32_littleendian(const uint8_t *x) { + uint32_t r; + r = (uint32_t)x[0]; + r |= (uint32_t)x[1] << 
8; + r |= (uint32_t)x[2] << 16; + r |= (uint32_t)x[3] << 24; + return r; +} + +/************************************************* +* Name: cbd +* +* Description: Given an array of uniformly random bytes, compute +* polynomial with coefficients distributed according to +* a centered binomial distribution with parameter KYBER_ETA +* specialized for KYBER_ETA=2 +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *buf: pointer to input byte array +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_cbd(poly *r, const uint8_t *buf) { + int16_t a, b; + + for (size_t i = 0; i < KYBER_N / 8; i++) { + uint32_t t = load32_littleendian(buf + 4 * i); + uint32_t d = t & 0x55555555; + d += (t >> 1) & 0x55555555; + + for (size_t j = 0; j < 8; j++) { + a = (d >> 4 * j) & 0x3; + b = (d >> (4 * j + 2)) & 0x3; + r->coeffs[8 * i + j] = a - b; + } + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.h index 2eb5dc89cc..0891560aab 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/cbd.h @@ -1,8 +1,8 @@ -#ifndef CBD_H -#define CBD_H - -#include "poly.h" - -void PQCLEAN_KYBER512_CLEAN_cbd(poly *r, const uint8_t *buf); - -#endif +#ifndef CBD_H +#define CBD_H + +#include "poly.h" + +void PQCLEAN_KYBER512_CLEAN_cbd(poly *r, const uint8_t *buf); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.c index 2ba848f951..776bb21398 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.c @@ -1,619 +1,619 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe - * SPDX-License-Identifier: Public domain - */ - -#include "fips202_kyber_r2.h" -#include "symmetric.h" - -#include <stddef.h> -#include <stdint.h> -#include <stdlib.h> -#include <string.h> - -#define PQC_SHAKEINCCTX_BYTES (sizeof(uint64_t)*26) -#define PQC_SHAKECTX_BYTES (sizeof(uint64_t)*25) - -#define NROUNDS 24 -#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) - -/************************************************* - * Name: load64 - * - * Description: Load 8 bytes into uint64_t in little-endian order - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns the loaded 64-bit unsigned integer - **************************************************/ -static uint64_t load64(const uint8_t *x) { - uint64_t r = 0; - for (size_t i = 0; i < 8; ++i) { - r |= (uint64_t)x[i] << 8 * i; - } - - return r; -} - -/************************************************* - * Name: store64 - * - * Description: Store a 64-bit integer to a byte array in little-endian order - * - * Arguments: - uint8_t *x: pointer to the output byte array - * - uint64_t u: input 64-bit unsigned integer - **************************************************/ -static void store64(uint8_t *x, uint64_t u) { - for (size_t i = 0; i < 8; ++i) { - x[i] = (uint8_t) (u >> 8 * i); - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = { - 0x0000000000000001ULL, 0x0000000000008082ULL, - 0x800000000000808aULL, 0x8000000080008000ULL, - 0x000000000000808bULL, 0x0000000080000001ULL, - 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, - 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, - 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, - 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, - 0x0000000080000001ULL, 0x8000000080008008ULL -}; - -/************************************************* - * Name: KeccakF1600_StatePermute - * - * Description: The Keccak F1600 Permutation - * - * Arguments: - uint64_t *state: pointer to input/output Keccak state - **************************************************/ -static void KeccakF1600_StatePermute(uint64_t *state) { - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - - // copyFromState(A, state) - Aba = state[0]; - Abe = state[1]; - Abi = state[2]; - Abo = state[3]; - Abu = state[4]; - Aga = state[5]; - Age = state[6]; - Agi = state[7]; - Ago = state[8]; - Agu = state[9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for (round = 0; round < NROUNDS; round += 2) { - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - // prepareTheta - BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; - BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; - BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; - BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; - BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; - - // 
thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^ ((~BCe) & BCi); - Eba ^= KeccakF_RoundConstants[round]; - Ebe = BCe ^ ((~BCi) & BCo); - Ebi = BCi ^ ((~BCo) & BCu); - Ebo = BCo ^ ((~BCu) & BCa); - Ebu = BCu ^ ((~BCa) & BCe); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^ ((~BCe) & BCi); - Ege = BCe ^ ((~BCi) & BCo); - Egi = BCi ^ ((~BCo) & BCu); - Ego = BCo ^ ((~BCu) & BCa); - Egu = BCu ^ ((~BCa) & BCe); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^ ((~BCe) & BCi); - Eke = BCe ^ ((~BCi) & BCo); - Eki = BCi ^ ((~BCo) & BCu); - Eko = BCo ^ ((~BCu) & BCa); - Eku = BCu ^ ((~BCa) & BCe); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^ ((~BCe) & BCi); - Eme = BCe ^ ((~BCi) & BCo); - Emi = BCi ^ ((~BCo) & BCu); - Emo = BCo ^ ((~BCu) & BCa); - Emu = BCu ^ ((~BCa) & BCe); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^ ((~BCe) & BCi); - Ese = BCe ^ ((~BCi) & BCo); - Esi = BCi ^ ((~BCo) & BCu); - Eso = BCo ^ ((~BCu) & BCa); - Esu = BCu ^ ((~BCa) & BCe); - - // prepareTheta - BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; - BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; - BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; - BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; - BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; - - // thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^ ((~BCe) & BCi); - Aba ^= KeccakF_RoundConstants[round + 1]; - Abe = BCe ^ ((~BCi) & BCo); - Abi = BCi ^ ((~BCo) & BCu); - Abo = BCo ^ ((~BCu) & BCa); - Abu = BCu ^ ((~BCa) & BCe); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^ ((~BCe) & BCi); - Age = BCe ^ ((~BCi) & BCo); - Agi = BCi ^ ((~BCo) & BCu); - Ago = BCo ^ ((~BCu) & BCa); - Agu = BCu ^ ((~BCa) & BCe); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^ ((~BCe) & BCi); - Ake = BCe ^ ((~BCi) & BCo); - Aki = BCi ^ ((~BCo) & BCu); - Ako = BCo ^ ((~BCu) & BCa); - Aku = BCu ^ ((~BCa) & BCe); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^ ((~BCe) & BCi); - Ame = BCe ^ ((~BCi) & BCo); - Ami = BCi ^ ((~BCo) & BCu); - Amo = BCo ^ ((~BCu) & BCa); - Amu = BCu ^ ((~BCa) & 
BCe); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^ ((~BCe) & BCi); - Ase = BCe ^ ((~BCi) & BCo); - Asi = BCi ^ ((~BCo) & BCu); - Aso = BCo ^ ((~BCu) & BCa); - Asu = BCu ^ ((~BCa) & BCe); - } - - // copyToState(state, A) - state[0] = Aba; - state[1] = Abe; - state[2] = Abi; - state[3] = Abo; - state[4] = Abu; - state[5] = Aga; - state[6] = Age; - state[7] = Agi; - state[8] = Ago; - state[9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; -} - -/************************************************* - * Name: keccak_absorb - * - * Description: Absorb step of Keccak; - * non-incremental, starts by zeroeing the state. - * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - const uint8_t *m: pointer to input to be absorbed into s - * - size_t mlen: length of input in bytes - * - uint8_t p: domain-separation byte for different - * Keccak-derived functions - **************************************************/ -static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, - size_t mlen, uint8_t p) { - size_t i; - uint8_t t[200]; - - /* Zero state */ - for (i = 0; i < 25; ++i) { - s[i] = 0; - } - - while (mlen >= r) { - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(m + 8 * i); - } - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) { - t[i] = 0; - } - for (i = 0; i < mlen; ++i) { - t[i] = m[i]; - } - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(t + 8 * i); - } -} - -/************************************************* - * Name: keccak_squeezeblocks - * - * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. - * Modifies the state. Can be called multiple times to keep - * squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *h: pointer to output blocks - * - size_t nblocks: number of blocks to be - * squeezed (written to h) - * - uint64_t *s: pointer to input/output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - **************************************************/ -static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, - uint64_t *s, uint32_t r) { - while (nblocks > 0) { - KeccakF1600_StatePermute(s); - for (size_t i = 0; i < (r >> 3); i++) { - store64(h + 8 * i, s[i]); - } - h += r; - nblocks--; - } -} - -/************************************************* - * Name: shake128_absorb - * - * Description: Absorb step of the SHAKE128 XOF. - * non-incremental, starts by zeroeing the state. - * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed - * into s - * - size_t inlen: length of input in bytes - **************************************************/ -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen) { - state->ctx = malloc(PQC_SHAKECTX_BYTES); - if (state->ctx == NULL) { - exit(111); - } - keccak_absorb(state->ctx, SHAKE128_RATE, input, inlen, 0x1F); -} - -/************************************************* - * Name: shake128_squeezeblocks - * - * Description: Squeeze step of SHAKE128 XOF. 
Squeezes full blocks of - * SHAKE128_RATE bytes each. Modifies the state. Can be called - * multiple times to keep squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed - * (written to output) - * - shake128ctx *state: pointer to input/output Keccak state - **************************************************/ -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state) { - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE128_RATE); -} - -void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src) { - dest->ctx = malloc(PQC_SHAKECTX_BYTES); - if (dest->ctx == NULL) { - exit(111); - } - memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES); -} - -/** Release the allocated state. Call only once. */ -void shake128_ctx_release(shake128ctx *state) { - free(state->ctx); // IGNORE free-check -} - -/************************************************* - * Name: shake256_absorb - * - * Description: Absorb step of the SHAKE256 XOF. - * non-incremental, starts by zeroeing the state. - * - * Arguments: - shake256ctx *state: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed - * into s - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen) { - state->ctx = malloc(PQC_SHAKECTX_BYTES); - if (state->ctx == NULL) { - exit(111); - } - keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F); -} - -/************************************************* - * Name: shake256_squeezeblocks - * - * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of - * SHAKE256_RATE bytes each. Modifies the state. Can be called - * multiple times to keep squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed - * (written to output) - * - shake256ctx *state: pointer to input/output Keccak state - **************************************************/ -void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state) { - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); -} - -void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src) { - dest->ctx = malloc(PQC_SHAKECTX_BYTES); - if (dest->ctx == NULL) { - exit(111); - } - memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES); -} - -/** Release the allocated state. Call only once. 
*/ -void shake256_ctx_release(shake256ctx *state) { - free(state->ctx); // IGNORE free-check -} - -/************************************************* - * Name: shake128 - * - * Description: SHAKE128 XOF with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - size_t outlen: requested output length in bytes - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void shake128(uint8_t *output, size_t outlen, - const uint8_t *input, size_t inlen) { - size_t nblocks = outlen / SHAKE128_RATE; - uint8_t t[SHAKE128_RATE]; - shake128ctx s; - - shake128_absorb(&s, input, inlen); - shake128_squeezeblocks(output, nblocks, &s); - - output += nblocks * SHAKE128_RATE; - outlen -= nblocks * SHAKE128_RATE; - - if (outlen) { - shake128_squeezeblocks(t, 1, &s); - for (size_t i = 0; i < outlen; ++i) { - output[i] = t[i]; - } - } - shake128_ctx_release(&s); -} - -/************************************************* - * Name: shake256 - * - * Description: SHAKE256 XOF with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - size_t outlen: requested output length in bytes - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256_kyber(uint8_t *output, size_t outlen, - const uint8_t *input, size_t inlen) { - size_t nblocks = outlen / SHAKE256_RATE; - uint8_t t[SHAKE256_RATE]; - shake256ctx s; - - shake256_absorb(&s, input, inlen); - shake256_squeezeblocks(output, nblocks, &s); - - output += nblocks * SHAKE256_RATE; - outlen -= nblocks * SHAKE256_RATE; - - if (outlen) { - shake256_squeezeblocks(t, 1, &s); - for (size_t i = 0; i < outlen; ++i) { - output[i] = t[i]; - } - } - shake256_ctx_release(&s); -} - -/************************************************* - * Name: sha3_256 - * - * Description: SHA3-256 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) { - uint64_t s[25]; - uint8_t t[SHA3_256_RATE]; - - /* Absorb input */ - keccak_absorb(s, SHA3_256_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); - - for (size_t i = 0; i < 32; i++) { - output[i] = t[i]; - } -} - - -/************************************************* - * Name: sha3_512 - * - * Description: SHA3-512 with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) { - uint64_t s[25]; - uint8_t t[SHA3_512_RATE]; - - /* Absorb input */ - keccak_absorb(s, SHA3_512_RATE, input, inlen, 0x06); - - /* Squeeze output */ - keccak_squeezeblocks(t, 1, s, SHA3_512_RATE); - - for (size_t i = 0; i < 64; i++) { - output[i] = t[i]; - } -} +/* Based on the public domain implementation in + * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html + * by Ronny Van Keer + * and the public domain "TweetFips202" implementation + * from https://twitter.com/tweetfips202 + * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe + * SPDX-License-Identifier: Public domain + */ + +#include "fips202_kyber_r2.h" +#include "symmetric.h" + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#define PQC_SHAKEINCCTX_BYTES (sizeof(uint64_t)*26) +#define PQC_SHAKECTX_BYTES (sizeof(uint64_t)*25) + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) + +/************************************************* + * Name: load64 + * + * Description: Load 8 bytes into uint64_t in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns the loaded 64-bit unsigned integer + **************************************************/ +static uint64_t load64(const uint8_t *x) { + uint64_t r = 0; + for (size_t i = 0; i < 8; ++i) { + r |= (uint64_t)x[i] << 8 * i; + } + + return r; +} + +/************************************************* + * Name: store64 + * + * Description: Store a 64-bit integer to a byte array in little-endian order + * + * Arguments: - uint8_t *x: pointer to the output byte array + * - uint64_t u: input 64-bit unsigned integer + **************************************************/ +static void store64(uint8_t *x, uint64_t u) { + for (size_t i = 0; i < 8; ++i) { + x[i] = (uint8_t) (u >> 8 * i); + } +} + +/* Keccak round constants */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + 0x0000000000000001ULL, 0x0000000000008082ULL, + 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, + 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, + 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, + 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, + 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, + 0x0000000080000001ULL, 0x8000000080008008ULL +}; + +/************************************************* + * Name: KeccakF1600_StatePermute + * + * Description: The Keccak F1600 Permutation + * + * Arguments: - uint64_t *state: pointer to input/output Keccak state + **************************************************/ +static void KeccakF1600_StatePermute(uint64_t *state) { + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + + // copyFromState(A, state) + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < NROUNDS; round += 2) { + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + // prepareTheta + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + // 
thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa ^ ((~BCe) & BCi); + Eba ^= KeccakF_RoundConstants[round]; + Ebe = BCe ^ ((~BCi) & BCo); + Ebi = BCi ^ ((~BCo) & BCu); + Ebo = BCo ^ ((~BCu) & BCa); + Ebu = BCu ^ ((~BCa) & BCe); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = ROL(Asi, 61); + Ega = BCa ^ ((~BCe) & BCi); + Ege = BCe ^ ((~BCi) & BCo); + Egi = BCi ^ ((~BCo) & BCu); + Ego = BCo ^ ((~BCu) & BCa); + Egu = BCu ^ ((~BCa) & BCe); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^ ((~BCe) & BCi); + Eke = BCe ^ ((~BCi) & BCo); + Eki = BCi ^ ((~BCo) & BCu); + Eko = BCo ^ ((~BCu) & BCa); + Eku = BCu ^ ((~BCa) & BCe); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^ ((~BCe) & BCi); + Eme = BCe ^ ((~BCi) & BCo); + Emi = BCi ^ ((~BCo) & BCu); + Emo = BCo ^ ((~BCu) & BCa); + Emu = BCu ^ ((~BCa) & BCe); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^ ((~BCe) & BCi); + Ese = BCe ^ ((~BCi) & BCo); + Esi = BCi ^ ((~BCo) & BCu); + Eso = BCo ^ ((~BCu) & BCa); + Esu = BCu ^ ((~BCa) & BCe); + + // prepareTheta + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + // thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^ ((~BCe) & BCi); + Aba ^= KeccakF_RoundConstants[round + 1]; + Abe = BCe ^ ((~BCi) & BCo); + Abi = BCi ^ ((~BCo) & BCu); + Abo = BCo ^ ((~BCu) & BCa); + Abu = BCu ^ ((~BCa) & BCe); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^ ((~BCe) & BCi); + Age = BCe ^ ((~BCi) & BCo); + Agi = BCi ^ ((~BCo) & BCu); + Ago = BCo ^ ((~BCu) & BCa); + Agu = BCu ^ ((~BCa) & BCe); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^ ((~BCe) & BCi); + Ake = BCe ^ ((~BCi) & BCo); + Aki = BCi ^ ((~BCo) & BCu); + Ako = BCo ^ ((~BCu) & BCa); + Aku = BCu ^ ((~BCa) & BCe); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^ ((~BCe) & BCi); + Ame = BCe ^ ((~BCi) & BCo); + Ami = BCi ^ ((~BCo) & BCu); + Amo = BCo ^ ((~BCu) & BCa); + Amu = BCu ^ ((~BCa) & 
BCe); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^ ((~BCe) & BCi); + Ase = BCe ^ ((~BCi) & BCo); + Asi = BCi ^ ((~BCo) & BCu); + Aso = BCo ^ ((~BCu) & BCa); + Asu = BCu ^ ((~BCa) & BCe); + } + + // copyToState(state, A) + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; +} + +/************************************************* + * Name: keccak_absorb + * + * Description: Absorb step of Keccak; + * non-incremental, starts by zeroeing the state. + * + * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state + * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) + * - const uint8_t *m: pointer to input to be absorbed into s + * - size_t mlen: length of input in bytes + * - uint8_t p: domain-separation byte for different + * Keccak-derived functions + **************************************************/ +static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, + size_t mlen, uint8_t p) { + size_t i; + uint8_t t[200]; + + /* Zero state */ + for (i = 0; i < 25; ++i) { + s[i] = 0; + } + + while (mlen >= r) { + for (i = 0; i < r / 8; ++i) { + s[i] ^= load64(m + 8 * i); + } + + KeccakF1600_StatePermute(s); + mlen -= r; + m += r; + } + + for (i = 0; i < r; ++i) { + t[i] = 0; + } + for (i = 0; i < mlen; ++i) { + t[i] = m[i]; + } + t[i] = p; + t[r - 1] |= 128; + for (i = 0; i < r / 8; ++i) { + s[i] ^= load64(t + 8 * i); + } +} + +/************************************************* + * Name: keccak_squeezeblocks + * + * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. + * Modifies the state. Can be called multiple times to keep + * squeezing, i.e., is incremental. + * + * Arguments: - uint8_t *h: pointer to output blocks + * - size_t nblocks: number of blocks to be + * squeezed (written to h) + * - uint64_t *s: pointer to input/output Keccak state + * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) + **************************************************/ +static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, + uint64_t *s, uint32_t r) { + while (nblocks > 0) { + KeccakF1600_StatePermute(s); + for (size_t i = 0; i < (r >> 3); i++) { + store64(h + 8 * i, s[i]); + } + h += r; + nblocks--; + } +} + +/************************************************* + * Name: shake128_absorb + * + * Description: Absorb step of the SHAKE128 XOF. + * non-incremental, starts by zeroeing the state. + * + * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state + * - const uint8_t *input: pointer to input to be absorbed + * into s + * - size_t inlen: length of input in bytes + **************************************************/ +void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen) { + state->ctx = malloc(PQC_SHAKECTX_BYTES); + if (state->ctx == NULL) { + exit(111); + } + keccak_absorb(state->ctx, SHAKE128_RATE, input, inlen, 0x1F); +} + +/************************************************* + * Name: shake128_squeezeblocks + * + * Description: Squeeze step of SHAKE128 XOF. 
Squeezes full blocks of + * SHAKE128_RATE bytes each. Modifies the state. Can be called + * multiple times to keep squeezing, i.e., is incremental. + * + * Arguments: - uint8_t *output: pointer to output blocks + * - size_t nblocks: number of blocks to be squeezed + * (written to output) + * - shake128ctx *state: pointer to input/output Keccak state + **************************************************/ +void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state) { + keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE128_RATE); +} + +void shake128_ctx_clone(shake128ctx *dest, const shake128ctx *src) { + dest->ctx = malloc(PQC_SHAKECTX_BYTES); + if (dest->ctx == NULL) { + exit(111); + } + memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES); +} + +/** Release the allocated state. Call only once. */ +void shake128_ctx_release(shake128ctx *state) { + free(state->ctx); // IGNORE free-check +} + +/************************************************* + * Name: shake256_absorb + * + * Description: Absorb step of the SHAKE256 XOF. + * non-incremental, starts by zeroeing the state. + * + * Arguments: - shake256ctx *state: pointer to (uninitialized) output Keccak state + * - const uint8_t *input: pointer to input to be absorbed + * into s + * - size_t inlen: length of input in bytes + **************************************************/ +void shake256_absorb(shake256ctx *state, const uint8_t *input, size_t inlen) { + state->ctx = malloc(PQC_SHAKECTX_BYTES); + if (state->ctx == NULL) { + exit(111); + } + keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F); +} + +/************************************************* + * Name: shake256_squeezeblocks + * + * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of + * SHAKE256_RATE bytes each. Modifies the state. Can be called + * multiple times to keep squeezing, i.e., is incremental. + * + * Arguments: - uint8_t *output: pointer to output blocks + * - size_t nblocks: number of blocks to be squeezed + * (written to output) + * - shake256ctx *state: pointer to input/output Keccak state + **************************************************/ +void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256ctx *state) { + keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); +} + +void shake256_ctx_clone(shake256ctx *dest, const shake256ctx *src) { + dest->ctx = malloc(PQC_SHAKECTX_BYTES); + if (dest->ctx == NULL) { + exit(111); + } + memcpy(dest->ctx, src->ctx, PQC_SHAKECTX_BYTES); +} + +/** Release the allocated state. Call only once. 
*/ +void shake256_ctx_release(shake256ctx *state) { + free(state->ctx); // IGNORE free-check +} + +/************************************************* + * Name: shake128 + * + * Description: SHAKE128 XOF with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - size_t outlen: requested output length in bytes + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void shake128(uint8_t *output, size_t outlen, + const uint8_t *input, size_t inlen) { + size_t nblocks = outlen / SHAKE128_RATE; + uint8_t t[SHAKE128_RATE]; + shake128ctx s; + + shake128_absorb(&s, input, inlen); + shake128_squeezeblocks(output, nblocks, &s); + + output += nblocks * SHAKE128_RATE; + outlen -= nblocks * SHAKE128_RATE; + + if (outlen) { + shake128_squeezeblocks(t, 1, &s); + for (size_t i = 0; i < outlen; ++i) { + output[i] = t[i]; + } + } + shake128_ctx_release(&s); +} + +/************************************************* + * Name: shake256 + * + * Description: SHAKE256 XOF with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - size_t outlen: requested output length in bytes + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void shake256_kyber(uint8_t *output, size_t outlen, + const uint8_t *input, size_t inlen) { + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t t[SHAKE256_RATE]; + shake256ctx s; + + shake256_absorb(&s, input, inlen); + shake256_squeezeblocks(output, nblocks, &s); + + output += nblocks * SHAKE256_RATE; + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) { + shake256_squeezeblocks(t, 1, &s); + for (size_t i = 0; i < outlen; ++i) { + output[i] = t[i]; + } + } + shake256_ctx_release(&s); +} + +/************************************************* + * Name: sha3_256 + * + * Description: SHA3-256 with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen) { + uint64_t s[25]; + uint8_t t[SHA3_256_RATE]; + + /* Absorb input */ + keccak_absorb(s, SHA3_256_RATE, input, inlen, 0x06); + + /* Squeeze output */ + keccak_squeezeblocks(t, 1, s, SHA3_256_RATE); + + for (size_t i = 0; i < 32; i++) { + output[i] = t[i]; + } +} + + +/************************************************* + * Name: sha3_512 + * + * Description: SHA3-512 with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen) { + uint64_t s[25]; + uint8_t t[SHA3_512_RATE]; + + /* Absorb input */ + keccak_absorb(s, SHA3_512_RATE, input, inlen, 0x06); + + /* Squeeze output */ + keccak_squeezeblocks(t, 1, s, SHA3_512_RATE); + + for (size_t i = 0; i < 64; i++) { + output[i] = t[i]; + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.h index d9a39e2ec1..80788b3e10 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/fips202_kyber_r2.h @@ -1,36 +1,36 @@ -// SPDX-License-Identifier: MIT - 
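/*
 * Editorial example, not part of the diff above: a minimal caller of the
 * one-shot SHAKE256 wrapper defined in fips202_kyber_r2.c, expanding a
 * 32-byte seed into 96 bytes of XOF output. It assumes fips202_kyber_r2.h is
 * on the include path and the translation unit above is linked in; the buffer
 * sizes are arbitrary illustration values. Each call allocates a Keccak state
 * in shake256_absorb and frees it again in shake256_ctx_release.
 */
#include <stdint.h>
#include "fips202_kyber_r2.h"

void expand_seed_example(uint8_t out[96], const uint8_t seed[32]) {
    /* Absorb the seed, then squeeze 96 bytes; padding uses the 0x1F
     * SHAKE domain-separation byte shown in keccak_absorb above. */
    shake256_kyber(out, 96, seed, 32);
}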
-#ifndef FIPS202_KYBER_R2_H -#define FIPS202_KYBER_R2_H - -#include <stdint.h> -#include <stddef.h> - -/** Data structure for the state of the SHAKE128 non-incremental hashing API. */ -typedef struct { - /** Internal state. */ - void *ctx; -} shake128ctx; - -/** Data structure for the state of the SHAKE256 non-incremental hashing API. */ -typedef struct { - /** Internal state. */ - void *ctx; -} shake256ctx; - -typedef shake128ctx keccak_state; - - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 -#define SHA3_256_RATE 136 -#define SHA3_512_RATE 72 - -void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen); -void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state); -void shake256_kyber(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); -void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); -void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen); -void shake128_ctx_release(shake128ctx *state); - -#endif // FIPS202_KYBER_R2_H +// SPDX-License-Identifier: MIT + +#ifndef FIPS202_KYBER_R2_H +#define FIPS202_KYBER_R2_H + +#include <stdint.h> +#include <stddef.h> + +/** Data structure for the state of the SHAKE128 non-incremental hashing API. */ +typedef struct { + /** Internal state. */ + void *ctx; +} shake128ctx; + +/** Data structure for the state of the SHAKE256 non-incremental hashing API. */ +typedef struct { + /** Internal state. */ + void *ctx; +} shake256ctx; + +typedef shake128ctx keccak_state; + + +#define SHAKE128_RATE 168 +#define SHAKE256_RATE 136 +#define SHA3_256_RATE 136 +#define SHA3_512_RATE 72 + +void shake128_absorb(shake128ctx *state, const uint8_t *input, size_t inlen); +void shake128_squeezeblocks(uint8_t *output, size_t nblocks, shake128ctx *state); +void shake256_kyber(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); +void sha3_256(uint8_t *output, const uint8_t *input, size_t inlen); +void sha3_512(uint8_t *output, const uint8_t *input, size_t inlen); +void shake128_ctx_release(shake128ctx *state); + +#endif // FIPS202_KYBER_R2_H diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.c index 233b5d8515..18a980978d 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.c @@ -1,301 +1,301 @@ -#include "indcpa.h" -#include "ntt.h" -#include "params.h" -#include "poly.h" -#include "polyvec.h" -#include "../s2n_pq_random.h" -#include "utils/s2n_safety.h" -#include "symmetric.h" - -#include <stdint.h> - -/************************************************* -* Name: pack_pk -* -* Description: Serialize the public key as concatenation of the -* serialized vector of polynomials pk -* and the public seed used to generate the matrix A. 
-* -* Arguments: uint8_t *r: pointer to the output serialized public key -* const poly *pk: pointer to the input public-key polynomial -* const uint8_t *seed: pointer to the input public seed -**************************************************/ -static void pack_pk(uint8_t *r, polyvec *pk, const uint8_t *seed) { - PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(r, pk); - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - r[i + KYBER_POLYVECBYTES] = seed[i]; - } -} - -/************************************************* -* Name: unpack_pk -* -* Description: De-serialize public key from a byte array; -* approximate inverse of pack_pk -* -* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials -* - uint8_t *seed: pointer to output seed to generate matrix A -* - const uint8_t *packedpk: pointer to input serialized public key -**************************************************/ -static void unpack_pk(polyvec *pk, uint8_t *seed, const uint8_t *packedpk) { - PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(pk, packedpk); - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - seed[i] = packedpk[i + KYBER_POLYVECBYTES]; - } -} - -/************************************************* -* Name: pack_sk -* -* Description: Serialize the secret key -* -* Arguments: - uint8_t *r: pointer to output serialized secret key -* - const polyvec *sk: pointer to input vector of polynomials (secret key) -**************************************************/ -static void pack_sk(uint8_t *r, polyvec *sk) { - PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(r, sk); -} - -/************************************************* -* Name: unpack_sk -* -* Description: De-serialize the secret key; -* inverse of pack_sk -* -* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) -* - const uint8_t *packedsk: pointer to input serialized secret key -**************************************************/ -static void unpack_sk(polyvec *sk, const uint8_t *packedsk) { - PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(sk, packedsk); -} - -/************************************************* -* Name: pack_ciphertext -* -* Description: Serialize the ciphertext as concatenation of the -* compressed and serialized vector of polynomials b -* and the compressed and serialized polynomial v -* -* Arguments: uint8_t *r: pointer to the output serialized ciphertext -* const poly *pk: pointer to the input vector of polynomials b -* const uint8_t *seed: pointer to the input polynomial v -**************************************************/ -static void pack_ciphertext(uint8_t *r, polyvec *b, poly *v) { - PQCLEAN_KYBER512_CLEAN_polyvec_compress(r, b); - PQCLEAN_KYBER512_CLEAN_poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v); -} - -/************************************************* -* Name: unpack_ciphertext -* -* Description: De-serialize and decompress ciphertext from a byte array; -* approximate inverse of pack_ciphertext -* -* Arguments: - polyvec *b: pointer to the output vector of polynomials b -* - poly *v: pointer to the output polynomial v -* - const uint8_t *c: pointer to the input serialized ciphertext -**************************************************/ -static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t *c) { - PQCLEAN_KYBER512_CLEAN_polyvec_decompress(b, c); - PQCLEAN_KYBER512_CLEAN_poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES); -} - -/************************************************* -* Name: rej_uniform -* -* Description: Run rejection sampling on uniform random bytes to generate -* uniform random integers mod q -* -* 
Arguments: - int16_t *r: pointer to output buffer -* - size_t len: requested number of 16-bit integers (uniform mod q) -* - const uint8_t *buf: pointer to input buffer (assumed to be uniform random bytes) -* - size_t buflen: length of input buffer in bytes -* -* Returns number of sampled 16-bit integers (at most len) -**************************************************/ -static size_t rej_uniform(int16_t *r, size_t len, const uint8_t *buf, size_t buflen) { - size_t ctr, pos; - - ctr = pos = 0; - while (ctr < len && pos + 2 <= buflen) { - uint16_t val = (uint16_t)(buf[pos] | ((uint16_t)buf[pos + 1] << 8)); - pos += 2; - - if (val < 19 * KYBER_Q) { - val -= (uint16_t)((val >> 12) * KYBER_Q); // Barrett reduction - r[ctr++] = (int16_t)val; - } - } - - return ctr; -} - -#define gen_a(A,B) gen_matrix(A,B,0) -#define gen_at(A,B) gen_matrix(A,B,1) - -/************************************************* -* Name: gen_matrix -* -* Description: Deterministically generate matrix A (or the transpose of A) -* from a seed. Entries of the matrix are polynomials that look -* uniformly random. Performs rejection sampling on output of -* a XOF -* -* Arguments: - polyvec *a: pointer to ouptput matrix A -* - const uint8_t *seed: pointer to input seed -* - int transposed: boolean deciding whether A or A^T is generated -**************************************************/ -#define MAXNBLOCKS ((530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* 530 is expected number of required bytes */ -static void gen_matrix(polyvec *a, const uint8_t *seed, int transposed) { - size_t ctr; - uint8_t i, j; - uint8_t buf[XOF_BLOCKBYTES * MAXNBLOCKS + 1]; - xof_state state; - - for (i = 0; i < KYBER_K; i++) { - for (j = 0; j < KYBER_K; j++) { - if (transposed) { - xof_absorb(&state, seed, i, j); - } else { - xof_absorb(&state, seed, j, i); - } - - xof_squeezeblocks(buf, MAXNBLOCKS, &state); - ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, MAXNBLOCKS * XOF_BLOCKBYTES); - - while (ctr < KYBER_N) { - xof_squeezeblocks(buf, 1, &state); - ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, XOF_BLOCKBYTES); - } - xof_ctx_release(&state); - } - } -} - -/************************************************* -* Name: indcpa_keypair -* -* Description: Generates public and private key for the CPA-secure -* public-key encryption scheme underlying Kyber -* -* Arguments: - uint8_t *pk: pointer to output public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key (of length KYBER_INDCPA_SECRETKEYBYTES bytes) -**************************************************/ -int PQCLEAN_KYBER512_CLEAN_indcpa_keypair(uint8_t *pk, uint8_t *sk) { - polyvec a[KYBER_K], e, pkpv, skpv; - uint8_t buf[2 * KYBER_SYMBYTES]; - uint8_t *publicseed = buf; - uint8_t *noiseseed = buf + KYBER_SYMBYTES; - uint8_t nonce = 0; - - GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); - hash_g(buf, buf, KYBER_SYMBYTES); - - gen_a(a, publicseed); - - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_getnoise(skpv.vec + i, noiseseed, nonce++); - } - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_getnoise(e.vec + i, noiseseed, nonce++); - } - - PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&skpv); - PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&e); - - // matrix-vector multiplication - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); - PQCLEAN_KYBER512_CLEAN_poly_frommont(&pkpv.vec[i]); - } - - PQCLEAN_KYBER512_CLEAN_polyvec_add(&pkpv, &pkpv, &e); - 
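/*
 * Editorial sketch, not part of the diff: the per-candidate decision inside
 * rej_uniform above, isolated for clarity. Two input bytes form a 16-bit
 * value; candidates of 19*KYBER_Q and above are rejected so that the accepted
 * values are uniform mod q, and subtracting (val >> 12)*KYBER_Q removes a
 * multiple of q (the result stays congruent to val mod q, though it is not
 * necessarily fully reduced; later reduce/csubq passes handle that).
 * The names below are hypothetical helpers, not part of the library.
 */
#include <stdint.h>

#define EXAMPLE_KYBER_Q 3329

/* Returns 1 and stores the coefficient if the candidate is accepted, else 0. */
int accept_candidate_example(uint16_t val, int16_t *coeff) {
    if (val >= 19 * EXAMPLE_KYBER_Q) {
        return 0;                       /* reject: keeps the output unbiased */
    }
    val -= (uint16_t)((val >> 12) * EXAMPLE_KYBER_Q);
    *coeff = (int16_t)val;
    return 1;
}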
PQCLEAN_KYBER512_CLEAN_polyvec_reduce(&pkpv); - - pack_sk(sk, &skpv); - pack_pk(pk, &pkpv, publicseed); - return 0; -} - -/************************************************* -* Name: indcpa_enc -* -* Description: Encryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - uint8_t *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) -* - const uint8_t *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) -* - const uint8_t *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) -* - const uint8_t *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) -* to deterministically generate all randomness -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_indcpa_enc(uint8_t *c, - const uint8_t *m, - const uint8_t *pk, - const uint8_t *coins) { - polyvec sp, pkpv, ep, at[KYBER_K], bp; - poly v, k, epp; - uint8_t seed[KYBER_SYMBYTES]; - uint8_t nonce = 0; - - unpack_pk(&pkpv, seed, pk); - PQCLEAN_KYBER512_CLEAN_poly_frommsg(&k, m); - gen_at(at, seed); - - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_getnoise(sp.vec + i, coins, nonce++); - } - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_getnoise(ep.vec + i, coins, nonce++); - } - PQCLEAN_KYBER512_CLEAN_poly_getnoise(&epp, coins, nonce++); - - PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&sp); - - // matrix-vector multiplication - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); - } - - PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&v, &pkpv, &sp); - - PQCLEAN_KYBER512_CLEAN_polyvec_invntt(&bp); - PQCLEAN_KYBER512_CLEAN_poly_invntt(&v); - - PQCLEAN_KYBER512_CLEAN_polyvec_add(&bp, &bp, &ep); - PQCLEAN_KYBER512_CLEAN_poly_add(&v, &v, &epp); - PQCLEAN_KYBER512_CLEAN_poly_add(&v, &v, &k); - PQCLEAN_KYBER512_CLEAN_polyvec_reduce(&bp); - PQCLEAN_KYBER512_CLEAN_poly_reduce(&v); - - pack_ciphertext(c, &bp, &v); -} - -/************************************************* -* Name: indcpa_dec -* -* Description: Decryption function of the CPA-secure -* public-key encryption scheme underlying Kyber. -* -* Arguments: - uint8_t *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) -* - const uint8_t *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) -* - const uint8_t *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_indcpa_dec(uint8_t *m, - const uint8_t *c, - const uint8_t *sk) { - polyvec bp, skpv; - poly v, mp; - - unpack_ciphertext(&bp, &v, c); - unpack_sk(&skpv, sk); - - PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&bp); - PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&mp, &skpv, &bp); - PQCLEAN_KYBER512_CLEAN_poly_invntt(&mp); - - PQCLEAN_KYBER512_CLEAN_poly_sub(&mp, &v, &mp); - PQCLEAN_KYBER512_CLEAN_poly_reduce(&mp); - - PQCLEAN_KYBER512_CLEAN_poly_tomsg(m, &mp); -} +#include "indcpa.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "polyvec.h" +#include "../s2n_pq_random.h" +#include "utils/s2n_safety.h" +#include "symmetric.h" + +#include <stdint.h> + +/************************************************* +* Name: pack_pk +* +* Description: Serialize the public key as concatenation of the +* serialized vector of polynomials pk +* and the public seed used to generate the matrix A. 
+* +* Arguments: uint8_t *r: pointer to the output serialized public key +* const poly *pk: pointer to the input public-key polynomial +* const uint8_t *seed: pointer to the input public seed +**************************************************/ +static void pack_pk(uint8_t *r, polyvec *pk, const uint8_t *seed) { + PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(r, pk); + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + r[i + KYBER_POLYVECBYTES] = seed[i]; + } +} + +/************************************************* +* Name: unpack_pk +* +* Description: De-serialize public key from a byte array; +* approximate inverse of pack_pk +* +* Arguments: - polyvec *pk: pointer to output public-key vector of polynomials +* - uint8_t *seed: pointer to output seed to generate matrix A +* - const uint8_t *packedpk: pointer to input serialized public key +**************************************************/ +static void unpack_pk(polyvec *pk, uint8_t *seed, const uint8_t *packedpk) { + PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(pk, packedpk); + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + seed[i] = packedpk[i + KYBER_POLYVECBYTES]; + } +} + +/************************************************* +* Name: pack_sk +* +* Description: Serialize the secret key +* +* Arguments: - uint8_t *r: pointer to output serialized secret key +* - const polyvec *sk: pointer to input vector of polynomials (secret key) +**************************************************/ +static void pack_sk(uint8_t *r, polyvec *sk) { + PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(r, sk); +} + +/************************************************* +* Name: unpack_sk +* +* Description: De-serialize the secret key; +* inverse of pack_sk +* +* Arguments: - polyvec *sk: pointer to output vector of polynomials (secret key) +* - const uint8_t *packedsk: pointer to input serialized secret key +**************************************************/ +static void unpack_sk(polyvec *sk, const uint8_t *packedsk) { + PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(sk, packedsk); +} + +/************************************************* +* Name: pack_ciphertext +* +* Description: Serialize the ciphertext as concatenation of the +* compressed and serialized vector of polynomials b +* and the compressed and serialized polynomial v +* +* Arguments: uint8_t *r: pointer to the output serialized ciphertext +* const poly *pk: pointer to the input vector of polynomials b +* const uint8_t *seed: pointer to the input polynomial v +**************************************************/ +static void pack_ciphertext(uint8_t *r, polyvec *b, poly *v) { + PQCLEAN_KYBER512_CLEAN_polyvec_compress(r, b); + PQCLEAN_KYBER512_CLEAN_poly_compress(r + KYBER_POLYVECCOMPRESSEDBYTES, v); +} + +/************************************************* +* Name: unpack_ciphertext +* +* Description: De-serialize and decompress ciphertext from a byte array; +* approximate inverse of pack_ciphertext +* +* Arguments: - polyvec *b: pointer to the output vector of polynomials b +* - poly *v: pointer to the output polynomial v +* - const uint8_t *c: pointer to the input serialized ciphertext +**************************************************/ +static void unpack_ciphertext(polyvec *b, poly *v, const uint8_t *c) { + PQCLEAN_KYBER512_CLEAN_polyvec_decompress(b, c); + PQCLEAN_KYBER512_CLEAN_poly_decompress(v, c + KYBER_POLYVECCOMPRESSEDBYTES); +} + +/************************************************* +* Name: rej_uniform +* +* Description: Run rejection sampling on uniform random bytes to generate +* uniform random integers mod q +* +* 
Arguments: - int16_t *r: pointer to output buffer +* - size_t len: requested number of 16-bit integers (uniform mod q) +* - const uint8_t *buf: pointer to input buffer (assumed to be uniform random bytes) +* - size_t buflen: length of input buffer in bytes +* +* Returns number of sampled 16-bit integers (at most len) +**************************************************/ +static size_t rej_uniform(int16_t *r, size_t len, const uint8_t *buf, size_t buflen) { + size_t ctr, pos; + + ctr = pos = 0; + while (ctr < len && pos + 2 <= buflen) { + uint16_t val = (uint16_t)(buf[pos] | ((uint16_t)buf[pos + 1] << 8)); + pos += 2; + + if (val < 19 * KYBER_Q) { + val -= (uint16_t)((val >> 12) * KYBER_Q); // Barrett reduction + r[ctr++] = (int16_t)val; + } + } + + return ctr; +} + +#define gen_a(A,B) gen_matrix(A,B,0) +#define gen_at(A,B) gen_matrix(A,B,1) + +/************************************************* +* Name: gen_matrix +* +* Description: Deterministically generate matrix A (or the transpose of A) +* from a seed. Entries of the matrix are polynomials that look +* uniformly random. Performs rejection sampling on output of +* a XOF +* +* Arguments: - polyvec *a: pointer to ouptput matrix A +* - const uint8_t *seed: pointer to input seed +* - int transposed: boolean deciding whether A or A^T is generated +**************************************************/ +#define MAXNBLOCKS ((530+XOF_BLOCKBYTES)/XOF_BLOCKBYTES) /* 530 is expected number of required bytes */ +static void gen_matrix(polyvec *a, const uint8_t *seed, int transposed) { + size_t ctr; + uint8_t i, j; + uint8_t buf[XOF_BLOCKBYTES * MAXNBLOCKS + 1]; + xof_state state; + + for (i = 0; i < KYBER_K; i++) { + for (j = 0; j < KYBER_K; j++) { + if (transposed) { + xof_absorb(&state, seed, i, j); + } else { + xof_absorb(&state, seed, j, i); + } + + xof_squeezeblocks(buf, MAXNBLOCKS, &state); + ctr = rej_uniform(a[i].vec[j].coeffs, KYBER_N, buf, MAXNBLOCKS * XOF_BLOCKBYTES); + + while (ctr < KYBER_N) { + xof_squeezeblocks(buf, 1, &state); + ctr += rej_uniform(a[i].vec[j].coeffs + ctr, KYBER_N - ctr, buf, XOF_BLOCKBYTES); + } + xof_ctx_release(&state); + } + } +} + +/************************************************* +* Name: indcpa_keypair +* +* Description: Generates public and private key for the CPA-secure +* public-key encryption scheme underlying Kyber +* +* Arguments: - uint8_t *pk: pointer to output public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (of length KYBER_INDCPA_SECRETKEYBYTES bytes) +**************************************************/ +int PQCLEAN_KYBER512_CLEAN_indcpa_keypair(uint8_t *pk, uint8_t *sk) { + polyvec a[KYBER_K], e, pkpv, skpv; + uint8_t buf[2 * KYBER_SYMBYTES]; + uint8_t *publicseed = buf; + uint8_t *noiseseed = buf + KYBER_SYMBYTES; + uint8_t nonce = 0; + + GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); + hash_g(buf, buf, KYBER_SYMBYTES); + + gen_a(a, publicseed); + + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_getnoise(skpv.vec + i, noiseseed, nonce++); + } + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_getnoise(e.vec + i, noiseseed, nonce++); + } + + PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&skpv); + PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&e); + + // matrix-vector multiplication + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&pkpv.vec[i], &a[i], &skpv); + PQCLEAN_KYBER512_CLEAN_poly_frommont(&pkpv.vec[i]); + } + + PQCLEAN_KYBER512_CLEAN_polyvec_add(&pkpv, &pkpv, &e); + 
PQCLEAN_KYBER512_CLEAN_polyvec_reduce(&pkpv); + + pack_sk(sk, &skpv); + pack_pk(pk, &pkpv, publicseed); + return 0; +} + +/************************************************* +* Name: indcpa_enc +* +* Description: Encryption function of the CPA-secure +* public-key encryption scheme underlying Kyber. +* +* Arguments: - uint8_t *c: pointer to output ciphertext (of length KYBER_INDCPA_BYTES bytes) +* - const uint8_t *m: pointer to input message (of length KYBER_INDCPA_MSGBYTES bytes) +* - const uint8_t *pk: pointer to input public key (of length KYBER_INDCPA_PUBLICKEYBYTES bytes) +* - const uint8_t *coin: pointer to input random coins used as seed (of length KYBER_SYMBYTES bytes) +* to deterministically generate all randomness +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_indcpa_enc(uint8_t *c, + const uint8_t *m, + const uint8_t *pk, + const uint8_t *coins) { + polyvec sp, pkpv, ep, at[KYBER_K], bp; + poly v, k, epp; + uint8_t seed[KYBER_SYMBYTES]; + uint8_t nonce = 0; + + unpack_pk(&pkpv, seed, pk); + PQCLEAN_KYBER512_CLEAN_poly_frommsg(&k, m); + gen_at(at, seed); + + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_getnoise(sp.vec + i, coins, nonce++); + } + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_getnoise(ep.vec + i, coins, nonce++); + } + PQCLEAN_KYBER512_CLEAN_poly_getnoise(&epp, coins, nonce++); + + PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&sp); + + // matrix-vector multiplication + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&bp.vec[i], &at[i], &sp); + } + + PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&v, &pkpv, &sp); + + PQCLEAN_KYBER512_CLEAN_polyvec_invntt(&bp); + PQCLEAN_KYBER512_CLEAN_poly_invntt(&v); + + PQCLEAN_KYBER512_CLEAN_polyvec_add(&bp, &bp, &ep); + PQCLEAN_KYBER512_CLEAN_poly_add(&v, &v, &epp); + PQCLEAN_KYBER512_CLEAN_poly_add(&v, &v, &k); + PQCLEAN_KYBER512_CLEAN_polyvec_reduce(&bp); + PQCLEAN_KYBER512_CLEAN_poly_reduce(&v); + + pack_ciphertext(c, &bp, &v); +} + +/************************************************* +* Name: indcpa_dec +* +* Description: Decryption function of the CPA-secure +* public-key encryption scheme underlying Kyber. 
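/*
 * Editorial sketch, not part of the diff: the seed-plus-nonce pattern behind
 * the poly_getnoise calls in indcpa_keypair and indcpa_enc above. The actual
 * prf used there is defined in symmetric.h, which is not shown in this hunk;
 * the function below is only a plausible stand-in built on the SHAKE256
 * wrapper from fips202_kyber_r2.c, illustrating why incrementing the one-byte
 * nonce yields an independent noise stream per sampled polynomial.
 */
#include <stdint.h>
#include <string.h>
#include "fips202_kyber_r2.h"

#define EXAMPLE_SEED_BYTES 32

void noise_prf_sketch(uint8_t *out, size_t outlen,
                      const uint8_t seed[EXAMPLE_SEED_BYTES], uint8_t nonce) {
    uint8_t in[EXAMPLE_SEED_BYTES + 1];
    memcpy(in, seed, EXAMPLE_SEED_BYTES);
    in[EXAMPLE_SEED_BYTES] = nonce;     /* domain separation per invocation */
    shake256_kyber(out, outlen, in, sizeof in);
}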
+* +* Arguments: - uint8_t *m: pointer to output decrypted message (of length KYBER_INDCPA_MSGBYTES) +* - const uint8_t *c: pointer to input ciphertext (of length KYBER_INDCPA_BYTES) +* - const uint8_t *sk: pointer to input secret key (of length KYBER_INDCPA_SECRETKEYBYTES) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_indcpa_dec(uint8_t *m, + const uint8_t *c, + const uint8_t *sk) { + polyvec bp, skpv; + poly v, mp; + + unpack_ciphertext(&bp, &v, c); + unpack_sk(&skpv, sk); + + PQCLEAN_KYBER512_CLEAN_polyvec_ntt(&bp); + PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(&mp, &skpv, &bp); + PQCLEAN_KYBER512_CLEAN_poly_invntt(&mp); + + PQCLEAN_KYBER512_CLEAN_poly_sub(&mp, &v, &mp); + PQCLEAN_KYBER512_CLEAN_poly_reduce(&mp); + + PQCLEAN_KYBER512_CLEAN_poly_tomsg(m, &mp); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.h index 23328f0970..2311d02a11 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/indcpa.h @@ -1,21 +1,21 @@ -#ifndef INDCPA_H -#define INDCPA_H - -#include <stdint.h> - -int PQCLEAN_KYBER512_CLEAN_indcpa_keypair( - uint8_t *pk, - uint8_t *sk); - -void PQCLEAN_KYBER512_CLEAN_indcpa_enc( - uint8_t *c, - const uint8_t *m, - const uint8_t *pk, - const uint8_t *coins); - -void PQCLEAN_KYBER512_CLEAN_indcpa_dec( - uint8_t *m, - const uint8_t *c, - const uint8_t *sk); - -#endif +#ifndef INDCPA_H +#define INDCPA_H + +#include <stdint.h> + +int PQCLEAN_KYBER512_CLEAN_indcpa_keypair( + uint8_t *pk, + uint8_t *sk); + +void PQCLEAN_KYBER512_CLEAN_indcpa_enc( + uint8_t *c, + const uint8_t *m, + const uint8_t *pk, + const uint8_t *coins); + +void PQCLEAN_KYBER512_CLEAN_indcpa_dec( + uint8_t *m, + const uint8_t *c, + const uint8_t *sk); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/kyber_r2_kem.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/kyber_r2_kem.c index d9b64d4336..d28caa8642 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/kyber_r2_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/kyber_r2_kem.c @@ -1,102 +1,102 @@ -#include "indcpa.h" -#include "params.h" -#include "symmetric.h" -#include "verify.h" - -#include "../s2n_pq_random.h" -#include "utils/s2n_safety.h" -#include "tls/s2n_kem.h" - -#include <stdlib.h> - -/************************************************* -* Name: crypto_kem_keypair -* -* Description: Generates public and private key -* for CCA-secure Kyber key encapsulation mechanism -* -* Arguments: - uint8_t *pk: pointer to output public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) -* - uint8_t *sk: pointer to output private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int kyber_512_r2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { - size_t i; - PQCLEAN_KYBER512_CLEAN_indcpa_keypair(pk, sk); - for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++) { - sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i]; - } - hash_h(sk + KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); - GUARD_AS_POSIX(s2n_get_random_bytes(sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES)); /* Value z for pseudo-random output on reject */ - return 0; -} - -/************************************************* -* Name: crypto_kem_enc -* -* Description: Generates cipher text and shared -* secret for given public key -* -* Arguments: - uint8_t *ct: pointer to 
output cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) -* - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) -* - const uint8_t *pk: pointer to input public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) -* -* Returns 0 (success) -**************************************************/ -int kyber_512_r2_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { - uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ - uint8_t buf[2 * KYBER_SYMBYTES]; - - GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); - hash_h(buf, buf, KYBER_SYMBYTES); /* Don't release system RNG output */ - - hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ - hash_g(kr, buf, 2 * KYBER_SYMBYTES); - - PQCLEAN_KYBER512_CLEAN_indcpa_enc(ct, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ - - hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ - kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ - return 0; -} - -/************************************************* -* Name: crypto_kem_dec -* -* Description: Generates shared secret for given -* cipher text and private key -* -* Arguments: - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) -* - const uint8_t *ct: pointer to input cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) -* - const uint8_t *sk: pointer to input private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) -* -* Returns 0. -* -* On failure, ss will contain a pseudo-random value. -**************************************************/ -int kyber_512_r2_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { - size_t i; - uint8_t fail; - uint8_t cmp[KYBER_CIPHERTEXTBYTES]; - uint8_t buf[2 * KYBER_SYMBYTES]; - uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ - const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; - - PQCLEAN_KYBER512_CLEAN_indcpa_dec(buf, ct, sk); - - for (i = 0; i < KYBER_SYMBYTES; i++) { /* Multitarget countermeasure for coins + contributory KEM */ - buf[KYBER_SYMBYTES + i] = sk[KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES + i]; /* Save hash by storing H(pk) in sk */ - } - hash_g(kr, buf, 2 * KYBER_SYMBYTES); - - PQCLEAN_KYBER512_CLEAN_indcpa_enc(cmp, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ - - fail = PQCLEAN_KYBER512_CLEAN_verify(ct, cmp, KYBER_CIPHERTEXTBYTES); - - hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ - - PQCLEAN_KYBER512_CLEAN_cmov(kr, sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */ - - kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ - return 0; -} +#include "indcpa.h" +#include "params.h" +#include "symmetric.h" +#include "verify.h" + +#include "../s2n_pq_random.h" +#include "utils/s2n_safety.h" +#include "tls/s2n_kem.h" + +#include <stdlib.h> + +/************************************************* +* Name: crypto_kem_keypair +* +* Description: Generates public and private key +* for CCA-secure Kyber key encapsulation mechanism +* +* Arguments: - uint8_t *pk: pointer to output public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) +* - uint8_t *sk: pointer to output private key (an already allocated array of 
CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int kyber_512_r2_crypto_kem_keypair(uint8_t *pk, uint8_t *sk) { + size_t i; + PQCLEAN_KYBER512_CLEAN_indcpa_keypair(pk, sk); + for (i = 0; i < KYBER_INDCPA_PUBLICKEYBYTES; i++) { + sk[i + KYBER_INDCPA_SECRETKEYBYTES] = pk[i]; + } + hash_h(sk + KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); + GUARD_AS_POSIX(s2n_get_random_bytes(sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES)); /* Value z for pseudo-random output on reject */ + return 0; +} + +/************************************************* +* Name: crypto_kem_enc +* +* Description: Generates cipher text and shared +* secret for given public key +* +* Arguments: - uint8_t *ct: pointer to output cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) +* - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) +* - const uint8_t *pk: pointer to input public key (an already allocated array of CRYPTO_PUBLICKEYBYTES bytes) +* +* Returns 0 (success) +**************************************************/ +int kyber_512_r2_crypto_kem_enc(uint8_t *ct, uint8_t *ss, const uint8_t *pk) { + uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ + uint8_t buf[2 * KYBER_SYMBYTES]; + + GUARD_AS_POSIX(s2n_get_random_bytes(buf, KYBER_SYMBYTES)); + hash_h(buf, buf, KYBER_SYMBYTES); /* Don't release system RNG output */ + + hash_h(buf + KYBER_SYMBYTES, pk, KYBER_PUBLICKEYBYTES); /* Multitarget countermeasure for coins + contributory KEM */ + hash_g(kr, buf, 2 * KYBER_SYMBYTES); + + PQCLEAN_KYBER512_CLEAN_indcpa_enc(ct, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ + + hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ + kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ + return 0; +} + +/************************************************* +* Name: crypto_kem_dec +* +* Description: Generates shared secret for given +* cipher text and private key +* +* Arguments: - uint8_t *ss: pointer to output shared secret (an already allocated array of CRYPTO_BYTES bytes) +* - const uint8_t *ct: pointer to input cipher text (an already allocated array of CRYPTO_CIPHERTEXTBYTES bytes) +* - const uint8_t *sk: pointer to input private key (an already allocated array of CRYPTO_SECRETKEYBYTES bytes) +* +* Returns 0. +* +* On failure, ss will contain a pseudo-random value. 
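/*
 * Editorial sketch, not part of the diff: simplified stand-ins for the
 * constant-time verify/cmov pair that crypto_kem_dec in this diff uses for
 * implicit rejection (the real PQCLEAN_KYBER512_CLEAN_verify/cmov live in
 * verify.c, which is not part of this hunk). The comparison never branches on
 * secret data, and the conditional move overwrites the pre-key with the
 * stored z value only when the re-encryption check failed.
 */
#include <stddef.h>
#include <stdint.h>

/* Returns 0 if the buffers are equal, 1 otherwise, without secret-dependent branches. */
uint8_t ct_verify_example(const uint8_t *a, const uint8_t *b, size_t len) {
    uint8_t r = 0;
    for (size_t i = 0; i < len; i++) {
        r |= a[i] ^ b[i];
    }
    return (uint8_t)((-(uint64_t)r) >> 63);
}

/* Copies x over r when flag == 1; leaves r untouched when flag == 0. */
void ct_cmov_example(uint8_t *r, const uint8_t *x, size_t len, uint8_t flag) {
    uint8_t mask = (uint8_t)(-(int)flag);   /* 0x00 or 0xFF */
    for (size_t i = 0; i < len; i++) {
        r[i] ^= mask & (r[i] ^ x[i]);
    }
}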
+**************************************************/ +int kyber_512_r2_crypto_kem_dec(uint8_t *ss, const uint8_t *ct, const uint8_t *sk) { + size_t i; + uint8_t fail; + uint8_t cmp[KYBER_CIPHERTEXTBYTES]; + uint8_t buf[2 * KYBER_SYMBYTES]; + uint8_t kr[2 * KYBER_SYMBYTES]; /* Will contain key, coins */ + const uint8_t *pk = sk + KYBER_INDCPA_SECRETKEYBYTES; + + PQCLEAN_KYBER512_CLEAN_indcpa_dec(buf, ct, sk); + + for (i = 0; i < KYBER_SYMBYTES; i++) { /* Multitarget countermeasure for coins + contributory KEM */ + buf[KYBER_SYMBYTES + i] = sk[KYBER_SECRETKEYBYTES - 2 * KYBER_SYMBYTES + i]; /* Save hash by storing H(pk) in sk */ + } + hash_g(kr, buf, 2 * KYBER_SYMBYTES); + + PQCLEAN_KYBER512_CLEAN_indcpa_enc(cmp, buf, pk, kr + KYBER_SYMBYTES); /* coins are in kr+KYBER_SYMBYTES */ + + fail = PQCLEAN_KYBER512_CLEAN_verify(ct, cmp, KYBER_CIPHERTEXTBYTES); + + hash_h(kr + KYBER_SYMBYTES, ct, KYBER_CIPHERTEXTBYTES); /* overwrite coins in kr with H(c) */ + + PQCLEAN_KYBER512_CLEAN_cmov(kr, sk + KYBER_SECRETKEYBYTES - KYBER_SYMBYTES, KYBER_SYMBYTES, fail); /* Overwrite pre-k with z on re-encryption failure */ + + kdf(ss, kr, 2 * KYBER_SYMBYTES); /* hash concatenation of pre-k and H(c) to k */ + return 0; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.c index 444664b3e2..68834a255f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.c @@ -1,155 +1,155 @@ -#include "ntt.h" -#include "params.h" -#include "reduce.h" - -#include <stddef.h> -#include <stdint.h> - -/* Code to generate zetas and zetas_inv used in the number-theoretic transform: - -#define KYBER_ROOT_OF_UNITY 17 - -static const uint16_t tree[128] = { - 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, - 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, - 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, - 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, - 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, - 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, - 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, - 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; - - -static int16_t fqmul(int16_t a, int16_t b) { - return montgomery_reduce((int32_t)a*b); -} - -void init_ntt() { - unsigned int i, j, k; - int16_t tmp[128]; - - tmp[0] = MONT; - for(i = 1; i < 128; ++i) - tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); - - for(i = 0; i < 128; ++i) - zetas[i] = tmp[tree[i]]; - - k = 0; - for(i = 64; i >= 1; i >>= 1) - for(j = i; j < 2*i; ++j) - zetas_inv[k++] = -tmp[128 - tree[j]]; - - zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; -} - -*/ -const int16_t PQCLEAN_KYBER512_CLEAN_zetas[128] = { - 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, - 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, - 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, - 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, - 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, - 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, - 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 
1670, - 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628 -}; - -const int16_t PQCLEAN_KYBER512_CLEAN_zetas_inv[128] = { - 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, - 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, - 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, - 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, - 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, - 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, - 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, - 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441 -}; - - -/************************************************* -* Name: fqmul -* -* Description: Multiplication followed by Montgomery reduction -* -* Arguments: - int16_t a: first factor -* - int16_t b: second factor -* -* Returns 16-bit integer congruent to a*b*R^{-1} mod q -**************************************************/ -static int16_t fqmul(int16_t a, int16_t b) { - return PQCLEAN_KYBER512_CLEAN_montgomery_reduce((int32_t)a * b); -} - -/************************************************* -* Name: ntt -* -* Description: Inplace number-theoretic transform (NTT) in Rq -* input is in standard order, output is in bitreversed order -* -* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_ntt(int16_t poly[256]) { - size_t j, k = 1; - int16_t t, zeta; - - for (size_t len = 128; len >= 2; len >>= 1) { - for (size_t start = 0; start < 256; start = j + len) { - zeta = PQCLEAN_KYBER512_CLEAN_zetas[k++]; - for (j = start; j < start + len; ++j) { - t = fqmul(zeta, poly[j + len]); - poly[j + len] = poly[j] - t; - poly[j] = poly[j] + t; - } - } - } -} - -/************************************************* -* Name: invntt -* -* Description: Inplace inverse number-theoretic transform in Rq -* input is in bitreversed order, output is in standard order -* -* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_invntt(int16_t poly[256]) { - size_t j, k = 0; - int16_t t, zeta; - - for (size_t len = 2; len <= 128; len <<= 1) { - for (size_t start = 0; start < 256; start = j + len) { - zeta = PQCLEAN_KYBER512_CLEAN_zetas_inv[k++]; - for (j = start; j < start + len; ++j) { - t = poly[j]; - poly[j] = PQCLEAN_KYBER512_CLEAN_barrett_reduce(t + poly[j + len]); - poly[j + len] = t - poly[j + len]; - poly[j + len] = fqmul(zeta, poly[j + len]); - } - } - } - - for (j = 0; j < 256; ++j) { - poly[j] = fqmul(poly[j], PQCLEAN_KYBER512_CLEAN_zetas_inv[127]); - } -} - -/************************************************* -* Name: basemul -* -* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) -* used for multiplication of elements in Rq in NTT domain -* -* Arguments: - int16_t r[2]: pointer to the output polynomial -* - const int16_t a[2]: pointer to the first factor -* - const int16_t b[2]: pointer to the second factor -* - int16_t zeta: integer defining the reduction polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_basemul(int16_t r[2], const int16_t a[2], 
const int16_t b[2], int16_t zeta) { - r[0] = fqmul(a[1], b[1]); - r[0] = fqmul(r[0], zeta); - r[0] += fqmul(a[0], b[0]); - - r[1] = fqmul(a[0], b[1]); - r[1] += fqmul(a[1], b[0]); -} +#include "ntt.h" +#include "params.h" +#include "reduce.h" + +#include <stddef.h> +#include <stdint.h> + +/* Code to generate zetas and zetas_inv used in the number-theoretic transform: + +#define KYBER_ROOT_OF_UNITY 17 + +static const uint16_t tree[128] = { + 0, 64, 32, 96, 16, 80, 48, 112, 8, 72, 40, 104, 24, 88, 56, 120, + 4, 68, 36, 100, 20, 84, 52, 116, 12, 76, 44, 108, 28, 92, 60, 124, + 2, 66, 34, 98, 18, 82, 50, 114, 10, 74, 42, 106, 26, 90, 58, 122, + 6, 70, 38, 102, 22, 86, 54, 118, 14, 78, 46, 110, 30, 94, 62, 126, + 1, 65, 33, 97, 17, 81, 49, 113, 9, 73, 41, 105, 25, 89, 57, 121, + 5, 69, 37, 101, 21, 85, 53, 117, 13, 77, 45, 109, 29, 93, 61, 125, + 3, 67, 35, 99, 19, 83, 51, 115, 11, 75, 43, 107, 27, 91, 59, 123, + 7, 71, 39, 103, 23, 87, 55, 119, 15, 79, 47, 111, 31, 95, 63, 127}; + + +static int16_t fqmul(int16_t a, int16_t b) { + return montgomery_reduce((int32_t)a*b); +} + +void init_ntt() { + unsigned int i, j, k; + int16_t tmp[128]; + + tmp[0] = MONT; + for(i = 1; i < 128; ++i) + tmp[i] = fqmul(tmp[i-1], KYBER_ROOT_OF_UNITY*MONT % KYBER_Q); + + for(i = 0; i < 128; ++i) + zetas[i] = tmp[tree[i]]; + + k = 0; + for(i = 64; i >= 1; i >>= 1) + for(j = i; j < 2*i; ++j) + zetas_inv[k++] = -tmp[128 - tree[j]]; + + zetas_inv[127] = MONT * (MONT * (KYBER_Q - 1) * ((KYBER_Q - 1)/128) % KYBER_Q) % KYBER_Q; +} + +*/ +const int16_t PQCLEAN_KYBER512_CLEAN_zetas[128] = { + 2285, 2571, 2970, 1812, 1493, 1422, 287, 202, 3158, 622, 1577, 182, 962, 2127, 1855, 1468, + 573, 2004, 264, 383, 2500, 1458, 1727, 3199, 2648, 1017, 732, 608, 1787, 411, 3124, 1758, + 1223, 652, 2777, 1015, 2036, 1491, 3047, 1785, 516, 3321, 3009, 2663, 1711, 2167, 126, 1469, + 2476, 3239, 3058, 830, 107, 1908, 3082, 2378, 2931, 961, 1821, 2604, 448, 2264, 677, 2054, + 2226, 430, 555, 843, 2078, 871, 1550, 105, 422, 587, 177, 3094, 3038, 2869, 1574, 1653, + 3083, 778, 1159, 3182, 2552, 1483, 2727, 1119, 1739, 644, 2457, 349, 418, 329, 3173, 3254, + 817, 1097, 603, 610, 1322, 2044, 1864, 384, 2114, 3193, 1218, 1994, 2455, 220, 2142, 1670, + 2144, 1799, 2051, 794, 1819, 2475, 2459, 478, 3221, 3021, 996, 991, 958, 1869, 1522, 1628 +}; + +const int16_t PQCLEAN_KYBER512_CLEAN_zetas_inv[128] = { + 1701, 1807, 1460, 2371, 2338, 2333, 308, 108, 2851, 870, 854, 1510, 2535, 1278, 1530, 1185, + 1659, 1187, 3109, 874, 1335, 2111, 136, 1215, 2945, 1465, 1285, 2007, 2719, 2726, 2232, 2512, + 75, 156, 3000, 2911, 2980, 872, 2685, 1590, 2210, 602, 1846, 777, 147, 2170, 2551, 246, + 1676, 1755, 460, 291, 235, 3152, 2742, 2907, 3224, 1779, 2458, 1251, 2486, 2774, 2899, 1103, + 1275, 2652, 1065, 2881, 725, 1508, 2368, 398, 951, 247, 1421, 3222, 2499, 271, 90, 853, + 1860, 3203, 1162, 1618, 666, 320, 8, 2813, 1544, 282, 1838, 1293, 2314, 552, 2677, 2106, + 1571, 205, 2918, 1542, 2721, 2597, 2312, 681, 130, 1602, 1871, 829, 2946, 3065, 1325, 2756, + 1861, 1474, 1202, 2367, 3147, 1752, 2707, 171, 3127, 3042, 1907, 1836, 1517, 359, 758, 1441 +}; + + +/************************************************* +* Name: fqmul +* +* Description: Multiplication followed by Montgomery reduction +* +* Arguments: - int16_t a: first factor +* - int16_t b: second factor +* +* Returns 16-bit integer congruent to a*b*R^{-1} mod q +**************************************************/ +static int16_t fqmul(int16_t a, int16_t b) { + return 
PQCLEAN_KYBER512_CLEAN_montgomery_reduce((int32_t)a * b); +} + +/************************************************* +* Name: ntt +* +* Description: Inplace number-theoretic transform (NTT) in Rq +* input is in standard order, output is in bitreversed order +* +* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_ntt(int16_t poly[256]) { + size_t j, k = 1; + int16_t t, zeta; + + for (size_t len = 128; len >= 2; len >>= 1) { + for (size_t start = 0; start < 256; start = j + len) { + zeta = PQCLEAN_KYBER512_CLEAN_zetas[k++]; + for (j = start; j < start + len; ++j) { + t = fqmul(zeta, poly[j + len]); + poly[j + len] = poly[j] - t; + poly[j] = poly[j] + t; + } + } + } +} + +/************************************************* +* Name: invntt +* +* Description: Inplace inverse number-theoretic transform in Rq +* input is in bitreversed order, output is in standard order +* +* Arguments: - int16_t poly[256]: pointer to input/output vector of elements of Zq +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_invntt(int16_t poly[256]) { + size_t j, k = 0; + int16_t t, zeta; + + for (size_t len = 2; len <= 128; len <<= 1) { + for (size_t start = 0; start < 256; start = j + len) { + zeta = PQCLEAN_KYBER512_CLEAN_zetas_inv[k++]; + for (j = start; j < start + len; ++j) { + t = poly[j]; + poly[j] = PQCLEAN_KYBER512_CLEAN_barrett_reduce(t + poly[j + len]); + poly[j + len] = t - poly[j + len]; + poly[j + len] = fqmul(zeta, poly[j + len]); + } + } + } + + for (j = 0; j < 256; ++j) { + poly[j] = fqmul(poly[j], PQCLEAN_KYBER512_CLEAN_zetas_inv[127]); + } +} + +/************************************************* +* Name: basemul +* +* Description: Multiplication of polynomials in Zq[X]/((X^2-zeta)) +* used for multiplication of elements in Rq in NTT domain +* +* Arguments: - int16_t r[2]: pointer to the output polynomial +* - const int16_t a[2]: pointer to the first factor +* - const int16_t b[2]: pointer to the second factor +* - int16_t zeta: integer defining the reduction polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta) { + r[0] = fqmul(a[1], b[1]); + r[0] = fqmul(r[0], zeta); + r[0] += fqmul(a[0], b[0]); + + r[1] = fqmul(a[0], b[1]); + r[1] += fqmul(a[1], b[0]); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.h index 13e976f7d0..bc373f0a5e 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/ntt.h @@ -1,13 +1,13 @@ -#ifndef NTT_H -#define NTT_H - -#include <stdint.h> - -extern const int16_t PQCLEAN_KYBER512_CLEAN_zetas[128]; -extern const int16_t PQCLEAN_KYBER512_CLEAN_zetasinv[128]; - -void PQCLEAN_KYBER512_CLEAN_ntt(int16_t *poly); -void PQCLEAN_KYBER512_CLEAN_invntt(int16_t *poly); -void PQCLEAN_KYBER512_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); - -#endif +#ifndef NTT_H +#define NTT_H + +#include <stdint.h> + +extern const int16_t PQCLEAN_KYBER512_CLEAN_zetas[128]; +extern const int16_t PQCLEAN_KYBER512_CLEAN_zetasinv[128]; + +void PQCLEAN_KYBER512_CLEAN_ntt(int16_t *poly); +void PQCLEAN_KYBER512_CLEAN_invntt(int16_t *poly); +void PQCLEAN_KYBER512_CLEAN_basemul(int16_t r[2], const int16_t a[2], const int16_t b[2], int16_t zeta); + +#endif diff --git 
a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/params.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/params.h index d086d4c694..a0ff58a397 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/params.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/params.h @@ -1,32 +1,32 @@ -#ifndef PARAMS_H -#define PARAMS_H - - -/* Don't change parameters below this line */ - -#define KYBER_N 256 -#define KYBER_Q 3329 - -#define KYBER_ETA 2 - -#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ -#define KYBER_SSBYTES 32 /* size in bytes of shared key */ - -#define KYBER_POLYBYTES 384 -#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) - - -#define KYBER_K 2 -#define KYBER_POLYCOMPRESSEDBYTES 96 -#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) - -#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES -#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) -#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) -#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) - -#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) -#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ -#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES - -#endif +#ifndef PARAMS_H +#define PARAMS_H + + +/* Don't change parameters below this line */ + +#define KYBER_N 256 +#define KYBER_Q 3329 + +#define KYBER_ETA 2 + +#define KYBER_SYMBYTES 32 /* size in bytes of hashes, and seeds */ +#define KYBER_SSBYTES 32 /* size in bytes of shared key */ + +#define KYBER_POLYBYTES 384 +#define KYBER_POLYVECBYTES (KYBER_K * KYBER_POLYBYTES) + + +#define KYBER_K 2 +#define KYBER_POLYCOMPRESSEDBYTES 96 +#define KYBER_POLYVECCOMPRESSEDBYTES (KYBER_K * 320) + +#define KYBER_INDCPA_MSGBYTES KYBER_SYMBYTES +#define KYBER_INDCPA_PUBLICKEYBYTES (KYBER_POLYVECBYTES + KYBER_SYMBYTES) +#define KYBER_INDCPA_SECRETKEYBYTES (KYBER_POLYVECBYTES) +#define KYBER_INDCPA_BYTES (KYBER_POLYVECCOMPRESSEDBYTES + KYBER_POLYCOMPRESSEDBYTES) + +#define KYBER_PUBLICKEYBYTES (KYBER_INDCPA_PUBLICKEYBYTES) +#define KYBER_SECRETKEYBYTES (KYBER_INDCPA_SECRETKEYBYTES + KYBER_INDCPA_PUBLICKEYBYTES + 2*KYBER_SYMBYTES) /* 32 bytes of additional space to save H(pk) */ +#define KYBER_CIPHERTEXTBYTES KYBER_INDCPA_BYTES + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.c index 694b4e9942..ae1ae7c719 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.c @@ -1,277 +1,277 @@ -#include "cbd.h" -#include "ntt.h" -#include "params.h" -#include "poly.h" -#include "reduce.h" -#include "symmetric.h" - -#include <stdint.h> -/************************************************* -* Name: poly_compress -* -* Description: Compression and subsequent serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_compress(uint8_t *r, poly *a) { - uint8_t t[8]; - size_t k = 0; - - PQCLEAN_KYBER512_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_N; i += 8) { - for (size_t j = 0; j < 8; j++) { - t[j] = ((((uint32_t)a->coeffs[i + j] << 3) + KYBER_Q / 2) / KYBER_Q) & 7; - } - - r[k] = (uint8_t)( t[0] | (t[1] << 3) | (t[2] << 6)); - r[k + 1] = (uint8_t)((t[2] >> 2) 
| (t[3] << 1) | (t[4] << 4) | (t[5] << 7)); - r[k + 2] = (uint8_t)((t[5] >> 1) | (t[6] << 2) | (t[7] << 5)); - k += 3; - } -} - -/************************************************* -* Name: poly_decompress -* -* Description: De-serialization and subsequent decompression of a polynomial; -* approximate inverse of poly_compress -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_decompress(poly *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_N; i += 8) { - r->coeffs[i + 0] = (int16_t)( (((a[0] & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 1] = (int16_t)(((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 2] = (int16_t)(((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 3] = (int16_t)(((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 4] = (int16_t)(((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 5] = (int16_t)(((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 6] = (int16_t)(((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3); - r->coeffs[i + 7] = (int16_t)(((((a[2] >> 5)) * KYBER_Q) + 4) >> 3); - a += 3; - } -} - -/************************************************* -* Name: poly_tobytes -* -* Description: Serialization of a polynomial -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_tobytes(uint8_t *r, poly *a) { - PQCLEAN_KYBER512_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_N / 2; i++) { - int16_t t0 = a->coeffs[2 * i]; - int16_t t1 = a->coeffs[2 * i + 1]; - r[3 * i] = t0 & 0xff; - r[3 * i + 1] = (uint8_t)((t0 >> 8) | ((t1 & 0xf) << 4)); - r[3 * i + 2] = (uint8_t)(t1 >> 4); - } -} - -/************************************************* -* Name: poly_frombytes -* -* Description: De-serialization of a polynomial; -* inverse of poly_tobytes -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *a: pointer to input byte array (of KYBER_POLYBYTES bytes) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_frombytes(poly *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_N / 2; i++) { - r->coeffs[2 * i] = (int16_t)(a[3 * i] | ((uint16_t)a[3 * i + 1] & 0x0f) << 8); - r->coeffs[2 * i + 1] = (int16_t)(a[3 * i + 1] >> 4 | ((uint16_t)a[3 * i + 2] & 0xff) << 4); - } -} - -/************************************************* -* Name: poly_getnoise -* -* Description: Sample a polynomial deterministically from a seed and a nonce, -* with output polynomial close to centered binomial distribution -* with parameter KYBER_ETA -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) -* - uint8_t nonce: one-byte input nonce -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce) { - uint8_t buf[KYBER_ETA * KYBER_N / 4]; - - prf(buf, KYBER_ETA * KYBER_N / 4, seed, nonce); - PQCLEAN_KYBER512_CLEAN_cbd(r, buf); -} - -/************************************************* -* Name: poly_ntt -* -* Description: Computes negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in normal order, output in 
bitreversed order -* -* Arguments: - uint16_t *r: pointer to in/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_ntt(poly *r) { - PQCLEAN_KYBER512_CLEAN_ntt(r->coeffs); - PQCLEAN_KYBER512_CLEAN_poly_reduce(r); -} - -/************************************************* -* Name: poly_invntt -* -* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of -* a polynomial in place; -* inputs assumed to be in bitreversed order, output in normal order -* -* Arguments: - uint16_t *a: pointer to in/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_invntt(poly *r) { - PQCLEAN_KYBER512_CLEAN_invntt(r->coeffs); -} - -/************************************************* -* Name: poly_basemul -* -* Description: Multiplication of two polynomials in NTT domain -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N / 4; ++i) { - PQCLEAN_KYBER512_CLEAN_basemul( - r->coeffs + 4 * i, - a->coeffs + 4 * i, - b->coeffs + 4 * i, - PQCLEAN_KYBER512_CLEAN_zetas[64 + i]); - PQCLEAN_KYBER512_CLEAN_basemul( - r->coeffs + 4 * i + 2, - a->coeffs + 4 * i + 2, - b->coeffs + 4 * i + 2, - -PQCLEAN_KYBER512_CLEAN_zetas[64 + i]); - } -} - -/************************************************* -* Name: poly_frommont -* -* Description: Inplace conversion of all coefficients of a polynomial -* from Montgomery domain to normal domain -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_frommont(poly *r) { - const int16_t f = (1ULL << 32) % KYBER_Q; - - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_montgomery_reduce( - (int32_t)r->coeffs[i] * f); - } -} - -/************************************************* -* Name: poly_reduce -* -* Description: Applies Barrett reduction to all coefficients of a polynomial -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_reduce(poly *r) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_barrett_reduce(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_csubq -* -* Description: Applies conditional subtraction of q to each coefficient of a polynomial -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_csubq(poly *r) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_csubq(r->coeffs[i]); - } -} - -/************************************************* -* Name: poly_add -* -* Description: Add two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_add(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N; i++) { - 
r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; - } -} - -/************************************************* -* Name: poly_sub -* -* Description: Subtract two polynomials -* -* Arguments: - poly *r: pointer to output polynomial -* - const poly *a: pointer to first input polynomial -* - const poly *b: pointer to second input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_sub(poly *r, const poly *a, const poly *b) { - for (size_t i = 0; i < KYBER_N; i++) { - r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; - } -} - -/************************************************* -* Name: poly_frommsg -* -* Description: Convert 32-byte message to polynomial -* -* Arguments: - poly *r: pointer to output polynomial -* - const uint8_t *msg: pointer to input message -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]) { - uint16_t mask; - - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - for (size_t j = 0; j < 8; j++) { - mask = -((msg[i] >> j) & 1); - r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2); - } - } -} - -/************************************************* -* Name: poly_tomsg -* -* Description: Convert polynomial to 32-byte message -* -* Arguments: - uint8_t *msg: pointer to output message -* - const poly *a: pointer to input polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a) { - uint16_t t; - - PQCLEAN_KYBER512_CLEAN_poly_csubq(a); - - for (size_t i = 0; i < KYBER_SYMBYTES; i++) { - msg[i] = 0; - for (size_t j = 0; j < 8; j++) { - t = (((a->coeffs[8 * i + j] << 1) + KYBER_Q / 2) / KYBER_Q) & 1; - msg[i] |= t << j; - } - } -} +#include "cbd.h" +#include "ntt.h" +#include "params.h" +#include "poly.h" +#include "reduce.h" +#include "symmetric.h" + +#include <stdint.h> +/************************************************* +* Name: poly_compress +* +* Description: Compression and subsequent serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYCOMPRESSEDBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_compress(uint8_t *r, poly *a) { + uint8_t t[8]; + size_t k = 0; + + PQCLEAN_KYBER512_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_N; i += 8) { + for (size_t j = 0; j < 8; j++) { + t[j] = ((((uint32_t)a->coeffs[i + j] << 3) + KYBER_Q / 2) / KYBER_Q) & 7; + } + + r[k] = (uint8_t)( t[0] | (t[1] << 3) | (t[2] << 6)); + r[k + 1] = (uint8_t)((t[2] >> 2) | (t[3] << 1) | (t[4] << 4) | (t[5] << 7)); + r[k + 2] = (uint8_t)((t[5] >> 1) | (t[6] << 2) | (t[7] << 5)); + k += 3; + } +} + +/************************************************* +* Name: poly_decompress +* +* Description: De-serialization and subsequent decompression of a polynomial; +* approximate inverse of poly_compress +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array (of length KYBER_POLYCOMPRESSEDBYTES bytes) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_decompress(poly *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_N; i += 8) { + r->coeffs[i + 0] = (int16_t)( (((a[0] & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 1] = (int16_t)(((((a[0] >> 3) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 2] = (int16_t)(((((a[0] >> 6) | ((a[1] << 2) & 4)) * KYBER_Q) + 4) >> 3); + 
r->coeffs[i + 3] = (int16_t)(((((a[1] >> 1) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 4] = (int16_t)(((((a[1] >> 4) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 5] = (int16_t)(((((a[1] >> 7) | ((a[2] << 1) & 6)) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 6] = (int16_t)(((((a[2] >> 2) & 7) * KYBER_Q) + 4) >> 3); + r->coeffs[i + 7] = (int16_t)(((((a[2] >> 5)) * KYBER_Q) + 4) >> 3); + a += 3; + } +} + +/************************************************* +* Name: poly_tobytes +* +* Description: Serialization of a polynomial +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYBYTES bytes) +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_tobytes(uint8_t *r, poly *a) { + PQCLEAN_KYBER512_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_N / 2; i++) { + int16_t t0 = a->coeffs[2 * i]; + int16_t t1 = a->coeffs[2 * i + 1]; + r[3 * i] = t0 & 0xff; + r[3 * i + 1] = (uint8_t)((t0 >> 8) | ((t1 & 0xf) << 4)); + r[3 * i + 2] = (uint8_t)(t1 >> 4); + } +} + +/************************************************* +* Name: poly_frombytes +* +* Description: De-serialization of a polynomial; +* inverse of poly_tobytes +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *a: pointer to input byte array (of KYBER_POLYBYTES bytes) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_frombytes(poly *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_N / 2; i++) { + r->coeffs[2 * i] = (int16_t)(a[3 * i] | ((uint16_t)a[3 * i + 1] & 0x0f) << 8); + r->coeffs[2 * i + 1] = (int16_t)(a[3 * i + 1] >> 4 | ((uint16_t)a[3 * i + 2] & 0xff) << 4); + } +} + +/************************************************* +* Name: poly_getnoise +* +* Description: Sample a polynomial deterministically from a seed and a nonce, +* with output polynomial close to centered binomial distribution +* with parameter KYBER_ETA +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *seed: pointer to input seed (pointing to array of length KYBER_SYMBYTES bytes) +* - uint8_t nonce: one-byte input nonce +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce) { + uint8_t buf[KYBER_ETA * KYBER_N / 4]; + + prf(buf, KYBER_ETA * KYBER_N / 4, seed, nonce); + PQCLEAN_KYBER512_CLEAN_cbd(r, buf); +} + +/************************************************* +* Name: poly_ntt +* +* Description: Computes negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in normal order, output in bitreversed order +* +* Arguments: - uint16_t *r: pointer to in/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_ntt(poly *r) { + PQCLEAN_KYBER512_CLEAN_ntt(r->coeffs); + PQCLEAN_KYBER512_CLEAN_poly_reduce(r); +} + +/************************************************* +* Name: poly_invntt +* +* Description: Computes inverse of negacyclic number-theoretic transform (NTT) of +* a polynomial in place; +* inputs assumed to be in bitreversed order, output in normal order +* +* Arguments: - uint16_t *a: pointer to in/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_invntt(poly *r) { + PQCLEAN_KYBER512_CLEAN_invntt(r->coeffs); +} + +/************************************************* +* Name: poly_basemul +* +* Description: Multiplication of two polynomials in NTT domain +* 
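/*
 * Usage sketch: how the NTT-domain primitives declared in poly.h compose into a
 * full multiplication in R_q. It assumes "params.h"/"poly.h" from this directory;
 * poly_mul_demo is an illustrative name only. poly_basemul leaves an extra
 * Montgomery factor on the result; in the reference ind-cpa code that factor is
 * absorbed either by the final scaling inside poly_invntt (when leaving the NTT
 * domain, as here) or by a poly_frommont call (when the product stays in the NTT
 * domain), so treat this as a flow illustration rather than a drop-in routine.
 */
#include "params.h"
#include "poly.h"

static void poly_mul_demo(poly *r, poly *a, poly *b) {
    PQCLEAN_KYBER512_CLEAN_poly_ntt(a);            /* a -> NTT domain (bitreversed order) */
    PQCLEAN_KYBER512_CLEAN_poly_ntt(b);            /* b -> NTT domain */
    PQCLEAN_KYBER512_CLEAN_poly_basemul(r, a, b);  /* pairwise products of degree-1 factors */
    PQCLEAN_KYBER512_CLEAN_poly_invntt(r);         /* back to normal order */
    PQCLEAN_KYBER512_CLEAN_poly_reduce(r);         /* keep coefficients in range */
}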
+* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N / 4; ++i) { + PQCLEAN_KYBER512_CLEAN_basemul( + r->coeffs + 4 * i, + a->coeffs + 4 * i, + b->coeffs + 4 * i, + PQCLEAN_KYBER512_CLEAN_zetas[64 + i]); + PQCLEAN_KYBER512_CLEAN_basemul( + r->coeffs + 4 * i + 2, + a->coeffs + 4 * i + 2, + b->coeffs + 4 * i + 2, + -PQCLEAN_KYBER512_CLEAN_zetas[64 + i]); + } +} + +/************************************************* +* Name: poly_frommont +* +* Description: Inplace conversion of all coefficients of a polynomial +* from Montgomery domain to normal domain +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_frommont(poly *r) { + const int16_t f = (1ULL << 32) % KYBER_Q; + + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_montgomery_reduce( + (int32_t)r->coeffs[i] * f); + } +} + +/************************************************* +* Name: poly_reduce +* +* Description: Applies Barrett reduction to all coefficients of a polynomial +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_reduce(poly *r) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_barrett_reduce(r->coeffs[i]); + } +} + +/************************************************* +* Name: poly_csubq +* +* Description: Applies conditional subtraction of q to each coefficient of a polynomial +* for details of conditional subtraction of q see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_csubq(poly *r) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = PQCLEAN_KYBER512_CLEAN_csubq(r->coeffs[i]); + } +} + +/************************************************* +* Name: poly_add +* +* Description: Add two polynomials +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_add(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = a->coeffs[i] + b->coeffs[i]; + } +} + +/************************************************* +* Name: poly_sub +* +* Description: Subtract two polynomials +* +* Arguments: - poly *r: pointer to output polynomial +* - const poly *a: pointer to first input polynomial +* - const poly *b: pointer to second input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_sub(poly *r, const poly *a, const poly *b) { + for (size_t i = 0; i < KYBER_N; i++) { + r->coeffs[i] = a->coeffs[i] - b->coeffs[i]; + } +} + +/************************************************* +* Name: poly_frommsg +* +* Description: Convert 32-byte message to polynomial +* +* Arguments: - poly *r: pointer to output polynomial +* - const uint8_t *msg: pointer to input message +**************************************************/ +void 
PQCLEAN_KYBER512_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]) { + uint16_t mask; + + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + for (size_t j = 0; j < 8; j++) { + mask = -((msg[i] >> j) & 1); + r->coeffs[8 * i + j] = mask & ((KYBER_Q + 1) / 2); + } + } +} + +/************************************************* +* Name: poly_tomsg +* +* Description: Convert polynomial to 32-byte message +* +* Arguments: - uint8_t *msg: pointer to output message +* - const poly *a: pointer to input polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a) { + uint16_t t; + + PQCLEAN_KYBER512_CLEAN_poly_csubq(a); + + for (size_t i = 0; i < KYBER_SYMBYTES; i++) { + msg[i] = 0; + for (size_t j = 0; j < 8; j++) { + t = (((a->coeffs[8 * i + j] << 1) + KYBER_Q / 2) / KYBER_Q) & 1; + msg[i] |= t << j; + } + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.h index ecdc7c2951..90c1655e53 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/poly.h @@ -1,37 +1,37 @@ -#ifndef POLY_H -#define POLY_H - -#include "params.h" - -#include <stdint.h> -/* - * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial - * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... + X^{n-1}*coeffs[n-1] - */ -typedef struct { - int16_t coeffs[KYBER_N]; -} poly; - -void PQCLEAN_KYBER512_CLEAN_poly_compress(uint8_t *r, poly *a); -void PQCLEAN_KYBER512_CLEAN_poly_decompress(poly *r, const uint8_t *a); - -void PQCLEAN_KYBER512_CLEAN_poly_tobytes(uint8_t *r, poly *a); -void PQCLEAN_KYBER512_CLEAN_poly_frombytes(poly *r, const uint8_t *a); - -void PQCLEAN_KYBER512_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]); -void PQCLEAN_KYBER512_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a); - -void PQCLEAN_KYBER512_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce); - -void PQCLEAN_KYBER512_CLEAN_poly_ntt(poly *r); -void PQCLEAN_KYBER512_CLEAN_poly_invntt(poly *r); -void PQCLEAN_KYBER512_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b); -void PQCLEAN_KYBER512_CLEAN_poly_frommont(poly *r); - -void PQCLEAN_KYBER512_CLEAN_poly_reduce(poly *r); -void PQCLEAN_KYBER512_CLEAN_poly_csubq(poly *r); - -void PQCLEAN_KYBER512_CLEAN_poly_add(poly *r, const poly *a, const poly *b); -void PQCLEAN_KYBER512_CLEAN_poly_sub(poly *r, const poly *a, const poly *b); - -#endif +#ifndef POLY_H +#define POLY_H + +#include "params.h" + +#include <stdint.h> +/* + * Elements of R_q = Z_q[X]/(X^n + 1). Represents polynomial + * coeffs[0] + X*coeffs[1] + X^2*xoeffs[2] + ... 
+ X^{n-1}*coeffs[n-1] + */ +typedef struct { + int16_t coeffs[KYBER_N]; +} poly; + +void PQCLEAN_KYBER512_CLEAN_poly_compress(uint8_t *r, poly *a); +void PQCLEAN_KYBER512_CLEAN_poly_decompress(poly *r, const uint8_t *a); + +void PQCLEAN_KYBER512_CLEAN_poly_tobytes(uint8_t *r, poly *a); +void PQCLEAN_KYBER512_CLEAN_poly_frombytes(poly *r, const uint8_t *a); + +void PQCLEAN_KYBER512_CLEAN_poly_frommsg(poly *r, const uint8_t msg[KYBER_SYMBYTES]); +void PQCLEAN_KYBER512_CLEAN_poly_tomsg(uint8_t msg[KYBER_SYMBYTES], poly *a); + +void PQCLEAN_KYBER512_CLEAN_poly_getnoise(poly *r, const uint8_t *seed, uint8_t nonce); + +void PQCLEAN_KYBER512_CLEAN_poly_ntt(poly *r); +void PQCLEAN_KYBER512_CLEAN_poly_invntt(poly *r); +void PQCLEAN_KYBER512_CLEAN_poly_basemul(poly *r, const poly *a, const poly *b); +void PQCLEAN_KYBER512_CLEAN_poly_frommont(poly *r); + +void PQCLEAN_KYBER512_CLEAN_poly_reduce(poly *r); +void PQCLEAN_KYBER512_CLEAN_poly_csubq(poly *r); + +void PQCLEAN_KYBER512_CLEAN_poly_add(poly *r, const poly *a, const poly *b); +void PQCLEAN_KYBER512_CLEAN_poly_sub(poly *r, const poly *a, const poly *b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.c index ab4a352a73..a98efc0502 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.c @@ -1,175 +1,175 @@ -#include "polyvec.h" - -#include "poly.h" - -#include <stddef.h> -#include <stdint.h> -/************************************************* -* Name: polyvec_compress -* -* Description: Compress and serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_compress(uint8_t *r, polyvec *a) { - PQCLEAN_KYBER512_CLEAN_polyvec_csubq(a); - - uint16_t t[4]; - for (size_t i = 0; i < KYBER_K; i++) { - for (size_t j = 0; j < KYBER_N / 4; j++) { - for (size_t k = 0; k < 4; k++) { - t[k] = ((((uint32_t)a->vec[i].coeffs[4 * j + k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; - } - - r[5 * j + 0] = (uint8_t)t[0]; - r[5 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] & 0x3f) << 2)); - r[5 * j + 2] = (uint8_t)((t[1] >> 6) | ((t[2] & 0x0f) << 4)); - r[5 * j + 3] = (uint8_t)((t[2] >> 4) | ((t[3] & 0x03) << 6)); - r[5 * j + 4] = (uint8_t)((t[3] >> 2)); - } - r += 320; - } -} - -/************************************************* -* Name: polyvec_decompress -* -* Description: De-serialize and decompress vector of polynomials; -* approximate inverse of polyvec_compress -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - uint8_t *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_K; i++) { - for (size_t j = 0; j < KYBER_N / 4; j++) { - r->vec[i].coeffs[4 * j + 0] = (int16_t)( (((a[5 * j + 0] | (((uint32_t)a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 1] = (int16_t)(((((a[5 * j + 1] >> 2) | (((uint32_t)a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 2] = (int16_t)(((((a[5 * j + 2] >> 4) | (((uint32_t)a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10); - r->vec[i].coeffs[4 * j + 3] = (int16_t)(((((a[5 * j + 3] >> 6) | 
(((uint32_t)a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10); - } - a += 320; - } -} - -/************************************************* -* Name: polyvec_tobytes -* -* Description: Serialize vector of polynomials -* -* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) -* - const polyvec *a: pointer to input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]); - } -} - -/************************************************* -* Name: polyvec_frombytes -* -* Description: De-serialize vector of polynomials; -* inverse of polyvec_tobytes -* -* Arguments: - uint8_t *r: pointer to output byte array -* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES); - } -} - -/************************************************* -* Name: polyvec_ntt -* -* Description: Apply forward NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_ntt(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_ntt(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_invntt -* -* Description: Apply inverse NTT to all elements of a vector of polynomials -* -* Arguments: - polyvec *r: pointer to in/output vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_invntt(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_invntt(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_pointwise_acc -* -* Description: Pointwise multiply elements of a and b and accumulate into r -* -* Arguments: - poly *r: pointer to output polynomial -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) { - poly t; - - PQCLEAN_KYBER512_CLEAN_poly_basemul(r, &a->vec[0], &b->vec[0]); - for (size_t i = 1; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_basemul(&t, &a->vec[i], &b->vec[i]); - PQCLEAN_KYBER512_CLEAN_poly_add(r, r, &t); - } - - PQCLEAN_KYBER512_CLEAN_poly_reduce(r); -} - -/************************************************* -* Name: polyvec_reduce -* -* Description: Applies Barrett reduction to each coefficient -* of each element of a vector of polynomials -* for details of the Barrett reduction see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_reduce(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_reduce(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_csubq -* -* Description: Applies conditional subtraction of q to each coefficient -* of 
each element of a vector of polynomials -* for details of conditional subtraction of q see comments in reduce.c -* -* Arguments: - poly *r: pointer to input/output polynomial -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_csubq(polyvec *r) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_csubq(&r->vec[i]); - } -} - -/************************************************* -* Name: polyvec_add -* -* Description: Add vectors of polynomials -* -* Arguments: - polyvec *r: pointer to output vector of polynomials -* - const polyvec *a: pointer to first input vector of polynomials -* - const polyvec *b: pointer to second input vector of polynomials -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { - for (size_t i = 0; i < KYBER_K; i++) { - PQCLEAN_KYBER512_CLEAN_poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); - } -} +#include "polyvec.h" + +#include "poly.h" + +#include <stddef.h> +#include <stdint.h> +/************************************************* +* Name: polyvec_compress +* +* Description: Compress and serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECCOMPRESSEDBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_compress(uint8_t *r, polyvec *a) { + PQCLEAN_KYBER512_CLEAN_polyvec_csubq(a); + + uint16_t t[4]; + for (size_t i = 0; i < KYBER_K; i++) { + for (size_t j = 0; j < KYBER_N / 4; j++) { + for (size_t k = 0; k < 4; k++) { + t[k] = ((((uint32_t)a->vec[i].coeffs[4 * j + k] << 10) + KYBER_Q / 2) / KYBER_Q) & 0x3ff; + } + + r[5 * j + 0] = (uint8_t)t[0]; + r[5 * j + 1] = (uint8_t)((t[0] >> 8) | ((t[1] & 0x3f) << 2)); + r[5 * j + 2] = (uint8_t)((t[1] >> 6) | ((t[2] & 0x0f) << 4)); + r[5 * j + 3] = (uint8_t)((t[2] >> 4) | ((t[3] & 0x03) << 6)); + r[5 * j + 4] = (uint8_t)((t[3] >> 2)); + } + r += 320; + } +} + +/************************************************* +* Name: polyvec_decompress +* +* Description: De-serialize and decompress vector of polynomials; +* approximate inverse of polyvec_compress +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - uint8_t *a: pointer to input byte array (of length KYBER_POLYVECCOMPRESSEDBYTES) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_K; i++) { + for (size_t j = 0; j < KYBER_N / 4; j++) { + r->vec[i].coeffs[4 * j + 0] = (int16_t)( (((a[5 * j + 0] | (((uint32_t)a[5 * j + 1] & 0x03) << 8)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 1] = (int16_t)(((((a[5 * j + 1] >> 2) | (((uint32_t)a[5 * j + 2] & 0x0f) << 6)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 2] = (int16_t)(((((a[5 * j + 2] >> 4) | (((uint32_t)a[5 * j + 3] & 0x3f) << 4)) * KYBER_Q) + 512) >> 10); + r->vec[i].coeffs[4 * j + 3] = (int16_t)(((((a[5 * j + 3] >> 6) | (((uint32_t)a[5 * j + 4] & 0xff) << 2)) * KYBER_Q) + 512) >> 10); + } + a += 320; + } +} + +/************************************************* +* Name: polyvec_tobytes +* +* Description: Serialize vector of polynomials +* +* Arguments: - uint8_t *r: pointer to output byte array (needs space for KYBER_POLYVECBYTES) +* - const polyvec *a: pointer to input vector of polynomials +**************************************************/ +void 
PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_tobytes(r + i * KYBER_POLYBYTES, &a->vec[i]); + } +} + +/************************************************* +* Name: polyvec_frombytes +* +* Description: De-serialize vector of polynomials; +* inverse of polyvec_tobytes +* +* Arguments: - uint8_t *r: pointer to output byte array +* - const polyvec *a: pointer to input vector of polynomials (of length KYBER_POLYVECBYTES) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_frombytes(&r->vec[i], a + i * KYBER_POLYBYTES); + } +} + +/************************************************* +* Name: polyvec_ntt +* +* Description: Apply forward NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_ntt(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_ntt(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_invntt +* +* Description: Apply inverse NTT to all elements of a vector of polynomials +* +* Arguments: - polyvec *r: pointer to in/output vector of polynomials +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_invntt(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_invntt(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_pointwise_acc +* +* Description: Pointwise multiply elements of a and b and accumulate into r +* +* Arguments: - poly *r: pointer to output polynomial +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b) { + poly t; + + PQCLEAN_KYBER512_CLEAN_poly_basemul(r, &a->vec[0], &b->vec[0]); + for (size_t i = 1; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_basemul(&t, &a->vec[i], &b->vec[i]); + PQCLEAN_KYBER512_CLEAN_poly_add(r, r, &t); + } + + PQCLEAN_KYBER512_CLEAN_poly_reduce(r); +} + +/************************************************* +* Name: polyvec_reduce +* +* Description: Applies Barrett reduction to each coefficient +* of each element of a vector of polynomials +* for details of the Barrett reduction see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_reduce(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_reduce(&r->vec[i]); + } +} + +/************************************************* +* Name: polyvec_csubq +* +* Description: Applies conditional subtraction of q to each coefficient +* of each element of a vector of polynomials +* for details of conditional subtraction of q see comments in reduce.c +* +* Arguments: - poly *r: pointer to input/output polynomial +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_csubq(polyvec *r) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_csubq(&r->vec[i]); + } +} + +/************************************************* +* 
Name: polyvec_add +* +* Description: Add vectors of polynomials +* +* Arguments: - polyvec *r: pointer to output vector of polynomials +* - const polyvec *a: pointer to first input vector of polynomials +* - const polyvec *b: pointer to second input vector of polynomials +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b) { + for (size_t i = 0; i < KYBER_K; i++) { + PQCLEAN_KYBER512_CLEAN_poly_add(&r->vec[i], &a->vec[i], &b->vec[i]); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.h index 159d1bd29d..5c8c3c30b7 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/polyvec.h @@ -1,29 +1,29 @@ -#ifndef POLYVEC_H -#define POLYVEC_H - -#include "params.h" -#include "poly.h" - -#include <stdint.h> - -typedef struct { - poly vec[KYBER_K]; -} polyvec; - -void PQCLEAN_KYBER512_CLEAN_polyvec_compress(uint8_t *r, polyvec *a); -void PQCLEAN_KYBER512_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a); - -void PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a); -void PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a); - -void PQCLEAN_KYBER512_CLEAN_polyvec_ntt(polyvec *r); -void PQCLEAN_KYBER512_CLEAN_polyvec_invntt(polyvec *r); - -void PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); - -void PQCLEAN_KYBER512_CLEAN_polyvec_reduce(polyvec *r); -void PQCLEAN_KYBER512_CLEAN_polyvec_csubq(polyvec *r); - -void PQCLEAN_KYBER512_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); - -#endif +#ifndef POLYVEC_H +#define POLYVEC_H + +#include "params.h" +#include "poly.h" + +#include <stdint.h> + +typedef struct { + poly vec[KYBER_K]; +} polyvec; + +void PQCLEAN_KYBER512_CLEAN_polyvec_compress(uint8_t *r, polyvec *a); +void PQCLEAN_KYBER512_CLEAN_polyvec_decompress(polyvec *r, const uint8_t *a); + +void PQCLEAN_KYBER512_CLEAN_polyvec_tobytes(uint8_t *r, polyvec *a); +void PQCLEAN_KYBER512_CLEAN_polyvec_frombytes(polyvec *r, const uint8_t *a); + +void PQCLEAN_KYBER512_CLEAN_polyvec_ntt(polyvec *r); +void PQCLEAN_KYBER512_CLEAN_polyvec_invntt(polyvec *r); + +void PQCLEAN_KYBER512_CLEAN_polyvec_pointwise_acc(poly *r, const polyvec *a, const polyvec *b); + +void PQCLEAN_KYBER512_CLEAN_polyvec_reduce(polyvec *r); +void PQCLEAN_KYBER512_CLEAN_polyvec_csubq(polyvec *r); + +void PQCLEAN_KYBER512_CLEAN_polyvec_add(polyvec *r, const polyvec *a, const polyvec *b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.c index 60415deefe..4a80fc84e9 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.c @@ -1,61 +1,61 @@ -#include "reduce.h" - -#include "params.h" - -#include <stdint.h> -/************************************************* -* Name: montgomery_reduce -* -* Description: Montgomery reduction; given a 32-bit integer a, computes -* 16-bit integer congruent to a * R^-1 mod q, -* where R=2^16 -* -* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} -* -* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. 
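/*
 * Standalone sanity check for the Montgomery reduction documented above, assuming
 * only the C standard library: it restates the routine with the constants from
 * reduce.h (KYBER_Q = 3329, QINV = 62209, R = 2^16) and verifies that the result
 * r satisfies r * 2^16 == a (mod q) and lies in {-q+1, ..., q-1} for inputs
 * sampled across the allowed range. The demo_ names are illustrative only.
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_Q    3329
#define DEMO_QINV 62209   /* q^(-1) mod 2^16 */

static int16_t demo_montgomery_reduce(int32_t a) {
    int16_t u = (int16_t)(a * (int64_t)DEMO_QINV);  /* a * q^-1 mod 2^16 */
    int32_t t = (int32_t)u * DEMO_Q;                /* multiple of q congruent to a mod 2^16 */
    return (int16_t)((a - t) >> 16);                /* exact division by R = 2^16 */
}

int main(void) {
    for (int32_t a = -DEMO_Q * 32768; a < DEMO_Q * 32768; a += 7919) {
        int16_t r = demo_montgomery_reduce(a);
        assert(r > -DEMO_Q && r < DEMO_Q);               /* {-q+1, ..., q-1} */
        assert(((int64_t)r * 65536 - a) % DEMO_Q == 0);  /* r == a * R^-1 (mod q) */
    }
    return 0;
}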
-**************************************************/ -int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) { - int32_t t; - int16_t u; - - u = (int16_t)(a * (int64_t)QINV); - t = (int32_t)u * KYBER_Q; - t = a - t; - t >>= 16; - return (int16_t)t; -} - -/************************************************* -* Name: barrett_reduce -* -* Description: Barrett reduction; given a 16-bit integer a, computes -* 16-bit integer congruent to a mod q in {0,...,q} -* -* Arguments: - int16_t a: input integer to be reduced -* -* Returns: integer in {0,...,q} congruent to a modulo q. -**************************************************/ -int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a) { - int32_t t; - const int32_t v = (1U << 26) / KYBER_Q + 1; - - t = v * a; - t >>= 26; - t *= KYBER_Q; - return a - (int16_t)t; -} - -/************************************************* -* Name: csubq -* -* Description: Conditionallly subtract q -* -* Arguments: - int16_t a: input integer -* -* Returns: a - q if a >= q, else a -**************************************************/ -int16_t PQCLEAN_KYBER512_CLEAN_csubq(int16_t a) { - a -= KYBER_Q; - a += (a >> 15) & KYBER_Q; - return a; -} +#include "reduce.h" + +#include "params.h" + +#include <stdint.h> +/************************************************* +* Name: montgomery_reduce +* +* Description: Montgomery reduction; given a 32-bit integer a, computes +* 16-bit integer congruent to a * R^-1 mod q, +* where R=2^16 +* +* Arguments: - int32_t a: input integer to be reduced; has to be in {-q2^15,...,q2^15-1} +* +* Returns: integer in {-q+1,...,q-1} congruent to a * R^-1 modulo q. +**************************************************/ +int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a) { + int32_t t; + int16_t u; + + u = (int16_t)(a * (int64_t)QINV); + t = (int32_t)u * KYBER_Q; + t = a - t; + t >>= 16; + return (int16_t)t; +} + +/************************************************* +* Name: barrett_reduce +* +* Description: Barrett reduction; given a 16-bit integer a, computes +* 16-bit integer congruent to a mod q in {0,...,q} +* +* Arguments: - int16_t a: input integer to be reduced +* +* Returns: integer in {0,...,q} congruent to a modulo q. 
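/*
 * Standalone, exhaustive check of barrett_reduce and csubq as documented above,
 * assuming only <assert.h>/<stdint.h> and KYBER_Q = 3329: for every int16_t input
 * it confirms the Barrett output lies in {0, ..., q} and is congruent to the input
 * mod q, and that a following csubq lands in {0, ..., q-1}. The demo_ names are
 * illustrative only.
 */
#include <assert.h>
#include <stdint.h>

#define DEMO_Q 3329

static int16_t demo_barrett_reduce(int16_t a) {
    const int32_t v = (1U << 26) / DEMO_Q + 1;  /* ceil(2^26 / q) */
    int32_t t = v * a;
    t >>= 26;                                    /* approximate quotient a / q */
    t *= DEMO_Q;
    return a - (int16_t)t;
}

static int16_t demo_csubq(int16_t a) {
    a -= DEMO_Q;
    a += (a >> 15) & DEMO_Q;   /* add q back iff the subtraction went negative */
    return a;
}

int main(void) {
    for (int32_t a = INT16_MIN; a <= INT16_MAX; a++) {
        int16_t r = demo_barrett_reduce((int16_t)a);
        assert(r >= 0 && r <= DEMO_Q);             /* {0, ..., q} */
        assert(((int32_t)r - a) % DEMO_Q == 0);    /* r == a (mod q) */
        int16_t s = demo_csubq(r);
        assert(s >= 0 && s < DEMO_Q);              /* {0, ..., q-1} */
    }
    return 0;
}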
+**************************************************/ +int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a) { + int32_t t; + const int32_t v = (1U << 26) / KYBER_Q + 1; + + t = v * a; + t >>= 26; + t *= KYBER_Q; + return a - (int16_t)t; +} + +/************************************************* +* Name: csubq +* +* Description: Conditionallly subtract q +* +* Arguments: - int16_t a: input integer +* +* Returns: a - q if a >= q, else a +**************************************************/ +int16_t PQCLEAN_KYBER512_CLEAN_csubq(int16_t a) { + a -= KYBER_Q; + a += (a >> 15) & KYBER_Q; + return a; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.h index 68a7f570ca..0aeffddaab 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/reduce.h @@ -1,15 +1,15 @@ -#ifndef REDUCE_H -#define REDUCE_H - -#include <stdint.h> - -#define MONT 2285 // 2^16 % Q -#define QINV 62209 // q^(-1) mod 2^16 - -int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a); - -int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a); - -int16_t PQCLEAN_KYBER512_CLEAN_csubq(int16_t a); - -#endif +#ifndef REDUCE_H +#define REDUCE_H + +#include <stdint.h> + +#define MONT 2285 // 2^16 % Q +#define QINV 62209 // q^(-1) mod 2^16 + +int16_t PQCLEAN_KYBER512_CLEAN_montgomery_reduce(int32_t a); + +int16_t PQCLEAN_KYBER512_CLEAN_barrett_reduce(int16_t a); + +int16_t PQCLEAN_KYBER512_CLEAN_csubq(int16_t a); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric-fips202.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric-fips202.c index d482a66682..2596ce444d 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric-fips202.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric-fips202.c @@ -1,63 +1,63 @@ -#include "fips202_kyber_r2.h" -#include "symmetric.h" - -#include <stdlib.h> -/************************************************* -* Name: kyber_shake128_absorb -* -* Description: Absorb step of the SHAKE128 specialized for the Kyber context. - -* Arguments: - keccak_state *s: pointer to (uninitialized) output Keccak state -* - const uint8_t *input: pointer to KYBER_SYMBYTES input to be absorbed into s -* - uint8_t i additional byte of input -* - uint8_t j additional byte of input -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y) { - size_t i; - uint8_t extseed[KYBER_SYMBYTES + 2]; - - for (i = 0; i < KYBER_SYMBYTES; i++) { - extseed[i] = input[i]; - } - extseed[i++] = x; - extseed[i] = y; - shake128_absorb(s, extseed, KYBER_SYMBYTES + 2); -} - -/************************************************* -* Name: kyber_shake128_squeezeblocks -* -* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. -* Modifies the state. Can be called multiple times to keep squeezing, -* i.e., is incremental. 
-* -* Arguments: - uint8_t *output: pointer to output blocks -* - unsigned long long nblocks: number of blocks to be squeezed (written to output) -* - keccak_state *s: pointer to in/output Keccak state -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(uint8_t *output, size_t nblocks, keccak_state *s) { - shake128_squeezeblocks(output, nblocks, s); -} - -/************************************************* -* Name: shake256_prf -* -* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input -* and then generates outlen bytes of SHAKE256 output -* -* Arguments: - uint8_t *output: pointer to output -* - size_t outlen: number of requested output bytes -* - const uint8_t * key: pointer to the key (of length KYBER_SYMBYTES) -* - const uint8_t nonce: single-byte nonce (public PRF input) -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce) { - uint8_t extkey[KYBER_SYMBYTES + 1]; - size_t i; - - for (i = 0; i < KYBER_SYMBYTES; i++) { - extkey[i] = key[i]; - } - extkey[i] = nonce; - - shake256_kyber(output, outlen, extkey, KYBER_SYMBYTES + 1); -} +#include "fips202_kyber_r2.h" +#include "symmetric.h" + +#include <stdlib.h> +/************************************************* +* Name: kyber_shake128_absorb +* +* Description: Absorb step of the SHAKE128 specialized for the Kyber context. + +* Arguments: - keccak_state *s: pointer to (uninitialized) output Keccak state +* - const uint8_t *input: pointer to KYBER_SYMBYTES input to be absorbed into s +* - uint8_t i additional byte of input +* - uint8_t j additional byte of input +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y) { + size_t i; + uint8_t extseed[KYBER_SYMBYTES + 2]; + + for (i = 0; i < KYBER_SYMBYTES; i++) { + extseed[i] = input[i]; + } + extseed[i++] = x; + extseed[i] = y; + shake128_absorb(s, extseed, KYBER_SYMBYTES + 2); +} + +/************************************************* +* Name: kyber_shake128_squeezeblocks +* +* Description: Squeeze step of SHAKE128 XOF. Squeezes full blocks of SHAKE128_RATE bytes each. +* Modifies the state. Can be called multiple times to keep squeezing, +* i.e., is incremental. 
+* +* Arguments: - uint8_t *output: pointer to output blocks +* - unsigned long long nblocks: number of blocks to be squeezed (written to output) +* - keccak_state *s: pointer to in/output Keccak state +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(uint8_t *output, size_t nblocks, keccak_state *s) { + shake128_squeezeblocks(output, nblocks, s); +} + +/************************************************* +* Name: shake256_prf +* +* Description: Usage of SHAKE256 as a PRF, concatenates secret and public input +* and then generates outlen bytes of SHAKE256 output +* +* Arguments: - uint8_t *output: pointer to output +* - size_t outlen: number of requested output bytes +* - const uint8_t * key: pointer to the key (of length KYBER_SYMBYTES) +* - const uint8_t nonce: single-byte nonce (public PRF input) +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce) { + uint8_t extkey[KYBER_SYMBYTES + 1]; + size_t i; + + for (i = 0; i < KYBER_SYMBYTES; i++) { + extkey[i] = key[i]; + } + extkey[i] = nonce; + + shake256_kyber(output, outlen, extkey, KYBER_SYMBYTES + 1); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric.h index 26128e2431..ce4befd805 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/symmetric.h @@ -1,30 +1,30 @@ -#ifndef SYMMETRIC_H -#define SYMMETRIC_H - -#include "params.h" - - -#include "fips202_kyber_r2.h" - -#include <stdint.h> -#include <stddef.h> - - -void PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y); -void PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(uint8_t *output, size_t nblocks, keccak_state *s); -void PQCLEAN_KYBER512_CLEAN_shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce); - -#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) -#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) -#define xof_absorb(STATE, IN, X, Y) PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(STATE, IN, X, Y) -#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) -#define xof_ctx_release(STATE) shake128_ctx_release(STATE) -#define prf(OUT, OUTBYTES, KEY, NONCE) PQCLEAN_KYBER512_CLEAN_shake256_prf(OUT, OUTBYTES, KEY, NONCE) -#define kdf(OUT, IN, INBYTES) shake256_kyber(OUT, KYBER_SSBYTES, IN, INBYTES) - -#define XOF_BLOCKBYTES 168 - -typedef keccak_state xof_state; - - -#endif /* SYMMETRIC_H */ +#ifndef SYMMETRIC_H +#define SYMMETRIC_H + +#include "params.h" + + +#include "fips202_kyber_r2.h" + +#include <stdint.h> +#include <stddef.h> + + +void PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(keccak_state *s, const uint8_t *input, uint8_t x, uint8_t y); +void PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(uint8_t *output, size_t nblocks, keccak_state *s); +void PQCLEAN_KYBER512_CLEAN_shake256_prf(uint8_t *output, size_t outlen, const uint8_t *key, uint8_t nonce); + +#define hash_h(OUT, IN, INBYTES) sha3_256(OUT, IN, INBYTES) +#define hash_g(OUT, IN, INBYTES) sha3_512(OUT, IN, INBYTES) +#define xof_absorb(STATE, IN, X, Y) PQCLEAN_KYBER512_CLEAN_kyber_shake128_absorb(STATE, IN, X, Y) +#define xof_squeezeblocks(OUT, OUTBLOCKS, STATE) PQCLEAN_KYBER512_CLEAN_kyber_shake128_squeezeblocks(OUT, OUTBLOCKS, STATE) 
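/*
 * Usage sketch for the XOF wrappers defined in this header: absorb a public seed
 * plus two domain-separation bytes, then squeeze full SHAKE128 blocks. It assumes
 * "params.h"/"symmetric.h" from this directory and the underlying fips202
 * implementation; demo_expand_seed is an illustrative name, and the rejection
 * sampling that the real matrix expansion performs on this stream is omitted.
 */
#include <stdint.h>

#include "params.h"
#include "symmetric.h"

#define DEMO_NBLOCKS 4

static void demo_expand_seed(uint8_t out[DEMO_NBLOCKS * XOF_BLOCKBYTES],
                             const uint8_t seed[KYBER_SYMBYTES],
                             uint8_t i, uint8_t j) {
    xof_state state;

    xof_absorb(&state, seed, i, j);                /* seed || i || j into SHAKE128 */
    xof_squeezeblocks(out, DEMO_NBLOCKS, &state);  /* 4 * 168 bytes of output */
    xof_ctx_release(&state);                       /* release the incremental state */
}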
+#define xof_ctx_release(STATE) shake128_ctx_release(STATE) +#define prf(OUT, OUTBYTES, KEY, NONCE) PQCLEAN_KYBER512_CLEAN_shake256_prf(OUT, OUTBYTES, KEY, NONCE) +#define kdf(OUT, IN, INBYTES) shake256_kyber(OUT, KYBER_SSBYTES, IN, INBYTES) + +#define XOF_BLOCKBYTES 168 + +typedef keccak_state xof_state; + + +#endif /* SYMMETRIC_H */ diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.c b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.c index 149e52d7b0..5fcbf00577 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.c +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.c @@ -1,50 +1,50 @@ -#include "verify.h" - -#include <stddef.h> -#include <stdint.h> - -/************************************************* -* Name: verify -* -* Description: Compare two arrays for equality in constant time. -* -* Arguments: const uint8_t *a: pointer to first byte array -* const uint8_t *b: pointer to second byte array -* size_t len: length of the byte arrays -* -* Returns 0 if the byte arrays are equal, 1 otherwise -**************************************************/ -uint8_t PQCLEAN_KYBER512_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len) { - uint64_t r; - size_t i; - r = 0; - - for (i = 0; i < len; i++) { - r |= a[i] ^ b[i]; - } - - r = (-r) >> 63; - return (uint8_t)r; -} - -/************************************************* -* Name: cmov -* -* Description: Copy len bytes from x to r if b is 1; -* don't modify x if b is 0. Requires b to be in {0,1}; -* assumes two's complement representation of negative integers. -* Runs in constant time. -* -* Arguments: uint8_t *r: pointer to output byte array -* const uint8_t *x: pointer to input byte array -* size_t len: Amount of bytes to be copied -* uint8_t b: Condition bit; has to be in {0,1} -**************************************************/ -void PQCLEAN_KYBER512_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) { - size_t i; - - b = -b; - for (i = 0; i < len; i++) { - r[i] ^= b & (x[i] ^ r[i]); - } -} +#include "verify.h" + +#include <stddef.h> +#include <stdint.h> + +/************************************************* +* Name: verify +* +* Description: Compare two arrays for equality in constant time. +* +* Arguments: const uint8_t *a: pointer to first byte array +* const uint8_t *b: pointer to second byte array +* size_t len: length of the byte arrays +* +* Returns 0 if the byte arrays are equal, 1 otherwise +**************************************************/ +uint8_t PQCLEAN_KYBER512_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len) { + uint64_t r; + size_t i; + r = 0; + + for (i = 0; i < len; i++) { + r |= a[i] ^ b[i]; + } + + r = (-r) >> 63; + return (uint8_t)r; +} + +/************************************************* +* Name: cmov +* +* Description: Copy len bytes from x to r if b is 1; +* don't modify x if b is 0. Requires b to be in {0,1}; +* assumes two's complement representation of negative integers. +* Runs in constant time. 
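/*
 * Standalone sketch of the decapsulation-style pattern these two helpers support:
 * compare the received ciphertext with a re-encryption in constant time, then
 * branchlessly overwrite the shared secret with a rejection value when they
 * differ. The helpers are restated with a demo_ prefix; the 32-byte buffers and
 * main() are illustrative only.
 */
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static uint8_t demo_verify(const uint8_t *a, const uint8_t *b, size_t len) {
    uint64_t r = 0;
    for (size_t i = 0; i < len; i++) {
        r |= a[i] ^ b[i];              /* accumulate any differing bit */
    }
    return (uint8_t)((-r) >> 63);      /* 0 if equal, 1 otherwise */
}

static void demo_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) {
    b = -b;                            /* 0x00 or 0xFF mask */
    for (size_t i = 0; i < len; i++) {
        r[i] ^= b & (x[i] ^ r[i]);     /* r[i] = b ? x[i] : r[i] */
    }
}

int main(void) {
    uint8_t ct[32] = {0}, ct_cmp[32] = {0}, ss[32] = {0}, reject[32];
    memset(reject, 0xAA, sizeof reject);

    ct_cmp[7] ^= 1;                                  /* simulate a mismatch */
    uint8_t fail = demo_verify(ct, ct_cmp, sizeof ct);
    demo_cmov(ss, reject, sizeof ss, fail);          /* select without branching */

    assert(fail == 1 && ss[0] == 0xAA);
    return 0;
}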
+* +* Arguments: uint8_t *r: pointer to output byte array +* const uint8_t *x: pointer to input byte array +* size_t len: Amount of bytes to be copied +* uint8_t b: Condition bit; has to be in {0,1} +**************************************************/ +void PQCLEAN_KYBER512_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b) { + size_t i; + + b = -b; + for (i = 0; i < len; i++) { + r[i] ^= b & (x[i] ^ r[i]); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.h b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.h index d95be219df..fc75db0408 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.h +++ b/contrib/restricted/aws/s2n/pq-crypto/kyber_r2/verify.h @@ -1,11 +1,11 @@ -#ifndef VERIFY_H -#define VERIFY_H - -#include <stddef.h> -#include <stdint.h> - -uint8_t PQCLEAN_KYBER512_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len); - -void PQCLEAN_KYBER512_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); - -#endif +#ifndef VERIFY_H +#define VERIFY_H + +#include <stddef.h> +#include <stdint.h> + +uint8_t PQCLEAN_KYBER512_CLEAN_verify(const uint8_t *a, const uint8_t *b, size_t len); + +void PQCLEAN_KYBER512_CLEAN_cmov(uint8_t *r, const uint8_t *x, size_t len, uint8_t b); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.c b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.c index 7381deed4e..3bd68cded6 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.c +++ b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.c @@ -1,122 +1,122 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -#include "s2n_pq.h" - -static bool sikep434r2_asm_enabled = false; - -#if defined(S2N_CPUID_AVAILABLE) -/* https://en.wikipedia.org/wiki/CPUID */ -#include <cpuid.h> - -#define EXTENDED_FEATURES_LEAF 7 -#define EXTENDED_FEATURES_SUBLEAF_ZERO 0 - -/* The cpuid.h header included with older versions of gcc and - * clang doesn't include definitions for bit_ADX, bit_BMI2, or - * __get_cpuid_count(). 
*/ -#if !defined(bit_ADX) - #define bit_ADX (1 << 19) -#endif - -#if !defined(bit_BMI2) - #define bit_BMI2 (1 << 8) -#endif - -bool s2n_get_cpuid_count(uint32_t leaf, uint32_t sub_leaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { - /* 0x80000000 probes for extended cpuid info */ - uint32_t max_level = __get_cpuid_max(leaf & 0x80000000, 0); - - if (max_level == 0 || max_level < leaf) { - return false; - } - - __cpuid_count(leaf, sub_leaf, *eax, *ebx, *ecx, *edx); - return true; -} - -/* https://en.wikipedia.org/wiki/Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2) */ -bool s2n_cpu_supports_bmi2() { - uint32_t eax, ebx, ecx, edx; - if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { - return false; - } - - return (ebx & bit_BMI2); -} - -/* https://en.wikipedia.org/wiki/Intel_ADX */ -bool s2n_cpu_supports_adx() { - uint32_t eax, ebx, ecx, edx; - if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { - return false; - } - - return (ebx & bit_ADX); -} - -bool s2n_cpu_supports_sikep434r2_asm() { -#if defined(S2N_SIKEP434R2_ASM) - /* The sikep434r2 assembly code always requires BMI2. If the assembly - * was compiled with support for ADX, we also require ADX at runtime. */ - #if defined(S2N_ADX) - return s2n_cpu_supports_bmi2() && s2n_cpu_supports_adx(); - #else - return s2n_cpu_supports_bmi2(); - #endif -#else - /* sikep434r2 assembly was not supported at compile time */ - return false; -#endif /* defined(S2N_SIKEP434R2_ASM) */ -} - -#else /* defined(S2N_CPUID_AVAILABLE) */ - -/* If CPUID is not available, we cannot perform necessary run-time checks. */ -bool s2n_cpu_supports_sikep434r2_asm() { - return false; -} - -#endif /* defined(S2N_CPUID_AVAILABLE) */ - -bool s2n_sikep434r2_asm_is_enabled() { - return sikep434r2_asm_enabled; -} - -bool s2n_pq_is_enabled() { -#if defined(S2N_NO_PQ) - return false; -#else - return !s2n_is_in_fips_mode(); -#endif -} - -S2N_RESULT s2n_disable_sikep434r2_asm() { - sikep434r2_asm_enabled = false; - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_try_enable_sikep434r2_asm() { - if (s2n_pq_is_enabled() && s2n_cpu_supports_sikep434r2_asm()) { - sikep434r2_asm_enabled = true; - } - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_pq_init() { - ENSURE_OK(s2n_try_enable_sikep434r2_asm(), S2N_ERR_SAFETY); - - return S2N_RESULT_OK; -} +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "s2n_pq.h" + +static bool sikep434r2_asm_enabled = false; + +#if defined(S2N_CPUID_AVAILABLE) +/* https://en.wikipedia.org/wiki/CPUID */ +#include <cpuid.h> + +#define EXTENDED_FEATURES_LEAF 7 +#define EXTENDED_FEATURES_SUBLEAF_ZERO 0 + +/* The cpuid.h header included with older versions of gcc and + * clang doesn't include definitions for bit_ADX, bit_BMI2, or + * __get_cpuid_count(). 
*/ +#if !defined(bit_ADX) + #define bit_ADX (1 << 19) +#endif + +#if !defined(bit_BMI2) + #define bit_BMI2 (1 << 8) +#endif + +bool s2n_get_cpuid_count(uint32_t leaf, uint32_t sub_leaf, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx) { + /* 0x80000000 probes for extended cpuid info */ + uint32_t max_level = __get_cpuid_max(leaf & 0x80000000, 0); + + if (max_level == 0 || max_level < leaf) { + return false; + } + + __cpuid_count(leaf, sub_leaf, *eax, *ebx, *ecx, *edx); + return true; +} + +/* https://en.wikipedia.org/wiki/Bit_manipulation_instruction_set#BMI2_(Bit_Manipulation_Instruction_Set_2) */ +bool s2n_cpu_supports_bmi2() { + uint32_t eax, ebx, ecx, edx; + if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { + return false; + } + + return (ebx & bit_BMI2); +} + +/* https://en.wikipedia.org/wiki/Intel_ADX */ +bool s2n_cpu_supports_adx() { + uint32_t eax, ebx, ecx, edx; + if (!s2n_get_cpuid_count(EXTENDED_FEATURES_LEAF, EXTENDED_FEATURES_SUBLEAF_ZERO, &eax, &ebx, &ecx, &edx)) { + return false; + } + + return (ebx & bit_ADX); +} + +bool s2n_cpu_supports_sikep434r2_asm() { +#if defined(S2N_SIKEP434R2_ASM) + /* The sikep434r2 assembly code always requires BMI2. If the assembly + * was compiled with support for ADX, we also require ADX at runtime. */ + #if defined(S2N_ADX) + return s2n_cpu_supports_bmi2() && s2n_cpu_supports_adx(); + #else + return s2n_cpu_supports_bmi2(); + #endif +#else + /* sikep434r2 assembly was not supported at compile time */ + return false; +#endif /* defined(S2N_SIKEP434R2_ASM) */ +} + +#else /* defined(S2N_CPUID_AVAILABLE) */ + +/* If CPUID is not available, we cannot perform necessary run-time checks. */ +bool s2n_cpu_supports_sikep434r2_asm() { + return false; +} + +#endif /* defined(S2N_CPUID_AVAILABLE) */ + +bool s2n_sikep434r2_asm_is_enabled() { + return sikep434r2_asm_enabled; +} + +bool s2n_pq_is_enabled() { +#if defined(S2N_NO_PQ) + return false; +#else + return !s2n_is_in_fips_mode(); +#endif +} + +S2N_RESULT s2n_disable_sikep434r2_asm() { + sikep434r2_asm_enabled = false; + return S2N_RESULT_OK; +} + +S2N_RESULT s2n_try_enable_sikep434r2_asm() { + if (s2n_pq_is_enabled() && s2n_cpu_supports_sikep434r2_asm()) { + sikep434r2_asm_enabled = true; + } + return S2N_RESULT_OK; +} + +S2N_RESULT s2n_pq_init() { + ENSURE_OK(s2n_try_enable_sikep434r2_asm(), S2N_ERR_SAFETY); + + return S2N_RESULT_OK; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.h b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.h index 7e5d93e991..e2a461a1a9 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.h +++ b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq.h @@ -1,27 +1,27 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. 
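/*
 * Usage sketch for the runtime gating above, assuming "s2n_pq.h" and its
 * dependencies: s2n_pq_init() probes CPUID once at startup, after which callers
 * branch on the cached flags instead of re-probing. demo_pick_sike_impl and the
 * returned strings are illustrative only.
 */
#include "s2n_pq.h"

static const char *demo_pick_sike_impl(void) {
    if (!s2n_pq_is_enabled()) {
        return "pq-disabled";             /* e.g. FIPS mode or an S2N_NO_PQ build */
    }
    if (s2n_sikep434r2_asm_is_enabled()) {
        return "sikep434r2-asm";          /* BMI2 (+ ADX when compiled in) path */
    }
    return "sikep434r2-portable";         /* generic C fallback */
}

/* Typical call site, once per process:
 *     GUARD_RESULT(s2n_pq_init());
 *     const char *impl = demo_pick_sike_impl();
 */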
- */ - -#pragma once - -#include <stdbool.h> -#include "utils/s2n_result.h" -#include "utils/s2n_safety.h" -#include "crypto/s2n_fips.h" - -bool s2n_sikep434r2_asm_is_enabled(void); -bool s2n_pq_is_enabled(void); -S2N_RESULT s2n_disable_sikep434r2_asm(void); -S2N_RESULT s2n_try_enable_sikep434r2_asm(void); -S2N_RESULT s2n_pq_init(void); +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#pragma once + +#include <stdbool.h> +#include "utils/s2n_result.h" +#include "utils/s2n_safety.h" +#include "crypto/s2n_fips.h" + +bool s2n_sikep434r2_asm_is_enabled(void); +bool s2n_pq_is_enabled(void); +S2N_RESULT s2n_disable_sikep434r2_asm(void); +S2N_RESULT s2n_try_enable_sikep434r2_asm(void); +S2N_RESULT s2n_pq_init(void); diff --git a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.c b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.c index 845def4a31..f8c2572741 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.c +++ b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.c @@ -1,45 +1,45 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -#include "utils/s2n_random.h" -#include "utils/s2n_result.h" -#include "utils/s2n_safety.h" -#include "s2n_pq_random.h" - -static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes); - -static s2n_get_random_bytes_callback s2n_get_random_bytes_cb = s2n_get_random_bytes_default; - -S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes) { - ENSURE_REF(buffer); - GUARD_RESULT(s2n_get_random_bytes_cb(buffer, num_bytes)); - - return S2N_RESULT_OK; -} - -static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes) { - struct s2n_blob out = { .data = buffer, .size = num_bytes }; - GUARD_RESULT(s2n_get_private_random_data(&out)); - - return S2N_RESULT_OK; -} - -S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback) { - ENSURE(s2n_in_unit_test(), S2N_ERR_NOT_IN_UNIT_TEST); - - s2n_get_random_bytes_cb = rand_bytes_callback; - - return S2N_RESULT_OK; -} +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. 
This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. + */ + +#include "utils/s2n_random.h" +#include "utils/s2n_result.h" +#include "utils/s2n_safety.h" +#include "s2n_pq_random.h" + +static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes); + +static s2n_get_random_bytes_callback s2n_get_random_bytes_cb = s2n_get_random_bytes_default; + +S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes) { + ENSURE_REF(buffer); + GUARD_RESULT(s2n_get_random_bytes_cb(buffer, num_bytes)); + + return S2N_RESULT_OK; +} + +static S2N_RESULT s2n_get_random_bytes_default(uint8_t *buffer, uint32_t num_bytes) { + struct s2n_blob out = { .data = buffer, .size = num_bytes }; + GUARD_RESULT(s2n_get_private_random_data(&out)); + + return S2N_RESULT_OK; +} + +S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback) { + ENSURE(s2n_in_unit_test(), S2N_ERR_NOT_IN_UNIT_TEST); + + s2n_get_random_bytes_cb = rand_bytes_callback; + + return S2N_RESULT_OK; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.h b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.h index 04dcd9cd93..6e059c5d9e 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.h +++ b/contrib/restricted/aws/s2n/pq-crypto/s2n_pq_random.h @@ -1,23 +1,23 @@ -/* - * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. - * - * Licensed under the Apache License, Version 2.0 (the "License"). - * You may not use this file except in compliance with the License. - * A copy of the License is located at - * - * http://aws.amazon.com/apache2.0 - * - * or in the "license" file accompanying this file. This file is distributed - * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either - * express or implied. See the License for the specific language governing - * permissions and limitations under the License. - */ - -#pragma once - -#include "utils/s2n_result.h" - -typedef S2N_RESULT (*s2n_get_random_bytes_callback)(uint8_t *buffer, uint32_t num_bytes); - -S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes); -S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback); +/* + * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved. + * + * Licensed under the Apache License, Version 2.0 (the "License"). + * You may not use this file except in compliance with the License. + * A copy of the License is located at + * + * http://aws.amazon.com/apache2.0 + * + * or in the "license" file accompanying this file. This file is distributed + * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either + * express or implied. See the License for the specific language governing + * permissions and limitations under the License. 
+ */ + +#pragma once + +#include "utils/s2n_result.h" + +typedef S2N_RESULT (*s2n_get_random_bytes_callback)(uint8_t *buffer, uint32_t num_bytes); + +S2N_RESULT s2n_get_random_bytes(uint8_t *buffer, uint32_t num_bytes); +S2N_RESULT s2n_set_rand_bytes_callback_for_testing(s2n_get_random_bytes_callback rand_bytes_callback); diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_internal_r1.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_internal_r1.h index f6674fa2bc..242ca83d61 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_internal_r1.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_internal_r1.h @@ -1,261 +1,261 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: internal header file for P503 -*********************************************************************************************/ - -#ifndef __P503_INTERNAL_H__ -#define __P503_INTERNAL_H__ - -#include "sike_r1_namespace.h" -#include "api_r1.h" - -#if (TARGET == TARGET_AMD64) - #define NWORDS_FIELD 8 // Number of words of a 503-bit field element - #define p503_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p503 + 1 -#elif (TARGET == TARGET_x86) - #define NWORDS_FIELD 16 - #define p503_ZERO_WORDS 7 -#elif (TARGET == TARGET_ARM) - #define NWORDS_FIELD 16 - #define p503_ZERO_WORDS 7 -#elif (TARGET == TARGET_ARM64) - #define NWORDS_FIELD 8 - #define p503_ZERO_WORDS 3 -#endif - -// Basic constants - -#define NBITS_FIELD 503 -#define MAXBITS_FIELD 512 -#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements -#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 503-bit field element -#define NBITS_ORDER 256 -#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. -#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 256-bit element -#define MAXBITS_ORDER NBITS_ORDER -#define MAXWORDS_ORDER ((MAXBITS_ORDER+RADIX-1)/RADIX) // Max. number of words to represent elements in [1, oA-1] or [1, oB]. -#define ALICE 0 -#define BOB 1 -#define OALICE_BITS 250 -#define OBOB_BITS 253 -#define OBOB_EXPON 159 -#define MASK_ALICE 0x03 -#define MASK_BOB 0x0F -#define PRIME p503 -#define PARAM_A 0 -#define PARAM_C 1 -// Fixed parameters for isogeny tree computation -#define MAX_INT_POINTS_ALICE 7 -#define MAX_INT_POINTS_BOB 8 -#define MAX_Alice 125 -#define MAX_Bob 159 -#define MSG_BYTES 24 -#define SECRETKEY_A_BYTES (OALICE_BITS + 7) / 8 -#define SECRETKEY_B_BYTES (OBOB_BITS + 7) / 8 -#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) - - -// SIDH's basic element definitions and point representations - -typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 503-bit field elements (512-bit max.) -typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x503-bit field elements (512-bit max.) -typedef struct felm_s -{ - felm_t e[2]; -} f2elm_t; // Datatype for representing quadratic extension field elements GF(p503^2) -typedef f2elm_t publickey_t[3]; // Datatype for representing public keys equivalent to three GF(p503^2) elements - -typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. 
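/* [Editor's note] Illustrative aside, not part of the diff above: a minimal, self-contained
 * sketch of how the AMD64 sizes in this header relate to the wire encodings described later
 * in api_r1.h. The sketch_* names merely mirror the typedefs shown in the diff so the snippet
 * compiles on its own; they are the editor's, not part of the library. */
#include <stdint.h>

#define SKETCH_NWORDS_FIELD 8                       /* Ceil(503 / 64) digits per field element  */
typedef uint64_t sketch_digit_t;                    /* RADIX = 64 on TARGET_AMD64               */
typedef sketch_digit_t sketch_felm_t[SKETCH_NWORDS_FIELD];
typedef struct { sketch_felm_t e[2]; } sketch_f2elm_t;          /* a + b*i over GF(p503^2)      */
typedef sketch_f2elm_t sketch_publickey_t[3];                   /* three GF(p503^2) elements    */

/* In-memory size: 8 digits * 8 bytes = 64 bytes per GF(p503) element (512-bit max.). */
_Static_assert(sizeof(sketch_felm_t) == 64, "felm_t occupies 8 x 64-bit digits");

/* Wire size: a GF(p503) element serializes into Ceil(503/8) = 63 octets, so a GF(p503^2)
 * element is 126 octets and a public key is 3 * 126 = 378 octets, matching
 * FP2_ENCODED_BYTES above and SIDH_PUBLICKEYBYTES in api_r1.h. */
_Static_assert(2 * ((503 + 7) / 8) == 126, "FP2_ENCODED_BYTES");
_Static_assert(3 * (2 * ((503 + 7) / 8)) == 378, "SIDH_PUBLICKEYBYTES");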
-typedef point_proj point_proj_t[1]; - - - -/**************** Function prototypes ****************/ -/************* Multiprecision functions **************/ - -// Copy wordsize digits, c = a, where lng(a) = nwords -void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords); - -// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit -unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); - -// 503-bit multiprecision addition, c = a+b -void mp_add503(const digit_t* a, const digit_t* b, digit_t* c); -void mp_add503_asm(const digit_t* a, const digit_t* b, digit_t* c); -//void mp_addmask503_asm(const digit_t* a, const digit_t mask, digit_t* c); - -// 2x503-bit multiprecision addition, c = a+b -void mp_add503x2(const digit_t* a, const digit_t* b, digit_t* c); -void mp_add503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); - -// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit -unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); -digit_t mp_sub503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); - -// Multiprecision right shift by one -void mp_shiftr1(digit_t* x, const unsigned int nwords); - -// Digit multiplication, digit * digit -> 2-digit result -void digit_x_digit(const digit_t a, const digit_t b, digit_t* c); - -// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. -void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); - -void multiply(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); - -// Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1] -void Montgomery_multiply_mod_order(const digit_t* ma, const digit_t* mb, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime); - -// (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order -//void Montgomery_inversion_mod_order(const digit_t* ma, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime); - -void Montgomery_inversion_mod_order_bingcd(const digit_t* a, digit_t* c, const digit_t* order, const digit_t* Montgomery_rprime, const digit_t* Montgomery_R2); - -// Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits. -void to_Montgomery_mod_order(const digit_t* a, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime, const digit_t* Montgomery_Rprime); - -// Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits. -void from_Montgomery_mod_order(const digit_t* ma, digit_t* c, const digit_t* order, const digit_t* Montgomery_rprime); - -// Inversion modulo Alice's order 2^372. -void inv_mod_orderA(const digit_t* a, digit_t* c); - -/************ Field arithmetic functions *************/ - -// Copy of a field element, c = a -void fpcopy503(const felm_t a, felm_t c); - -// Zeroing a field element, a = 0 -void fpzero503(felm_t a); - -// Non constant-time comparison of two field elements. 
If a = b return TRUE, otherwise, return FALSE -bool fpequal503_non_constant_time(const felm_t a, const felm_t b); - -// Modular addition, c = a+b mod p503 -extern void fpadd503(const digit_t* a, const digit_t* b, digit_t* c); -extern void fpadd503_asm(const digit_t* a, const digit_t* b, digit_t* c); - -// Modular subtraction, c = a-b mod p503 -extern void fpsub503(const digit_t* a, const digit_t* b, digit_t* c); -extern void fpsub503_asm(const digit_t* a, const digit_t* b, digit_t* c); - -// Modular negation, a = -a mod p503 -extern void fpneg503(digit_t* a); - -// Modular division by two, c = a/2 mod p503. -void fpdiv2_503(const digit_t* a, digit_t* c); - -// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. -void fpcorrection503(digit_t* a); - -// 503-bit Montgomery reduction, c = a mod p -void rdc_mont(const digit_t* a, digit_t* c); - -// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768 -void fpmul503_mont(const felm_t a, const felm_t b, felm_t c); -void mul503_asm(const felm_t a, const felm_t b, dfelm_t c); -void rdc503_asm(const dfelm_t ma, dfelm_t mc); - -// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768 -void fpsqr503_mont(const felm_t ma, felm_t mc); - -// Conversion to Montgomery representation -void to_mont(const felm_t a, felm_t mc); - -// Conversion from Montgomery representation to standard representation -void from_mont(const felm_t ma, felm_t c); - -// Field inversion, a = a^-1 in GF(p503) -void fpinv503_mont(felm_t a); - -// Field inversion, a = a^-1 in GF(p503) using the binary GCD -void fpinv503_mont_bingcd(felm_t a); - -// Chain to compute (p503-3)/4 using Montgomery arithmetic -void fpinv503_chain_mont(felm_t a); - -/************ GF(p^2) arithmetic functions *************/ - -// Copy of a GF(p503^2) element, c = a -void fp2copy503(const f2elm_t *a, f2elm_t *c); - -// GF(p503^2) negation, a = -a in GF(p503^2) -void fp2neg503(f2elm_t *a); - -// GF(p503^2) addition, c = a+b in GF(p503^2) -extern void fp2add503(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// GF(p503^2) subtraction, c = a-b in GF(p503^2) -extern void fp2sub503(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// GF(p503^2) division by two, c = a/2 in GF(p503^2) -void fp2div2_503(const f2elm_t *a, f2elm_t *c); - -// Modular correction, a = a in GF(p503^2) -void fp2correction503(f2elm_t *a); - -// GF(p503^2) squaring using Montgomery arithmetic, c = a^2 in GF(p503^2) -void fp2sqr503_mont(const f2elm_t *a, f2elm_t *c); - -// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2) -void fp2mul503_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// Conversion of a GF(p503^2) element to Montgomery representation -void to_fp2mont(const f2elm_t *a, f2elm_t *mc); - -// Conversion of a GF(p503^2) element from Montgomery representation to standard representation -void from_fp2mont(const f2elm_t *ma, f2elm_t *c); - -// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) -void fp2inv503_mont(f2elm_t *a); - -// GF(p503^2) inversion, a = (a0-i*a1)/(a0^2+a1^2), GF(p503) inversion done using the binary GCD -void fp2inv503_mont_bingcd(f2elm_t *a); - -// n-way Montgomery inversion -void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out); - -/************ Elliptic curve and isogeny functions *************/ - -// Computes the j-invariant of a Montgomery curve with projective constant. 
-void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv); - -// Simultaneous doubling and differential addition. -void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24); - -// Doubling of a Montgomery point in projective coordinates (X:Z). -void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24); - -// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. -void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e); - -// Differential addition. -void xADD(point_proj_t P, const point_proj_t Q, const f2elm_t *xPQ); - -// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. -void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t* coeff); - -// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. -void eval_4_isog(point_proj_t P, f2elm_t* coeff); - -// Tripling of a Montgomery point in projective coordinates (X:Z). -void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus); - -// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. -void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e); - -// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. -void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t* coeff); - -// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P with coefficients given in coeff. -void eval_3_isog(point_proj_t Q, const f2elm_t* coeff); - -// 3-way simultaneous inversion -void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3); - -// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. -void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A); - - -#endif +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: internal header file for P503 +*********************************************************************************************/ + +#ifndef __P503_INTERNAL_H__ +#define __P503_INTERNAL_H__ + +#include "sike_r1_namespace.h" +#include "api_r1.h" + +#if (TARGET == TARGET_AMD64) + #define NWORDS_FIELD 8 // Number of words of a 503-bit field element + #define p503_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p503 + 1 +#elif (TARGET == TARGET_x86) + #define NWORDS_FIELD 16 + #define p503_ZERO_WORDS 7 +#elif (TARGET == TARGET_ARM) + #define NWORDS_FIELD 16 + #define p503_ZERO_WORDS 7 +#elif (TARGET == TARGET_ARM64) + #define NWORDS_FIELD 8 + #define p503_ZERO_WORDS 3 +#endif + +// Basic constants + +#define NBITS_FIELD 503 +#define MAXBITS_FIELD 512 +#define MAXWORDS_FIELD ((MAXBITS_FIELD+RADIX-1)/RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD+63)/64) // Number of 64-bit words of a 503-bit field element +#define NBITS_ORDER 256 +#define NWORDS_ORDER ((NBITS_ORDER+RADIX-1)/RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
+#define NWORDS64_ORDER ((NBITS_ORDER+63)/64) // Number of 64-bit words of a 256-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define MAXWORDS_ORDER ((MAXBITS_ORDER+RADIX-1)/RADIX) // Max. number of words to represent elements in [1, oA-1] or [1, oB]. +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 250 +#define OBOB_BITS 253 +#define OBOB_EXPON 159 +#define MASK_ALICE 0x03 +#define MASK_BOB 0x0F +#define PRIME p503 +#define PARAM_A 0 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 7 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 125 +#define MAX_Bob 159 +#define MSG_BYTES 24 +#define SECRETKEY_A_BYTES (OALICE_BITS + 7) / 8 +#define SECRETKEY_B_BYTES (OBOB_BITS + 7) / 8 +#define FP2_ENCODED_BYTES 2*((NBITS_FIELD + 7) / 8) + + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 503-bit field elements (512-bit max.) +typedef digit_t dfelm_t[2*NWORDS_FIELD]; // Datatype for representing double-precision 2x503-bit field elements (512-bit max.) +typedef struct felm_s +{ + felm_t e[2]; +} f2elm_t; // Datatype for representing quadratic extension field elements GF(p503^2) +typedef f2elm_t publickey_t[3]; // Datatype for representing public keys equivalent to three GF(p503^2) elements + +typedef struct { f2elm_t X; f2elm_t Z; } point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + + + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// Copy wordsize digits, c = a, where lng(a) = nwords +void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords); + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit +unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); + +// 503-bit multiprecision addition, c = a+b +void mp_add503(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add503_asm(const digit_t* a, const digit_t* b, digit_t* c); +//void mp_addmask503_asm(const digit_t* a, const digit_t mask, digit_t* c); + +// 2x503-bit multiprecision addition, c = a+b +void mp_add503x2(const digit_t* a, const digit_t* b, digit_t* c); +void mp_add503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit +unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); +digit_t mp_sub503x2_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Multiprecision right shift by one +void mp_shiftr1(digit_t* x, const unsigned int nwords); + +// Digit multiplication, digit * digit -> 2-digit result +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c); + +// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); + +void multiply(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords); + +// Montgomery multiplication modulo the group order, mc = ma*mb*r' mod order, where ma,mb,mc in [0, order-1] +void Montgomery_multiply_mod_order(const digit_t* ma, const digit_t* mb, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime); + +// (Non-constant time) Montgomery inversion modulo the curve order using a^(-1) = a^(order-2) mod order +//void Montgomery_inversion_mod_order(const digit_t* ma, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime); + +void Montgomery_inversion_mod_order_bingcd(const digit_t* a, digit_t* c, const digit_t* order, const digit_t* Montgomery_rprime, const digit_t* Montgomery_R2); + +// Conversion of elements in Z_r to Montgomery representation, where the order r is up to 384 bits. +void to_Montgomery_mod_order(const digit_t* a, digit_t* mc, const digit_t* order, const digit_t* Montgomery_rprime, const digit_t* Montgomery_Rprime); + +// Conversion of elements in Z_r from Montgomery to standard representation, where the order is up to 384 bits. +void from_Montgomery_mod_order(const digit_t* ma, digit_t* c, const digit_t* order, const digit_t* Montgomery_rprime); + +// Inversion modulo Alice's order 2^372. +void inv_mod_orderA(const digit_t* a, digit_t* c); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy503(const felm_t a, felm_t c); + +// Zeroing a field element, a = 0 +void fpzero503(felm_t a); + +// Non constant-time comparison of two field elements. If a = b return TRUE, otherwise, return FALSE +bool fpequal503_non_constant_time(const felm_t a, const felm_t b); + +// Modular addition, c = a+b mod p503 +extern void fpadd503(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpadd503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular subtraction, c = a-b mod p503 +extern void fpsub503(const digit_t* a, const digit_t* b, digit_t* c); +extern void fpsub503_asm(const digit_t* a, const digit_t* b, digit_t* c); + +// Modular negation, a = -a mod p503 +extern void fpneg503(digit_t* a); + +// Modular division by two, c = a/2 mod p503. +void fpdiv2_503(const digit_t* a, digit_t* c); + +// Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. 
+void fpcorrection503(digit_t* a); + +// 503-bit Montgomery reduction, c = a mod p +void rdc_mont(const digit_t* a, digit_t* c); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768 +void fpmul503_mont(const felm_t a, const felm_t b, felm_t c); +void mul503_asm(const felm_t a, const felm_t b, dfelm_t c); +void rdc503_asm(const dfelm_t ma, dfelm_t mc); + +// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p503, where R=2^768 +void fpsqr503_mont(const felm_t ma, felm_t mc); + +// Conversion to Montgomery representation +void to_mont(const felm_t a, felm_t mc); + +// Conversion from Montgomery representation to standard representation +void from_mont(const felm_t ma, felm_t c); + +// Field inversion, a = a^-1 in GF(p503) +void fpinv503_mont(felm_t a); + +// Field inversion, a = a^-1 in GF(p503) using the binary GCD +void fpinv503_mont_bingcd(felm_t a); + +// Chain to compute (p503-3)/4 using Montgomery arithmetic +void fpinv503_chain_mont(felm_t a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p503^2) element, c = a +void fp2copy503(const f2elm_t *a, f2elm_t *c); + +// GF(p503^2) negation, a = -a in GF(p503^2) +void fp2neg503(f2elm_t *a); + +// GF(p503^2) addition, c = a+b in GF(p503^2) +extern void fp2add503(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// GF(p503^2) subtraction, c = a-b in GF(p503^2) +extern void fp2sub503(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// GF(p503^2) division by two, c = a/2 in GF(p503^2) +void fp2div2_503(const f2elm_t *a, f2elm_t *c); + +// Modular correction, a = a in GF(p503^2) +void fp2correction503(f2elm_t *a); + +// GF(p503^2) squaring using Montgomery arithmetic, c = a^2 in GF(p503^2) +void fp2sqr503_mont(const f2elm_t *a, f2elm_t *c); + +// GF(p503^2) multiplication using Montgomery arithmetic, c = a*b in GF(p503^2) +void fp2mul503_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// Conversion of a GF(p503^2) element to Montgomery representation +void to_fp2mont(const f2elm_t *a, f2elm_t *mc); + +// Conversion of a GF(p503^2) element from Montgomery representation to standard representation +void from_fp2mont(const f2elm_t *ma, f2elm_t *c); + +// GF(p503^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv503_mont(f2elm_t *a); + +// GF(p503^2) inversion, a = (a0-i*a1)/(a0^2+a1^2), GF(p503) inversion done using the binary GCD +void fp2inv503_mont_bingcd(f2elm_t *a); + +// n-way Montgomery inversion +void mont_n_way_inv(const f2elm_t* vec, const int n, f2elm_t* out); + +/************ Elliptic curve and isogeny functions *************/ + +// Computes the j-invariant of a Montgomery curve with projective constant. +void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv); + +// Simultaneous doubling and differential addition. +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24); + +// Doubling of a Montgomery point in projective coordinates (X:Z). +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e); + +// Differential addition. +void xADD(point_proj_t P, const point_proj_t Q, const f2elm_t *xPQ); + +// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. 
+void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t* coeff); + +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. +void eval_4_isog(point_proj_t P, f2elm_t* coeff); + +// Tripling of a Montgomery point in projective coordinates (X:Z). +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e); + +// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. +void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t* coeff); + +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P with coefficients given in coeff. +void eval_3_isog(point_proj_t Q, const f2elm_t* coeff); + +// 3-way simultaneous inversion +void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3); + +// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. +void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A); + + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_r1.c index 3d4a2045ab..30981cf9b6 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_r1.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/P503_r1.c @@ -1,114 +1,114 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: supersingular isogeny parameters and generation of functions for P503 -*********************************************************************************************/ - -#include "sike_r1_namespace.h" -#include "P503_internal_r1.h" - -// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: -// -------------------------------------------------------------------------------------------------- -// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). -// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. -// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. -// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. -// For example, a 503-bit field element is represented with Ceil(503 / 64) = 8 64-bit digits or Ceil(503 / 32) = 16 32-bit digits. - -// -// Curve isogeny system "SIDHp503". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=0, B=1, C=1 and p503 = 2^250*3^159-1 -// - -const uint64_t p503[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, - 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; -const uint64_t p503p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000, - 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; -const uint64_t p503x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF, - 0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C }; -// Order of Alice's subgroup -const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000 }; -// Order of Bob's subgroup -const uint64_t Bob_order[NWORDS64_ORDER] = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 }; -// Alice's generator values {XPA0 + XPA1*i, XQA0, XRA0 + XRA1*i} in GF(p503^2), expressed in Montgomery representation -const uint64_t A_gen[5*NWORDS64_FIELD] = { 0xE7EF4AA786D855AF, 0xED5758F03EB34D3B, 0x09AE172535A86AA9, 0x237B9CC07D622723, - 0xE3A284CBA4E7932D, 0x27481D9176C5E63F, 0x6A323FF55C6E71BF, 0x002ECC31A6FB8773, // XPA0 - 0x64D02E4E90A620B8, 0xDAB8128537D4B9F1, 0x4BADF77B8A228F98, 0x0F5DBDF9D1FB7D1B, - 0xBEC4DB288E1A0DCC, 0xE76A8665E80675DB, 0x6D6F252E12929463, 0x003188BD1463FACC, // XPA1 - 0xB79D41025DE85D56, 0x0B867DA9DF169686, 0x740E5368021C827D, 0x20615D72157BF25C, - 0xFF1590013C9B9F5B, 0xC884DCADE8C16CEA, 0xEBD05E53BF724E01, 0x0032FEF8FDA5748C, // XQA0 - 0x12E2E849AA0A8006, 0x41CF47008635A1E8, 0x9CD720A70798AED7, 0x42A820B42FCF04CF, - 0x7BF9BAD32AAE88B1, 0xF619127A54090BBE, 0x1CB10D8F56408EAA, 0x001D6B54C3C0EDEB, // XRA0 - 0x34DB54931CBAAC36, 0x420A18CB8DD5F0C4, 0x32008C1A48C0F44D, 0x3B3BA772B1CFD44D, - 0xA74B058FDAF13515, 0x095FC9CA7EEC17B4, 0x448E829D28F120F8, 0x00261EC3ED16A489 }; // XRA1 -// Bob's generator values {XPB0 + XPB1*i, XQB0, XRB0 + XRB1*i} in GF(p503^2), expressed in Montgomery representation -const uint64_t B_gen[5*NWORDS64_FIELD] = { 0x7EDE37F4FA0BC727, 0xF7F8EC5C8598941C, 0xD15519B516B5F5C8, 0xF6D5AC9B87A36282, - 0x7B19F105B30E952E, 0x13BD8B2025B4EBEE, 0x7B96D27F4EC579A2, 0x00140850CAB7E5DE, // XPB0 - 0x7764909DAE7B7B2D, 0x578ABB16284911AB, 0x76E2BFD146A6BF4D, 0x4824044B23AA02F0, - 0x1105048912A321F3, 0xB8A2E482CF0F10C1, 0x42FF7D0BE2152085, 0x0018E599C5223352, // XPB1 - 0x4256C520FB388820, 0x744FD7C3BAAF0A13, 0x4B6A2DDDB12CBCB8, 0xE46826E27F427DF8, - 0xFE4A663CD505A61B, 0xD6B3A1BAF025C695, 0x7C3BB62B8FCC00BD, 0x003AFDDE4A35746C, // XQB0 - 0x75601CD1E6C0DFCB, 0x1A9007239B58F93E, 0xC1F1BE80C62107AC, 0x7F513B898F29FF08, - 0xEA0BEDFF43E1F7B2, 0x2C6D94018CBAE6D0, 0x3A430D31BCD84672, 0x000D26892ECCFE83, // XRB0 - 0x1119D62AEA3007A1, 0xE3702AA4E04BAE1B, 0x9AB96F7D59F990E7, 0xF58440E8B43319C0, - 0xAF8134BEE1489775, 0xE7F7774E905192AA, 0xF54AE09308E98039, 0x001EF7A041A86112 }; // XRB1 -// Montgomery constant Montgomery_R2 = (2^512)^2 mod p503 -const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x5289A0CF641D011F, 0x9B88257189FED2B9, 0xA3B365D58DC8F17A, 0x5BC57AB6EFF168EC, - 0x9E51998BD84D4423, 0xBF8999CBAC3B5695, 0x46E9127BCE14CDB6, 0x003F6CFCE8B81771 }; -// Value one in Montgomery representation -const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000003F9, 0x0000000000000000, 0x0000000000000000, 
0xB400000000000000, - 0x63CB1A6EA6DED2B4, 0x51689D8D667EB37D, 0x8ACD77C71AB24142, 0x0026FBAEC60F5953 }; -// Value (2^256)^2 mod 3^159 -const uint64_t Montgomery_Rprime[NWORDS64_ORDER] = { 0x0C2615CA3C5BAA99, 0x5A4FF3072AB6AA6A, 0xA6AFD4B039AD6AA2, 0x010DA06A26DD05CB }; -// Value -(3^159)^-1 mod 2^256 -const uint64_t Montgomery_rprime[NWORDS64_ORDER] = { 0x49C8A87190C0697D, 0x2EB7968EA0F0A558, 0x944257B696777FA2, 0xBAA4DDCD6139D2B3 }; -// Value order_Bob/3 mod p503 -const uint64_t Border_div3[NWORDS_ORDER] = { 0xEB5CFCD82C28A2B9, 0x4CFF3B5F9FDFCE96, 0xB07B3A7CDF4DBC02, 0x055DE9C5756D2D32 }; - - -// Fixed parameters for isogeny tree computation -const unsigned int strat_Alice[MAX_Alice-1] = { -61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, -4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, -1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, -1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, -1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1 }; - -const unsigned int strat_Bob[MAX_Bob-1] = { -71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, -1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, -5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, -1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, -2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, -1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; - -// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions -#define fpcopy fpcopy503 -#define fpzero fpzero503 -#define fpadd fpadd503 -#define fpsub fpsub503 -#define fpneg fpneg503 -#define fpdiv2 fpdiv2_503 -#define fpcorrection fpcorrection503 -#define fpmul_mont fpmul503_mont -#define fpsqr_mont fpsqr503_mont -#define fpinv_mont fpinv503_mont -#define fpinv_chain_mont fpinv503_chain_mont -#define fpinv_mont_bingcd fpinv503_mont_bingcd -#define fp2copy fp2copy503 -#define fp2add fp2add503 -#define fp2sub fp2sub503 -#define fp2neg fp2neg503 -#define fp2div2 fp2div2_503 -#define fp2correction fp2correction503 -#define fp2mul_mont fp2mul503_mont -#define fp2sqr_mont fp2sqr503_mont -#define fp2inv_mont fp2inv503_mont -#define fp2inv_mont_bingcd fp2inv503_mont_bingcd -#define fpequal_non_constant_time fpequal503_non_constant_time -#define mp_add_asm mp_add503_asm -#define mp_addx2_asm mp_add503x2_asm -#define mp_subx2_asm mp_sub503x2_asm - -#include "fpx_r1.c" -#include "ec_isogeny_r1.c" -#include "sidh_r1.c" +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: supersingular isogeny parameters and generation of functions for P503 +*********************************************************************************************/ + +#include "sike_r1_namespace.h" +#include "P503_internal_r1.h" + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). 
+// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. +// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 503-bit field element is represented with Ceil(503 / 64) = 8 64-bit digits or Ceil(503 / 32) = 16 32-bit digits. + +// +// Curve isogeny system "SIDHp503". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p503^2), where A=0, B=1, C=1 and p503 = 2^250*3^159-1 +// + +const uint64_t p503[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xABFFFFFFFFFFFFFF, + 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; +const uint64_t p503p1[NWORDS64_FIELD] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xAC00000000000000, + 0x13085BDA2211E7A0, 0x1B9BF6C87B7E7DAF, 0x6045C6BDDA77A4D0, 0x004066F541811E1E }; +const uint64_t p503x2[NWORDS64_FIELD] = { 0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0x57FFFFFFFFFFFFFF, + 0x2610B7B44423CF41, 0x3737ED90F6FCFB5E, 0xC08B8D7BB4EF49A0, 0x0080CDEA83023C3C }; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = { 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0400000000000000 }; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = { 0xC216F6888479E82B, 0xE6FDB21EDF9F6BC4, 0x1171AF769DE93406, 0x1019BD5060478798 }; +// Alice's generator values {XPA0 + XPA1*i, XQA0, XRA0 + XRA1*i} in GF(p503^2), expressed in Montgomery representation +const uint64_t A_gen[5*NWORDS64_FIELD] = { 0xE7EF4AA786D855AF, 0xED5758F03EB34D3B, 0x09AE172535A86AA9, 0x237B9CC07D622723, + 0xE3A284CBA4E7932D, 0x27481D9176C5E63F, 0x6A323FF55C6E71BF, 0x002ECC31A6FB8773, // XPA0 + 0x64D02E4E90A620B8, 0xDAB8128537D4B9F1, 0x4BADF77B8A228F98, 0x0F5DBDF9D1FB7D1B, + 0xBEC4DB288E1A0DCC, 0xE76A8665E80675DB, 0x6D6F252E12929463, 0x003188BD1463FACC, // XPA1 + 0xB79D41025DE85D56, 0x0B867DA9DF169686, 0x740E5368021C827D, 0x20615D72157BF25C, + 0xFF1590013C9B9F5B, 0xC884DCADE8C16CEA, 0xEBD05E53BF724E01, 0x0032FEF8FDA5748C, // XQA0 + 0x12E2E849AA0A8006, 0x41CF47008635A1E8, 0x9CD720A70798AED7, 0x42A820B42FCF04CF, + 0x7BF9BAD32AAE88B1, 0xF619127A54090BBE, 0x1CB10D8F56408EAA, 0x001D6B54C3C0EDEB, // XRA0 + 0x34DB54931CBAAC36, 0x420A18CB8DD5F0C4, 0x32008C1A48C0F44D, 0x3B3BA772B1CFD44D, + 0xA74B058FDAF13515, 0x095FC9CA7EEC17B4, 0x448E829D28F120F8, 0x00261EC3ED16A489 }; // XRA1 +// Bob's generator values {XPB0 + XPB1*i, XQB0, XRB0 + XRB1*i} in GF(p503^2), expressed in Montgomery representation +const uint64_t B_gen[5*NWORDS64_FIELD] = { 0x7EDE37F4FA0BC727, 0xF7F8EC5C8598941C, 0xD15519B516B5F5C8, 0xF6D5AC9B87A36282, + 0x7B19F105B30E952E, 0x13BD8B2025B4EBEE, 0x7B96D27F4EC579A2, 0x00140850CAB7E5DE, // XPB0 + 0x7764909DAE7B7B2D, 0x578ABB16284911AB, 0x76E2BFD146A6BF4D, 0x4824044B23AA02F0, + 0x1105048912A321F3, 0xB8A2E482CF0F10C1, 0x42FF7D0BE2152085, 0x0018E599C5223352, // XPB1 + 0x4256C520FB388820, 0x744FD7C3BAAF0A13, 0x4B6A2DDDB12CBCB8, 0xE46826E27F427DF8, + 0xFE4A663CD505A61B, 0xD6B3A1BAF025C695, 0x7C3BB62B8FCC00BD, 0x003AFDDE4A35746C, // XQB0 + 0x75601CD1E6C0DFCB, 0x1A9007239B58F93E, 0xC1F1BE80C62107AC, 0x7F513B898F29FF08, + 0xEA0BEDFF43E1F7B2, 0x2C6D94018CBAE6D0, 0x3A430D31BCD84672, 0x000D26892ECCFE83, // XRB0 + 0x1119D62AEA3007A1, 
0xE3702AA4E04BAE1B, 0x9AB96F7D59F990E7, 0xF58440E8B43319C0, + 0xAF8134BEE1489775, 0xE7F7774E905192AA, 0xF54AE09308E98039, 0x001EF7A041A86112 }; // XRB1 +// Montgomery constant Montgomery_R2 = (2^512)^2 mod p503 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = { 0x5289A0CF641D011F, 0x9B88257189FED2B9, 0xA3B365D58DC8F17A, 0x5BC57AB6EFF168EC, + 0x9E51998BD84D4423, 0xBF8999CBAC3B5695, 0x46E9127BCE14CDB6, 0x003F6CFCE8B81771 }; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = { 0x00000000000003F9, 0x0000000000000000, 0x0000000000000000, 0xB400000000000000, + 0x63CB1A6EA6DED2B4, 0x51689D8D667EB37D, 0x8ACD77C71AB24142, 0x0026FBAEC60F5953 }; +// Value (2^256)^2 mod 3^159 +const uint64_t Montgomery_Rprime[NWORDS64_ORDER] = { 0x0C2615CA3C5BAA99, 0x5A4FF3072AB6AA6A, 0xA6AFD4B039AD6AA2, 0x010DA06A26DD05CB }; +// Value -(3^159)^-1 mod 2^256 +const uint64_t Montgomery_rprime[NWORDS64_ORDER] = { 0x49C8A87190C0697D, 0x2EB7968EA0F0A558, 0x944257B696777FA2, 0xBAA4DDCD6139D2B3 }; +// Value order_Bob/3 mod p503 +const uint64_t Border_div3[NWORDS_ORDER] = { 0xEB5CFCD82C28A2B9, 0x4CFF3B5F9FDFCE96, 0xB07B3A7CDF4DBC02, 0x055DE9C5756D2D32 }; + + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice-1] = { +61, 32, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, +4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, +1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 29, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, +1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, +1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1 }; + +const unsigned int strat_Bob[MAX_Bob-1] = { +71, 38, 21, 13, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 5, 4, 2, 1, 1, 2, 1, +1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 17, 9, +5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, +1, 4, 2, 1, 1, 2, 1, 1, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, +2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, 2, +1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1 }; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy503 +#define fpzero fpzero503 +#define fpadd fpadd503 +#define fpsub fpsub503 +#define fpneg fpneg503 +#define fpdiv2 fpdiv2_503 +#define fpcorrection fpcorrection503 +#define fpmul_mont fpmul503_mont +#define fpsqr_mont fpsqr503_mont +#define fpinv_mont fpinv503_mont +#define fpinv_chain_mont fpinv503_chain_mont +#define fpinv_mont_bingcd fpinv503_mont_bingcd +#define fp2copy fp2copy503 +#define fp2add fp2add503 +#define fp2sub fp2sub503 +#define fp2neg fp2neg503 +#define fp2div2 fp2div2_503 +#define fp2correction fp2correction503 +#define fp2mul_mont fp2mul503_mont +#define fp2sqr_mont fp2sqr503_mont +#define fp2inv_mont fp2inv503_mont +#define fp2inv_mont_bingcd fp2inv503_mont_bingcd +#define fpequal_non_constant_time fpequal503_non_constant_time +#define mp_add_asm mp_add503_asm +#define mp_addx2_asm mp_add503x2_asm +#define mp_subx2_asm mp_sub503x2_asm + +#include "fpx_r1.c" +#include "ec_isogeny_r1.c" +#include "sidh_r1.c" diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/api_r1.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/api_r1.h index 210a90e000..cd342250b0 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/api_r1.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/api_r1.h @@ -1,76 +1,76 
@@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: API header file for P503 -*********************************************************************************************/ - -#ifndef __P503_API_H__ -#define __P503_API_H__ - -#include "sike_r1_namespace.h" -#include "config_r1.h" - -// Encoding of keys for KEM-based isogeny system "SIKEp503" (wire format): -// ---------------------------------------------------------------------- -// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). -// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. -// -// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^252-1] and the public key pk. In the SIKE API, -// private keys are encoded in 434 octets in little endian format. -// Public keys pk consist of 3 elements in GF(p503^2). In the SIKE API, pk is encoded in 378 octets. -// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 378 + 24 = 402 octets. -// Shared keys ss consist of a value of 16 octets. - - -/*********************** Ephemeral key exchange API ***********************/ - -#define SIDH_SECRETKEYBYTES 32 -#define SIDH_PUBLICKEYBYTES 378 -#define SIDH_BYTES 126 - -// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. -// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. -// Extended version available at: http://eprint.iacr.org/2016/859 - -// Generation of Bob's secret key -// Outputs random value in [0, 2^Floor(Log(2,3^159)) - 1] to be used as Bob's private key -int random_mod_order_B(unsigned char* random_digits); - -// Alice's ephemeral public key generation -// Input: a private key PrivateKeyA in the range [0, 2^250 - 1], stored in 32 bytes. -// Output: the public key PublicKeyA consisting of 3 GF(p503^2) elements encoded in 378 bytes. -int EphemeralKeyGeneration_A(const digit_t* PrivateKeyA, unsigned char* PublicKeyA); - -// Bob's ephemeral key-pair generation -// It produces a private key PrivateKeyB and computes the public key PublicKeyB. -// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. -// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. -int EphemeralKeyGeneration_B(const digit_t* PrivateKeyB, unsigned char* PublicKeyB); - -// Alice's ephemeral shared secret computation -// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB -// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. -// Bob's PublicKeyB consists of 3 GF(p503^2) elements encoded in 378 bytes. -// Output: a shared secret SharedSecretA that consists of one element in GF(p503^2) encoded in 126 bytes. 
-int EphemeralSecretAgreement_A(const digit_t* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); - -// Bob's ephemeral shared secret computation -// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA -// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. -// Alice's PublicKeyA consists of 3 GF(p503^2) elements encoded in 378 bytes. -// Output: a shared secret SharedSecretB that consists of one element in GF(p503^2) encoded in 126 bytes. -int EphemeralSecretAgreement_B(const digit_t* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); - - -// Encoding of keys for KEX-based isogeny system "SIDHp503" (wire format): -// ---------------------------------------------------------------------- -// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). -// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. -// -// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^250-1] and [0, 2^252-1], resp. In the SIDH API, private keys are encoded -// in 32 octets in little endian format. -// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p503^2). In the SIDH API, they are encoded in 378 octets. -// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p503^2). In the SIDH API, they are encoded in 126 octets. - - -#endif +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: API header file for P503 +*********************************************************************************************/ + +#ifndef __P503_API_H__ +#define __P503_API_H__ + +#include "sike_r1_namespace.h" +#include "config_r1.h" + +// Encoding of keys for KEM-based isogeny system "SIKEp503" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 24-byte random value, a value in the range [0, 2^252-1] and the public key pk. In the SIKE API, +// private keys are encoded in 434 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p503^2). In the SIKE API, pk is encoded in 378 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 24-byte value. In the SIKE API, ct is encoded in 378 + 24 = 402 octets. +// Shared keys ss consist of a value of 16 octets. + + +/*********************** Ephemeral key exchange API ***********************/ + +#define SIDH_SECRETKEYBYTES 32 +#define SIDH_PUBLICKEYBYTES 378 +#define SIDH_BYTES 126 + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. 
+// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^159)) - 1] to be used as Bob's private key +int random_mod_order_B(unsigned char* random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^250 - 1], stored in 32 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p503^2) elements encoded in 378 bytes. +int EphemeralKeyGeneration_A(const digit_t* PrivateKeyA, unsigned char* PublicKeyA); + +// Bob's ephemeral key-pair generation +// It produces a private key PrivateKeyB and computes the public key PublicKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// The public key consists of 3 GF(p503^2) elements encoded in 378 bytes. +int EphemeralKeyGeneration_B(const digit_t* PrivateKeyB, unsigned char* PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^250 - 1], stored in 32 bytes. +// Bob's PublicKeyB consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p503^2) encoded in 126 bytes. +int EphemeralSecretAgreement_A(const digit_t* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^159)) - 1], stored in 32 bytes. +// Alice's PublicKeyA consists of 3 GF(p503^2) elements encoded in 378 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p503^2) encoded in 126 bytes. +int EphemeralSecretAgreement_B(const digit_t* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB); + + +// Encoding of keys for KEX-based isogeny system "SIDHp503" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p503) are encoded in 63 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p503^2), where a and b are defined over GF(p503), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys PrivateKeyA and PrivateKeyB can have values in the range [0, 2^250-1] and [0, 2^252-1], resp. In the SIDH API, private keys are encoded +// in 32 octets in little endian format. +// Public keys PublicKeyA and PublicKeyB consist of 3 elements in GF(p503^2). In the SIDH API, they are encoded in 378 octets. +// Shared keys SharedSecretA and SharedSecretB consist of one element in GF(p503^2). In the SIDH API, they are encoded in 126 octets. 
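/* [Editor's note] Illustrative aside, not part of the diff above: a minimal sketch of the
 * ephemeral exchange this header declares, using only the prototypes and sizes shown here.
 * The buffer names, the helper name sketch_sidh_p503_exchange, and the assumption that a
 * return value of 0 means success are the editor's. Alice's key generation is left to the
 * caller because this header only declares random_mod_order_B. Per the SECURITY NOTE above,
 * keys must be ephemeral and never reused. */
#include <string.h>
#include "api_r1.h"   /* brings in digit_t via config_r1.h */

/* One ephemeral SIDHp503 exchange. PrivateKeyA must already hold a random value in
 * [0, 2^250 - 1], stored in 32 bytes little endian; producing it is the caller's job. */
static int sketch_sidh_p503_exchange(const digit_t *PrivateKeyA)
{
    digit_t PrivateKeyB[SIDH_SECRETKEYBYTES / sizeof(digit_t)] = {0};
    unsigned char PublicKeyA[SIDH_PUBLICKEYBYTES];   /* 378 octets: 3 GF(p503^2) elements */
    unsigned char PublicKeyB[SIDH_PUBLICKEYBYTES];
    unsigned char SharedSecretA[SIDH_BYTES];         /* 126 octets: 1 GF(p503^2) element  */
    unsigned char SharedSecretB[SIDH_BYTES];

    /* Bob's private key comes from the helper declared above (assumed: 0 == success). */
    if (random_mod_order_B((unsigned char *)PrivateKeyB) != 0) return -1;

    /* Public key generation, then the two shared-secret computations. */
    if (EphemeralKeyGeneration_A(PrivateKeyA, PublicKeyA) != 0) return -1;
    if (EphemeralKeyGeneration_B(PrivateKeyB, PublicKeyB) != 0) return -1;
    if (EphemeralSecretAgreement_A(PrivateKeyA, PublicKeyB, SharedSecretA) != 0) return -1;
    if (EphemeralSecretAgreement_B(PrivateKeyB, PublicKeyA, SharedSecretB) != 0) return -1;

    /* Both sides should now hold the same 126-octet shared secret. */
    return memcmp(SharedSecretA, SharedSecretB, SIDH_BYTES) == 0 ? 0 : -1;
}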
+ + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/config_r1.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/config_r1.h index f9bc5d36d0..0bd412dc4f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/config_r1.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/config_r1.h @@ -1,146 +1,146 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: configuration file and platform-dependent macros -*********************************************************************************************/ - -#ifndef __CONFIG_H__ -#define __CONFIG_H__ - -#include "sike_r1_namespace.h" -#include <stdint.h> -#include <stdbool.h> -#include <stddef.h> - -// Definition of compiler - -#define COMPILER_GCC 1 -#define COMPILER_CLANG 2 - -#if defined(__GNUC__) // GNU GCC compiler - #define COMPILER COMPILER_GCC -#elif defined(__clang__) // Clang compiler - #define COMPILER COMPILER_CLANG -#else - #error -- "Unsupported COMPILER" -#endif - -#define _AMD64_ - -// Definition of the targeted architecture and basic data types - -#define TARGET_AMD64 1 -#define TARGET_x86 2 -#define TARGET_ARM 3 -#define TARGET_ARM64 4 - -#if defined(_AMD64_) - #define TARGET TARGET_AMD64 - #define RADIX 64 - #define LOG2RADIX 6 - typedef uint64_t digit_t; // Unsigned 64-bit digit -#elif defined(_X86_) - #define TARGET TARGET_x86 - #define RADIX 32 - #define LOG2RADIX 5 - typedef uint32_t digit_t; // Unsigned 32-bit digit -#elif defined(_ARM_) - #define TARGET TARGET_ARM - #define RADIX 32 - #define LOG2RADIX 5 - typedef uint32_t digit_t; // Unsigned 32-bit digit -#elif defined(_ARM64_) - #define TARGET TARGET_ARM64 - #define RADIX 64 - #define LOG2RADIX 6 - typedef uint64_t digit_t; // Unsigned 64-bit digit -#else - #error -- "Unsupported ARCHITECTURE" -#endif - -#define RADIX64 64 - - -// Selection of implementation: optimized_generic - -#if defined(_OPTIMIZED_GENERIC_) - #define OPTIMIZED_GENERIC_IMPLEMENTATION -#endif - - -// Extended datatype support - -typedef uint64_t uint128_t[2]; - - -// Macro definitions - -#define NBITS_TO_NBYTES(nbits) (((nbits)+7)/8) // Conversion macro from number of bits to number of bytes -#define NBITS_TO_NWORDS(nbits) (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8)) // Conversion macro from number of bits to number of computer words -#define NBYTES_TO_NWORDS(nbytes) (((nbytes)+sizeof(digit_t)-1)/sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words - -// Macro to avoid compiler warnings when detecting unreferenced parameters -#define UNREFERENCED_PARAMETER(PAR) ((void)(PAR)) - - -/********************** Constant-time unsigned comparisons ***********************/ - -// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise - -static __inline unsigned int is_digit_nonzero_ct(digit_t x) -{ // Is x != 0? - return (unsigned int)((x | (0-x)) >> (RADIX-1)); -} - -static __inline unsigned int is_digit_zero_ct(digit_t x) -{ // Is x = 0? - return (unsigned int)(1 ^ is_digit_nonzero_ct(x)); -} - -static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) -{ // Is x < y? 
- return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX-1)); -} - - -/********************** Macros for platform-dependent operations **********************/ - -// Digit multiplication -#define MUL(multiplier, multiplicand, hi, lo) \ - digit_x_digit((multiplier), (multiplicand), &(lo)); - -// Digit addition with carry -#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ - { digit_t tempReg = (addend1) + (digit_t)(carryIn); \ - (sumOut) = (addend2) + tempReg; \ - (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); } - -// Digit subtraction with borrow -#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ - { digit_t tempReg = (minuend) - (subtrahend); \ - unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \ - (differenceOut) = tempReg - (digit_t)(borrowIn); \ - (borrowOut) = borrowReg; } - -// Shift right with flexible datatype -#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); - -// Shift left with flexible datatype -#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); - -// 64x64-bit multiplication -#define MUL128(multiplier, multiplicand, product) \ - mp_mul((digit_t*)&(multiplier), (digit_t*)&(multiplicand), (digit_t*)&(product), NWORDS_FIELD/2); - -// 128-bit addition, inputs < 2^127 -#define ADD128(addend1, addend2, addition) \ - mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); - -// 128-bit addition with output carry -#define ADC128(addend1, addend2, carry, addition) \ - (carry) = mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); - - -#endif +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: configuration file and platform-dependent macros +*********************************************************************************************/ + +#ifndef __CONFIG_H__ +#define __CONFIG_H__ + +#include "sike_r1_namespace.h" +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +// Definition of compiler + +#define COMPILER_GCC 1 +#define COMPILER_CLANG 2 + +#if defined(__GNUC__) // GNU GCC compiler + #define COMPILER COMPILER_GCC +#elif defined(__clang__) // Clang compiler + #define COMPILER COMPILER_CLANG +#else + #error -- "Unsupported COMPILER" +#endif + +#define _AMD64_ + +// Definition of the targeted architecture and basic data types + +#define TARGET_AMD64 1 +#define TARGET_x86 2 +#define TARGET_ARM 3 +#define TARGET_ARM64 4 + +#if defined(_AMD64_) + #define TARGET TARGET_AMD64 + #define RADIX 64 + #define LOG2RADIX 6 + typedef uint64_t digit_t; // Unsigned 64-bit digit +#elif defined(_X86_) + #define TARGET TARGET_x86 + #define RADIX 32 + #define LOG2RADIX 5 + typedef uint32_t digit_t; // Unsigned 32-bit digit +#elif defined(_ARM_) + #define TARGET TARGET_ARM + #define RADIX 32 + #define LOG2RADIX 5 + typedef uint32_t digit_t; // Unsigned 32-bit digit +#elif defined(_ARM64_) + #define TARGET TARGET_ARM64 + #define RADIX 64 + #define LOG2RADIX 6 + typedef uint64_t digit_t; // Unsigned 64-bit digit +#else + #error -- "Unsupported ARCHITECTURE" +#endif + +#define RADIX64 64 + + +// Selection of implementation: optimized_generic + +#if defined(_OPTIMIZED_GENERIC_) + #define 
OPTIMIZED_GENERIC_IMPLEMENTATION +#endif + + +// Extended datatype support + +typedef uint64_t uint128_t[2]; + + +// Macro definitions + +#define NBITS_TO_NBYTES(nbits) (((nbits)+7)/8) // Conversion macro from number of bits to number of bytes +#define NBITS_TO_NWORDS(nbits) (((nbits)+(sizeof(digit_t)*8)-1)/(sizeof(digit_t)*8)) // Conversion macro from number of bits to number of computer words +#define NBYTES_TO_NWORDS(nbytes) (((nbytes)+sizeof(digit_t)-1)/sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + +// Macro to avoid compiler warnings when detecting unreferenced parameters +#define UNREFERENCED_PARAMETER(PAR) ((void)(PAR)) + + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + +static __inline unsigned int is_digit_nonzero_ct(digit_t x) +{ // Is x != 0? + return (unsigned int)((x | (0-x)) >> (RADIX-1)); +} + +static __inline unsigned int is_digit_zero_ct(digit_t x) +{ // Is x = 0? + return (unsigned int)(1 ^ is_digit_nonzero_ct(x)); +} + +static __inline unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) +{ // Is x < y? + return (unsigned int)((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX-1)); +} + + +/********************** Macros for platform-dependent operations **********************/ + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + digit_x_digit((multiplier), (multiplicand), &(lo)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) & is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Shift left with flexible datatype +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + mp_mul((digit_t*)&(multiplier), (digit_t*)&(multiplicand), (digit_t*)&(product), NWORDS_FIELD/2); + +// 128-bit addition, inputs < 2^127 +#define ADD128(addend1, addend2, addition) \ + mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = mp_add((digit_t*)(addend1), (digit_t*)(addend2), (digit_t*)(addition), NWORDS_FIELD); + + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/ec_isogeny_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/ec_isogeny_r1.c index b83e7a3ae3..670a1490ea 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/ec_isogeny_r1.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/ec_isogeny_r1.c @@ -1,346 +1,346 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library 
-* -* Abstract: elliptic curve and isogeny functions -*********************************************************************************************/ - -#include "sike_r1_namespace.h" -#include "P503_internal_r1.h" - -void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24) -{ // Doubling of a Montgomery point in projective coordinates (X:Z). - // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. - // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). - f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2sub(&P->X, &P->Z, t0); // t0 = X1-Z1 - fp2add(&P->X, &P->Z, t1); // t1 = X1+Z1 - fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 - fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 - fp2mul_mont(C24, t0, &Q->Z); // Z2 = C24*(X1-Z1)^2 - fp2mul_mont(t1, &Q->Z, &Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 - fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 - fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] - fp2add(&Q->Z, t0, &Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 - fp2mul_mont(&Q->Z, t1, &Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] -} - - -void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e) -{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. - // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. - // Output: projective Montgomery x-coordinates Q <- (2^e)*P. - int i; - - copy_words((const digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); - - for (i = 0; i < e; i++) { - xDBL(Q, Q, A24plus, C24); - } -} - - -void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t* coeff) -{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. - // Input: projective point of order four P = (X4:Z4). - // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients - // that are used to evaluate the isogeny at a point in eval_4_isog(). - - fp2sub(&P->X, &P->Z, &coeff[1]); // coeff[1] = X4-Z4 - fp2add(&P->X, &P->Z, &coeff[2]); // coeff[2] = X4+Z4 - fp2sqr_mont(&P->Z, &coeff[0]); // coeff[0] = Z4^2 - fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 2*Z4^2 - fp2sqr_mont(&coeff[0], C24); // C24 = 4*Z4^4 - fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 4*Z4^2 - fp2sqr_mont(&P->X, A24plus); // A24plus = X4^2 - fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 - fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 -} - - -void eval_4_isog(point_proj_t P, f2elm_t* coeff) -{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined - // by the 3 coefficients in coeff (computed in the function get_4_isog()). - // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). - // Output: the projective point P = phi(P) = (X:Z) in the codomain. 
- f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2add(&P->X, &P->Z, t0); // t0 = X+Z - fp2sub(&P->X, &P->Z, t1); // t1 = X-Z - fp2mul_mont(t0, &coeff[1], &P->X); // X = (X+Z)*coeff[1] - fp2mul_mont(t1, &coeff[2], &P->Z); // Z = (X-Z)*coeff[2] - fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) - fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) - fp2add(&P->X, &P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] - fp2sub(&P->X, &P->Z, &P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1] - fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 - fp2sqr_mont(&P->Z, &P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - fp2add(t1, t0, &P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 - fp2sub(&P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) - fp2mul_mont(&P->X, t1, &P->X); // Xfinal - fp2mul_mont(&P->Z, t0, &P->Z); // Zfinal -} - - -void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus) -{ // Tripling of a Montgomery point in projective coordinates (X:Z). - // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. - // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). - f2elm_t _t0, _t1, _t2, _t3, _t4, _t5, _t6; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; - f2elm_t *t3=&_t3, *t4=&_t4, *t5=&_t5, *t6=&_t6; - - fp2sub(&P->X, &P->Z, t0); // t0 = X-Z - fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 - fp2add(&P->X, &P->Z, t1); // t1 = X+Z - fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 - fp2add(t0, t1, t4); // t4 = 2*X - fp2sub(t1, t0, t0); // t0 = 2*Z - fp2sqr_mont(t4, t1); // t1 = 4*X^2 - fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 - fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 - fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 - fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3 - fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 - fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3 - fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3 - fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 - fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] - fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3 - fp2sqr_mont(t2, t2); // t2 = t2^2 - fp2mul_mont(t4, t2, &Q->X); // X3 = 2*X*t2 - fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] - fp2sqr_mont(t1, t1); // t1 = t1^2 - fp2mul_mont(t0, t1, &Q->Z); // Z3 = 2*Z*t1 -} - - -void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e) -{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. - // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. - // Output: projective Montgomery x-coordinates Q <- (3^e)*P. - int i; - - copy_words((const digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); - - for (i = 0; i < e; i++) { - xTPL(Q, Q, A24minus, A24plus); - } -} - - -void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t* coeff) -{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. - // Input: projective point of order three P = (X3:Z3). 
- // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. - f2elm_t _t0, _t1, _t2, _t3, _t4; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; - f2elm_t *t3=&_t3, *t4=&_t4 ; - - fp2sub(&P->X, &P->Z, &coeff[0]); // coeff0 = X-Z - fp2sqr_mont(&coeff[0], t0); // t0 = (X-Z)^2 - fp2add(&P->X, &P->Z, &coeff[1]); // coeff1 = X+Z - fp2sqr_mont(&coeff[1], t1); // t1 = (X+Z)^2 - fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 - fp2add(&coeff[0], &coeff[1], t3); // t3 = 2*X - fp2sqr_mont(t3, t3); // t3 = 4*X^2 - fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 - fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 - fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 - fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 - fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) - fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 - fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] - fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 - fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) - fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 - fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] - fp2add(A24minus, t0, A24plus); // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 -} - - -void eval_3_isog(point_proj_t Q, const f2elm_t* coeff) -{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and - // a point P with 2 coefficients in coeff (computed in the function get_3_isog()). - // Inputs: projective points P = (X3:Z3) and Q = (X:Z). - // Output: the projective point Q <- phi(Q) = (X3:Z3). - f2elm_t _t0, _t1, _t2; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; - - fp2add(&Q->X, &Q->Z, t0); // t0 = X+Z - fp2sub(&Q->X, &Q->Z, t1); // t1 = X-Z - fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff0*(X+Z) - fp2mul_mont(t1, &coeff[1], t1); // t1 = coeff1*(X-Z) - fp2add(t0, t1, t2); // t2 = coeff0*(X-Z) + coeff1*(X+Z) - fp2sub(t1, t0, t0); // t0 = coeff0*(X-Z) - coeff1*(X+Z) - fp2sqr_mont(t2, t2); // t2 = [coeff0*(X-Z) + coeff1*(X+Z)]^2 - fp2sqr_mont(t0, t0); // t1 = [coeff0*(X-Z) - coeff1*(X+Z)]^2 - fp2mul_mont(&Q->X, t2, &Q->X); // X3final = X*[coeff0*(X-Z) + coeff1*(X+Z)]^2 - fp2mul_mont(&Q->Z, t0, &Q->Z); // Z3final = Z*[coeff0*(X-Z) - coeff1*(X+Z)]^2 -} - - -void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3) -{ // 3-way simultaneous inversion - // Input: z1,z2,z3 - // Output: 1/z1,1/z2,1/z3 (override inputs). - f2elm_t _t0, _t1, _t2, _t3; - f2elm_t *t0=&_t0, *t1=&_t1; - f2elm_t *t2=&_t2, *t3=&_t3; - - fp2mul_mont(z1, z2, t0); // t0 = z1*z2 - fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 - fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) - fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) - fp2mul_mont(t2, z2, t3); // t3 = 1/z1 - fp2mul_mont(t2, z1, z2); // z2 = 1/z2 - fp2mul_mont(t0, t1, z3); // z3 = 1/z3 - fp2copy(t3, z1); // z1 = 1/z1 -} - - -void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A) -{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. - // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. - // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
- f2elm_t _t0, _t1, one = {0}; - f2elm_t *t0=&_t0, *t1=&_t1; - - fpcopy((const digit_t*)&Montgomery_one, one.e[0]); - fp2add(xP, xQ, t1); // t1 = xP+xQ - fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ - fp2mul_mont(xR, t1, A); // A = xR*t1 - fp2add(t0, A, A); // A = A+t0 - fp2mul_mont(t0, xR, t0); // t0 = t0*xR - fp2sub(A, &one, A); // A = A-1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t1, xR, t1); // t1 = t1+xR - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2sqr_mont(A, A); // A = A^2 - fp2inv_mont(t0); // t0 = 1/t0 - fp2mul_mont(A, t0, A); // A = A*t0 - fp2sub(A, t1, A); // Afinal = A-t1 -} - - -void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv) -{ // Computes the j-invariant of a Montgomery curve with projective constant. - // Input: A,C in GF(p^2). - // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. - f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2sqr_mont(A, jinv); // jinv = A^2 - fp2sqr_mont(C, t1); // t1 = C^2 - fp2add(t1, t1, t0); // t0 = t1+t1 - fp2sub(jinv, t0, t0); // t0 = jinv-t0 - fp2sub(t0, t1, t0); // t0 = t0-t1 - fp2sub(t0, t1, jinv); // jinv = t0-t1 - fp2sqr_mont(t1, t1); // t1 = t1^2 - fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2sqr_mont(t0, t1); // t1 = t0^2 - fp2mul_mont(t0, t1, t0); // t0 = t0*t1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2inv_mont(jinv); // jinv = 1/jinv - fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv -} - - -void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24) -{ // Simultaneous doubling and differential addition. - // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. - // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. - f2elm_t _t0, _t1, _t2; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; - - fp2add(&P->X, &P->Z, t0); // t0 = XP+ZP - fp2sub(&P->X, &P->Z, t1); // t1 = XP-ZP - fp2sqr_mont(t0, &P->X); // XP = (XP+ZP)^2 - fp2sub(&Q->X, &Q->Z, t2); // t2 = XQ-ZQ - fp2correction(t2); - fp2add(&Q->X,&Q->Z, &Q->X); // XQ = XQ+ZQ - fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) - fp2sqr_mont(t1, &P->Z); // ZP = (XP-ZP)^2 - fp2mul_mont(t1, &Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) - fp2sub(&P->X, &P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 - fp2mul_mont(&P->X, &P->Z, &P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 - fp2mul_mont(t2, A24, &Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] - fp2sub(t0, t1, &Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) - fp2add(&Q->X, &P->Z, &P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 - fp2add(t0, t1, &Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) - fp2mul_mont(&P->Z, t2, &P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] - fp2sqr_mont(&Q->Z, &Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 - fp2sqr_mont(&Q->X, &Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(&Q->Z, xPQ, &Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 -} - - -static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) -{ // Swap points. 
- // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P - unsigned int i; - - for (i = 0; i < NWORDS_FIELD; i++) { - digit_t temp = option & (P->X.e[0][i] ^ Q->X.e[0][i]); - P->X.e[0][i] = temp ^ P->X.e[0][i]; - Q->X.e[0][i] = temp ^ Q->X.e[0][i]; - temp = option & (P->Z.e[0][i] ^ Q->Z.e[0][i]); - P->Z.e[0][i] = temp ^ P->Z.e[0][i]; - Q->Z.e[0][i] = temp ^ Q->Z.e[0][i]; - temp = option & (P->X.e[1][i] ^ Q->X.e[1][i]); - P->X.e[1][i] = temp ^ P->X.e[1][i]; - Q->X.e[1][i] = temp ^ Q->X.e[1][i]; - temp = option & (P->Z.e[1][i] ^ Q->Z.e[1][i]); - P->Z.e[1][i] = temp ^ P->Z.e[1][i]; - Q->Z.e[1][i] = temp ^ Q->Z.e[1][i]; - } -} - - -static void LADDER3PT(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xPQ, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t *A) -{ - point_proj_t R0 = {0}, R2 = {0}; - f2elm_t _A24 = {0}; - f2elm_t *A24=&_A24; - int i, nbits, prevbit = 0; - - if (AliceOrBob == ALICE) { - nbits = OALICE_BITS; - } else { - nbits = OBOB_BITS; - } - - // Initializing constant - fpcopy((const digit_t*)&Montgomery_one, A24->e[0]); - fp2add(A24, A24, A24); - fp2add(A, A24, A24); - fp2div2(A24, A24); - fp2div2(A24, A24); // A24 = (A+2)/4 - - // Initializing points - fp2copy(xQ, &R0->X); - fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R0->Z.e); - fp2copy(xPQ, &R2->X); - fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R2->Z.e); - fp2copy(xP, &R->X); - fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R->Z.e); - fpzero((digit_t*)(R->Z.e)[1]); - - // Main loop - for (i = 0; i < nbits; i++) { - int bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1; - int swap = bit ^ prevbit; - prevbit = bit; - digit_t mask = 0 - (digit_t)swap; - - swap_points(R, R2, mask); - xDBLADD(R0, R2, &R->X, A24); - fp2mul_mont(&R2->X, &R->Z, &R2->X); - } -} +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: elliptic curve and isogeny functions +*********************************************************************************************/ + +#include "sike_r1_namespace.h" +#include "P503_internal_r1.h" + +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24) +{ // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2sub(&P->X, &P->Z, t0); // t0 = X1-Z1 + fp2add(&P->X, &P->Z, t1); // t1 = X1+Z1 + fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 + fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 + fp2mul_mont(C24, t0, &Q->Z); // Z2 = C24*(X1-Z1)^2 + fp2mul_mont(t1, &Q->Z, &Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + fp2add(&Q->Z, t0, &Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + fp2mul_mont(&Q->Z, t1, &Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + + +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e) +{ // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. 
+ // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + int i; + + copy_words((const digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xDBL(Q, Q, A24plus, C24); + } +} + + +void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t* coeff) +{ // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). + + fp2sub(&P->X, &P->Z, &coeff[1]); // coeff[1] = X4-Z4 + fp2add(&P->X, &P->Z, &coeff[2]); // coeff[2] = X4+Z4 + fp2sqr_mont(&P->Z, &coeff[0]); // coeff[0] = Z4^2 + fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 2*Z4^2 + fp2sqr_mont(&coeff[0], C24); // C24 = 4*Z4^4 + fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 4*Z4^2 + fp2sqr_mont(&P->X, A24plus); // A24plus = X4^2 + fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 + fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 +} + + +void eval_4_isog(point_proj_t P, f2elm_t* coeff) +{ // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 3 coefficients in coeff (computed in the function get_4_isog()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). + // Output: the projective point P = phi(P) = (X:Z) in the codomain. + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2add(&P->X, &P->Z, t0); // t0 = X+Z + fp2sub(&P->X, &P->Z, t1); // t1 = X-Z + fp2mul_mont(t0, &coeff[1], &P->X); // X = (X+Z)*coeff[1] + fp2mul_mont(t1, &coeff[2], &P->Z); // Z = (X-Z)*coeff[2] + fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) + fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) + fp2add(&P->X, &P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] + fp2sub(&P->X, &P->Z, &P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1] + fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + fp2sqr_mont(&P->Z, &P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 + fp2add(t1, t0, &P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + fp2sub(&P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) + fp2mul_mont(&P->X, t1, &P->X); // Xfinal + fp2mul_mont(&P->Z, t0, &P->Z); // Zfinal +} + + +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus) +{ // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ f2elm_t _t0, _t1, _t2, _t3, _t4, _t5, _t6; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; + f2elm_t *t3=&_t3, *t4=&_t4, *t5=&_t5, *t6=&_t6; + + fp2sub(&P->X, &P->Z, t0); // t0 = X-Z + fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 + fp2add(&P->X, &P->Z, t1); // t1 = X+Z + fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 + fp2add(t0, t1, t4); // t4 = 2*X + fp2sub(t1, t0, t0); // t0 = 2*Z + fp2sqr_mont(t4, t1); // t1 = 4*X^2 + fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 + fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 + fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3 + fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 + fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3 + fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3 + fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 + fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3 + fp2sqr_mont(t2, t2); // t2 = t2^2 + fp2mul_mont(t4, t2, &Q->X); // X3 = 2*X*t2 + fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(t0, t1, &Q->Z); // Z3 = 2*Z*t1 +} + + +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e) +{ // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P. + int i; + + copy_words((const digit_t*)P, (digit_t*)Q, 2*2*NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xTPL(Q, Q, A24minus, A24plus); + } +} + + +void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t* coeff) +{ // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. 
+ f2elm_t _t0, _t1, _t2, _t3, _t4; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; + f2elm_t *t3=&_t3, *t4=&_t4 ; + + fp2sub(&P->X, &P->Z, &coeff[0]); // coeff0 = X-Z + fp2sqr_mont(&coeff[0], t0); // t0 = (X-Z)^2 + fp2add(&P->X, &P->Z, &coeff[1]); // coeff1 = X+Z + fp2sqr_mont(&coeff[1], t1); // t1 = (X+Z)^2 + fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 + fp2add(&coeff[0], &coeff[1], t3); // t3 = 2*X + fp2sqr_mont(t3, t3); // t3 = 4*X^2 + fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 + fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 + fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 + fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) + fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 + fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 + fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) + fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 + fp2mul_mont(t3, t4, t4); // t4 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] + fp2sub(t4, A24minus, t0); // t0 = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] - [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + fp2add(A24minus, t0, A24plus); // A24plus = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 +} + + +void eval_3_isog(point_proj_t Q, const f2elm_t* coeff) +{ // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and + // a point P with 2 coefficients in coeff (computed in the function get_3_isog()). + // Inputs: projective points P = (X3:Z3) and Q = (X:Z). + // Output: the projective point Q <- phi(Q) = (X3:Z3). + f2elm_t _t0, _t1, _t2; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2 ; + + fp2add(&Q->X, &Q->Z, t0); // t0 = X+Z + fp2sub(&Q->X, &Q->Z, t1); // t1 = X-Z + fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff0*(X+Z) + fp2mul_mont(t1, &coeff[1], t1); // t1 = coeff1*(X-Z) + fp2add(t0, t1, t2); // t2 = coeff0*(X-Z) + coeff1*(X+Z) + fp2sub(t1, t0, t0); // t0 = coeff0*(X-Z) - coeff1*(X+Z) + fp2sqr_mont(t2, t2); // t2 = [coeff0*(X-Z) + coeff1*(X+Z)]^2 + fp2sqr_mont(t0, t0); // t1 = [coeff0*(X-Z) - coeff1*(X+Z)]^2 + fp2mul_mont(&Q->X, t2, &Q->X); // X3final = X*[coeff0*(X-Z) + coeff1*(X+Z)]^2 + fp2mul_mont(&Q->Z, t0, &Q->Z); // Z3final = Z*[coeff0*(X-Z) - coeff1*(X+Z)]^2 +} + + +void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3) +{ // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (override inputs). + f2elm_t _t0, _t1, _t2, _t3; + f2elm_t *t0=&_t0, *t1=&_t1; + f2elm_t *t2=&_t2, *t3=&_t3; + + fp2mul_mont(z1, z2, t0); // t0 = z1*z2 + fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 + fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) + fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) + fp2mul_mont(t2, z2, t3); // t3 = 1/z1 + fp2mul_mont(t2, z1, z2); // z2 = 1/z2 + fp2mul_mont(t0, t1, z3); // z3 = 1/z3 + fp2copy(t3, z1); // z1 = 1/z1 +} + + +void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A) +{ // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
+ f2elm_t _t0, _t1, one = {0}; + f2elm_t *t0=&_t0, *t1=&_t1; + + fpcopy((const digit_t*)&Montgomery_one, one.e[0]); + fp2add(xP, xQ, t1); // t1 = xP+xQ + fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ + fp2mul_mont(xR, t1, A); // A = xR*t1 + fp2add(t0, A, A); // A = A+t0 + fp2mul_mont(t0, xR, t0); // t0 = t0*xR + fp2sub(A, &one, A); // A = A-1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t1, xR, t1); // t1 = t1+xR + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(A, A); // A = A^2 + fp2inv_mont(t0); // t0 = 1/t0 + fp2mul_mont(A, t0, A); // A = A*t0 + fp2sub(A, t1, A); // Afinal = A-t1 +} + + +void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv) +{ // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2sqr_mont(A, jinv); // jinv = A^2 + fp2sqr_mont(C, t1); // t1 = C^2 + fp2add(t1, t1, t0); // t0 = t1+t1 + fp2sub(jinv, t0, t0); // t0 = jinv-t0 + fp2sub(t0, t1, t0); // t0 = t0-t1 + fp2sub(t0, t1, jinv); // jinv = t0-t1 + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(t0, t1); // t1 = t0^2 + fp2mul_mont(t0, t1, t0); // t0 = t0*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2inv_mont(jinv); // jinv = 1/jinv + fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv +} + + +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24) +{ // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. + f2elm_t _t0, _t1, _t2; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; + + fp2add(&P->X, &P->Z, t0); // t0 = XP+ZP + fp2sub(&P->X, &P->Z, t1); // t1 = XP-ZP + fp2sqr_mont(t0, &P->X); // XP = (XP+ZP)^2 + fp2sub(&Q->X, &Q->Z, t2); // t2 = XQ-ZQ + fp2correction(t2); + fp2add(&Q->X,&Q->Z, &Q->X); // XQ = XQ+ZQ + fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + fp2sqr_mont(t1, &P->Z); // ZP = (XP-ZP)^2 + fp2mul_mont(t1, &Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + fp2sub(&P->X, &P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + fp2mul_mont(&P->X, &P->Z, &P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + fp2mul_mont(t2, A24, &Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + fp2sub(t0, t1, &Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + fp2add(&Q->X, &P->Z, &P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + fp2add(t0, t1, &Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + fp2mul_mont(&P->Z, t2, &P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + fp2sqr_mont(&Q->Z, &Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2sqr_mont(&Q->X, &Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(&Q->Z, xPQ, &Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + + +static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) +{ // Swap points. 
+ // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) { + digit_t temp = option & (P->X.e[0][i] ^ Q->X.e[0][i]); + P->X.e[0][i] = temp ^ P->X.e[0][i]; + Q->X.e[0][i] = temp ^ Q->X.e[0][i]; + temp = option & (P->Z.e[0][i] ^ Q->Z.e[0][i]); + P->Z.e[0][i] = temp ^ P->Z.e[0][i]; + Q->Z.e[0][i] = temp ^ Q->Z.e[0][i]; + temp = option & (P->X.e[1][i] ^ Q->X.e[1][i]); + P->X.e[1][i] = temp ^ P->X.e[1][i]; + Q->X.e[1][i] = temp ^ Q->X.e[1][i]; + temp = option & (P->Z.e[1][i] ^ Q->Z.e[1][i]); + P->Z.e[1][i] = temp ^ P->Z.e[1][i]; + Q->Z.e[1][i] = temp ^ Q->Z.e[1][i]; + } +} + + +static void LADDER3PT(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xPQ, const digit_t* m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t *A) +{ + point_proj_t R0 = {0}, R2 = {0}; + f2elm_t _A24 = {0}; + f2elm_t *A24=&_A24; + int i, nbits, prevbit = 0; + + if (AliceOrBob == ALICE) { + nbits = OALICE_BITS; + } else { + nbits = OBOB_BITS; + } + + // Initializing constant + fpcopy((const digit_t*)&Montgomery_one, A24->e[0]); + fp2add(A24, A24, A24); + fp2add(A, A24, A24); + fp2div2(A24, A24); + fp2div2(A24, A24); // A24 = (A+2)/4 + + // Initializing points + fp2copy(xQ, &R0->X); + fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R0->Z.e); + fp2copy(xPQ, &R2->X); + fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R2->Z.e); + fp2copy(xP, &R->X); + fpcopy((const digit_t*)&Montgomery_one, (digit_t*)R->Z.e); + fpzero((digit_t*)(R->Z.e)[1]); + + // Main loop + for (i = 0; i < nbits; i++) { + int bit = (m[i >> LOG2RADIX] >> (i & (RADIX-1))) & 1; + int swap = bit ^ prevbit; + prevbit = bit; + digit_t mask = 0 - (digit_t)swap; + + swap_points(R, R2, mask); + xDBLADD(R0, R2, &R->X, A24); + fp2mul_mont(&R2->X, &R->Z, &R2->X); + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.c index 2a5ee4494e..6e753132ae 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.c @@ -1,429 +1,429 @@ -/******************************************************************************************** -* SHA3-derived functions: SHAKE and cSHAKE -* -* Based on the public domain implementation in crypto_hash/keccakc512/simple/ -* from http://bench.cr.yp.to/supercop.html by Ronny Van Keer -* and the public domain "TweetFips202" implementation from https://twitter.com/tweetfips202 -* by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe -* -* See NIST Special Publication 800-185 for more information: -* http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-185.pdf -* -*********************************************************************************************/ - -#include <stdint.h> -#include <assert.h> -#include "fips202_r1.h" - -#define NROUNDS 24 -#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) - - -static uint64_t load64(const unsigned char *x) -{ - unsigned long long r = 0, i; - - for (i = 0; i < 8; ++i) { - r |= (unsigned long long)x[i] << 8 * i; - } - return r; -} - - -static void store64(uint8_t *x, uint64_t u) -{ - unsigned int i; - - for (i = 0; i < 8; ++i) { - x[i] = u; - u >>= 8; - } -} - - -static const uint64_t KeccakF_RoundConstants[NROUNDS] = -{ - (uint64_t)0x0000000000000001ULL, - (uint64_t)0x0000000000008082ULL, - (uint64_t)0x800000000000808aULL, - (uint64_t)0x8000000080008000ULL, - (uint64_t)0x000000000000808bULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008009ULL, - (uint64_t)0x000000000000008aULL, - (uint64_t)0x0000000000000088ULL, - (uint64_t)0x0000000080008009ULL, - (uint64_t)0x000000008000000aULL, - (uint64_t)0x000000008000808bULL, - (uint64_t)0x800000000000008bULL, - (uint64_t)0x8000000000008089ULL, - (uint64_t)0x8000000000008003ULL, - (uint64_t)0x8000000000008002ULL, - (uint64_t)0x8000000000000080ULL, - (uint64_t)0x000000000000800aULL, - (uint64_t)0x800000008000000aULL, - (uint64_t)0x8000000080008081ULL, - (uint64_t)0x8000000000008080ULL, - (uint64_t)0x0000000080000001ULL, - (uint64_t)0x8000000080008008ULL -}; - - -void KeccakF1600_StatePermute(uint64_t * state) -{ - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - - //copyFromState(A, state) - Aba = state[ 0]; - Abe = state[ 1]; - Abi = state[ 2]; - Abo = state[ 3]; - Abu = state[ 4]; - Aga = state[ 5]; - Age = state[ 6]; - Agi = state[ 7]; - Ago = state[ 8]; - Agu = state[ 9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for( round = 0; round < NROUNDS; round += 2 ) { - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - // prepareTheta - BCa = Aba^Aga^Aka^Ama^Asa; - BCe = Abe^Age^Ake^Ame^Ase; - BCi = Abi^Agi^Aki^Ami^Asi; - BCo = Abo^Ago^Ako^Amo^Aso; - BCu = Abu^Agu^Aku^Amu^Asu; - - //thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^((~BCe)& BCi ); - Eba ^= (uint64_t)KeccakF_RoundConstants[round]; - Ebe = BCe ^((~BCi)& BCo ); - Ebi = BCi ^((~BCo)& BCu ); - Ebo = BCo ^((~BCu)& BCa ); - Ebu = BCu ^((~BCa)& BCe ); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = 
ROL(Asi, 61); - Ega = BCa ^((~BCe)& BCi ); - Ege = BCe ^((~BCi)& BCo ); - Egi = BCi ^((~BCo)& BCu ); - Ego = BCo ^((~BCu)& BCa ); - Egu = BCu ^((~BCa)& BCe ); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^((~BCe)& BCi ); - Eke = BCe ^((~BCi)& BCo ); - Eki = BCi ^((~BCo)& BCu ); - Eko = BCo ^((~BCu)& BCa ); - Eku = BCu ^((~BCa)& BCe ); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^((~BCe)& BCi ); - Eme = BCe ^((~BCi)& BCo ); - Emi = BCi ^((~BCo)& BCu ); - Emo = BCo ^((~BCu)& BCa ); - Emu = BCu ^((~BCa)& BCe ); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^((~BCe)& BCi ); - Ese = BCe ^((~BCi)& BCo ); - Esi = BCi ^((~BCo)& BCu ); - Eso = BCo ^((~BCu)& BCa ); - Esu = BCu ^((~BCa)& BCe ); - - // prepareTheta - BCa = Eba^Ega^Eka^Ema^Esa; - BCe = Ebe^Ege^Eke^Eme^Ese; - BCi = Ebi^Egi^Eki^Emi^Esi; - BCo = Ebo^Ego^Eko^Emo^Eso; - BCu = Ebu^Egu^Eku^Emu^Esu; - - //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu^ROL(BCe, 1); - De = BCa^ROL(BCi, 1); - Di = BCe^ROL(BCo, 1); - Do = BCi^ROL(BCu, 1); - Du = BCo^ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^((~BCe)& BCi ); - Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; - Abe = BCe ^((~BCi)& BCo ); - Abi = BCi ^((~BCo)& BCu ); - Abo = BCo ^((~BCu)& BCa ); - Abu = BCu ^((~BCa)& BCe ); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^((~BCe)& BCi ); - Age = BCe ^((~BCi)& BCo ); - Agi = BCi ^((~BCo)& BCu ); - Ago = BCo ^((~BCu)& BCa ); - Agu = BCu ^((~BCa)& BCe ); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^((~BCe)& BCi ); - Ake = BCe ^((~BCi)& BCo ); - Aki = BCi ^((~BCo)& BCu ); - Ako = BCo ^((~BCu)& BCa ); - Aku = BCu ^((~BCa)& BCe ); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^((~BCe)& BCi ); - Ame = BCe ^((~BCi)& BCo ); - Ami = BCi ^((~BCo)& BCu ); - Amo = BCo ^((~BCu)& BCa ); - Amu = BCu ^((~BCa)& BCe ); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^((~BCe)& BCi ); - Ase = BCe ^((~BCi)& BCo ); - Asi = BCi ^((~BCo)& BCu ); - Aso = BCo ^((~BCu)& BCa ); - Asu = BCu ^((~BCa)& BCe ); - } - - //copyToState(state, A) - state[ 0] = Aba; - state[ 1] = Abe; - state[ 2] = Abi; - state[ 3] = Abo; - state[ 4] = Abu; - state[ 5] = Aga; - state[ 6] = Age; - state[ 7] = Agi; - state[ 8] = Ago; - state[ 9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; 
- state[22] = Asi; - state[23] = Aso; - state[24] = Asu; - - #undef round -} - -#include <string.h> -#define MIN(a, b) ((a) < (b) ? (a) : (b)) - - -static void keccak_absorb(uint64_t *s, unsigned int r, const unsigned char *m, unsigned long long int mlen, unsigned char p) -{ - unsigned long long i; - unsigned char t[200]; - - while (mlen >= r) - { - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(m + 8 * i); - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) - t[i] = 0; - for (i = 0; i < mlen; ++i) - t[i] = m[i]; - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) - s[i] ^= load64(t + 8 * i); -} - - -static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r) -{ - unsigned int i; - - while(nblocks > 0) - { - KeccakF1600_StatePermute(s); - for (i = 0; i < (r>>3); i++) - { - store64(h+8*i, s[i]); - } - h += r; - nblocks--; - } -} - -/********** cSHAKE256 ***********/ - -void cshake256_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen) -{ - unsigned char *sep = (unsigned char*)s; - unsigned int i; - - for (i = 0; i < 25; i++) - s[i] = 0; - - /* Absorb customization (domain-separation) string */ - sep[0] = 0x01; - sep[1] = 0x88; - sep[2] = 0x01; - sep[3] = 0x00; - sep[4] = 0x01; - sep[5] = 16; // fixed bitlen of cstm - sep[6] = cstm & 0xff; - sep[7] = cstm >> 8; - - KeccakF1600_StatePermute(s); - - /* Absorb input */ - keccak_absorb(s, SHAKE256_RATE, in, inlen, 0x04); -} - -void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen) -{ - uint64_t s[25]; - unsigned char t[SHAKE256_RATE]; - - cshake256_simple_absorb(s, cstm, in, inlen); - - /* Squeeze output */ - keccak_squeezeblocks(output, outlen/SHAKE256_RATE, s, SHAKE256_RATE); - output += (outlen/SHAKE256_RATE)*SHAKE256_RATE; - - if(outlen%SHAKE256_RATE) - { - keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); - for (unsigned int i = 0; i < outlen%SHAKE256_RATE; i++) - output[i] = t[i]; - } -} +/******************************************************************************************** +* SHA3-derived functions: SHAKE and cSHAKE +* +* Based on the public domain implementation in crypto_hash/keccakc512/simple/ +* from http://bench.cr.yp.to/supercop.html by Ronny Van Keer +* and the public domain "TweetFips202" implementation from https://twitter.com/tweetfips202 +* by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe +* +* See NIST Special Publication 800-185 for more information: +* http://nvlpubs.nist.gov/nistpubs/SpecialPublications/NIST.SP.800-185.pdf +* +*********************************************************************************************/ + +#include <stdint.h> +#include <assert.h> +#include "fips202_r1.h" + +#define NROUNDS 24 +#define ROL(a, offset) ((a << offset) ^ (a >> (64-offset))) + + +static uint64_t load64(const unsigned char *x) +{ + unsigned long long r = 0, i; + + for (i = 0; i < 8; ++i) { + r |= (unsigned long long)x[i] << 8 * i; + } + return r; +} + + +static void store64(uint8_t *x, uint64_t u) +{ + unsigned int i; + + for (i = 0; i < 8; ++i) { + x[i] = u; + u >>= 8; + } +} + + +static const uint64_t KeccakF_RoundConstants[NROUNDS] = +{ + (uint64_t)0x0000000000000001ULL, + (uint64_t)0x0000000000008082ULL, + (uint64_t)0x800000000000808aULL, + (uint64_t)0x8000000080008000ULL, + (uint64_t)0x000000000000808bULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008009ULL, + (uint64_t)0x000000000000008aULL, + (uint64_t)0x0000000000000088ULL, + (uint64_t)0x0000000080008009ULL, + (uint64_t)0x000000008000000aULL, + (uint64_t)0x000000008000808bULL, + (uint64_t)0x800000000000008bULL, + (uint64_t)0x8000000000008089ULL, + (uint64_t)0x8000000000008003ULL, + (uint64_t)0x8000000000008002ULL, + (uint64_t)0x8000000000000080ULL, + (uint64_t)0x000000000000800aULL, + (uint64_t)0x800000008000000aULL, + (uint64_t)0x8000000080008081ULL, + (uint64_t)0x8000000000008080ULL, + (uint64_t)0x0000000080000001ULL, + (uint64_t)0x8000000080008008ULL +}; + + +void KeccakF1600_StatePermute(uint64_t * state) +{ + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + + //copyFromState(A, state) + Aba = state[ 0]; + Abe = state[ 1]; + Abi = state[ 2]; + Abo = state[ 3]; + Abu = state[ 4]; + Aga = state[ 5]; + Age = state[ 6]; + Agi = state[ 7]; + Ago = state[ 8]; + Agu = state[ 9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for( round = 0; round < NROUNDS; round += 2 ) { + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + // prepareTheta + BCa = Aba^Aga^Aka^Ama^Asa; + BCe = Abe^Age^Ake^Ame^Ase; + BCi = Abi^Agi^Aki^Ami^Asi; + BCo = Abo^Ago^Ako^Amo^Aso; + BCu = Abu^Agu^Aku^Amu^Asu; + + //thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa ^((~BCe)& BCi ); + Eba ^= (uint64_t)KeccakF_RoundConstants[round]; + Ebe = BCe ^((~BCi)& BCo ); + Ebi = BCi ^((~BCo)& BCu ); + Ebo = BCo ^((~BCu)& BCa ); + Ebu = BCu ^((~BCa)& BCe ); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = 
ROL(Asi, 61); + Ega = BCa ^((~BCe)& BCi ); + Ege = BCe ^((~BCi)& BCo ); + Egi = BCi ^((~BCo)& BCu ); + Ego = BCo ^((~BCu)& BCa ); + Egu = BCu ^((~BCa)& BCe ); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^((~BCe)& BCi ); + Eke = BCe ^((~BCi)& BCo ); + Eki = BCi ^((~BCo)& BCu ); + Eko = BCo ^((~BCu)& BCa ); + Eku = BCu ^((~BCa)& BCe ); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^((~BCe)& BCi ); + Eme = BCe ^((~BCi)& BCo ); + Emi = BCi ^((~BCo)& BCu ); + Emo = BCo ^((~BCu)& BCa ); + Emu = BCu ^((~BCa)& BCe ); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^((~BCe)& BCi ); + Ese = BCe ^((~BCi)& BCo ); + Esi = BCi ^((~BCo)& BCu ); + Eso = BCo ^((~BCu)& BCa ); + Esu = BCu ^((~BCa)& BCe ); + + // prepareTheta + BCa = Eba^Ega^Eka^Ema^Esa; + BCe = Ebe^Ege^Eke^Eme^Ese; + BCi = Ebi^Egi^Eki^Emi^Esi; + BCo = Ebo^Ego^Eko^Emo^Eso; + BCu = Ebu^Egu^Eku^Emu^Esu; + + //thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu^ROL(BCe, 1); + De = BCa^ROL(BCi, 1); + Di = BCe^ROL(BCo, 1); + Do = BCi^ROL(BCu, 1); + Du = BCo^ROL(BCa, 1); + + Eba ^= Da; + BCa = Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^((~BCe)& BCi ); + Aba ^= (uint64_t)KeccakF_RoundConstants[round+1]; + Abe = BCe ^((~BCi)& BCo ); + Abi = BCi ^((~BCo)& BCu ); + Abo = BCo ^((~BCu)& BCa ); + Abu = BCu ^((~BCa)& BCe ); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^((~BCe)& BCi ); + Age = BCe ^((~BCi)& BCo ); + Agi = BCi ^((~BCo)& BCu ); + Ago = BCo ^((~BCu)& BCa ); + Agu = BCu ^((~BCa)& BCe ); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^((~BCe)& BCi ); + Ake = BCe ^((~BCi)& BCo ); + Aki = BCi ^((~BCo)& BCu ); + Ako = BCo ^((~BCu)& BCa ); + Aku = BCu ^((~BCa)& BCe ); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^((~BCe)& BCi ); + Ame = BCe ^((~BCi)& BCo ); + Ami = BCi ^((~BCo)& BCu ); + Amo = BCo ^((~BCu)& BCa ); + Amu = BCu ^((~BCa)& BCe ); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^((~BCe)& BCi ); + Ase = BCe ^((~BCi)& BCo ); + Asi = BCi ^((~BCo)& BCu ); + Aso = BCo ^((~BCu)& BCa ); + Asu = BCu ^((~BCa)& BCe ); + } + + //copyToState(state, A) + state[ 0] = Aba; + state[ 1] = Abe; + state[ 2] = Abi; + state[ 3] = Abo; + state[ 4] = Abu; + state[ 5] = Aga; + state[ 6] = Age; + state[ 7] = Agi; + state[ 8] = Ago; + state[ 9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; 
+ state[22] = Asi; + state[23] = Aso; + state[24] = Asu; + + #undef round +} + +#include <string.h> +#define MIN(a, b) ((a) < (b) ? (a) : (b)) + + +static void keccak_absorb(uint64_t *s, unsigned int r, const unsigned char *m, unsigned long long int mlen, unsigned char p) +{ + unsigned long long i; + unsigned char t[200]; + + while (mlen >= r) + { + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(m + 8 * i); + + KeccakF1600_StatePermute(s); + mlen -= r; + m += r; + } + + for (i = 0; i < r; ++i) + t[i] = 0; + for (i = 0; i < mlen; ++i) + t[i] = m[i]; + t[i] = p; + t[r - 1] |= 128; + for (i = 0; i < r / 8; ++i) + s[i] ^= load64(t + 8 * i); +} + + +static void keccak_squeezeblocks(unsigned char *h, unsigned long long int nblocks, uint64_t *s, unsigned int r) +{ + unsigned int i; + + while(nblocks > 0) + { + KeccakF1600_StatePermute(s); + for (i = 0; i < (r>>3); i++) + { + store64(h+8*i, s[i]); + } + h += r; + nblocks--; + } +} + +/********** cSHAKE256 ***********/ + +void cshake256_simple_absorb(uint64_t s[25], uint16_t cstm, const unsigned char *in, unsigned long long inlen) +{ + unsigned char *sep = (unsigned char*)s; + unsigned int i; + + for (i = 0; i < 25; i++) + s[i] = 0; + + /* Absorb customization (domain-separation) string */ + sep[0] = 0x01; + sep[1] = 0x88; + sep[2] = 0x01; + sep[3] = 0x00; + sep[4] = 0x01; + sep[5] = 16; // fixed bitlen of cstm + sep[6] = cstm & 0xff; + sep[7] = cstm >> 8; + + KeccakF1600_StatePermute(s); + + /* Absorb input */ + keccak_absorb(s, SHAKE256_RATE, in, inlen, 0x04); +} + +void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen) +{ + uint64_t s[25]; + unsigned char t[SHAKE256_RATE]; + + cshake256_simple_absorb(s, cstm, in, inlen); + + /* Squeeze output */ + keccak_squeezeblocks(output, outlen/SHAKE256_RATE, s, SHAKE256_RATE); + output += (outlen/SHAKE256_RATE)*SHAKE256_RATE; + + if(outlen%SHAKE256_RATE) + { + keccak_squeezeblocks(t, 1, s, SHAKE256_RATE); + for (unsigned int i = 0; i < outlen%SHAKE256_RATE; i++) + output[i] = t[i]; + } +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.h index 128a0127bf..f7889ac110 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fips202_r1.h @@ -1,13 +1,13 @@ -#ifndef FIPS202_R1_H -#define FIPS202_R1_H - -#include "sike_r1_namespace.h" -#include <stdint.h> - -#define SHAKE128_RATE 168 -#define SHAKE256_RATE 136 - -void cshake256_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen); -void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen); - -#endif // FIPS202_R1_H +#ifndef FIPS202_R1_H +#define FIPS202_R1_H + +#include "sike_r1_namespace.h" +#include <stdint.h> + +#define SHAKE128_RATE 168 +#define SHAKE256_RATE 136 + +void cshake256_simple_absorb(uint64_t *s, uint16_t cstm, const unsigned char *in, unsigned long long inlen); +void cshake256_simple(unsigned char *output, unsigned long long outlen, uint16_t cstm, const unsigned char *in, unsigned long long inlen); + +#endif // FIPS202_R1_H diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fp_generic_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fp_generic_r1.c index 4a35a81f7f..06ed369b9d 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fp_generic_r1.c +++ 
b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fp_generic_r1.c @@ -1,225 +1,225 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: portable modular arithmetic for P503 -*********************************************************************************************/ - -#include "sike_r1_namespace.h" -#include "P503_internal_r1.h" - -// Global constants -extern const uint64_t p503[NWORDS_FIELD]; -extern const uint64_t p503p1[NWORDS_FIELD]; -extern const uint64_t p503x2[NWORDS_FIELD]; - -void fpadd503(const digit_t* a, const digit_t* b, digit_t* c) -{ // Modular addition, c = a+b mod p503. - // Inputs: a, b in [0, 2*p503-1] - // Output: c in [0, 2*p503-1] - unsigned int i, carry = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, a[i], b[i], carry, c[i]); - } - - carry = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(carry, c[i], ((const digit_t*)p503x2)[i], carry, c[i]); - } - mask = 0 - (digit_t)carry; - - carry = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, c[i], ((const digit_t*)p503x2)[i] & mask, carry, c[i]); - } +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: portable modular arithmetic for P503 +*********************************************************************************************/ + +#include "sike_r1_namespace.h" +#include "P503_internal_r1.h" + +// Global constants +extern const uint64_t p503[NWORDS_FIELD]; +extern const uint64_t p503p1[NWORDS_FIELD]; +extern const uint64_t p503x2[NWORDS_FIELD]; + +void fpadd503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular addition, c = a+b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((const digit_t*)p503x2)[i], carry, c[i]); + } + mask = 0 - (digit_t)carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((const digit_t*)p503x2)[i] & mask, carry, c[i]); + } +} + + +void fpsub503(const digit_t* a, const digit_t* b, digit_t* c) +{ // Modular subtraction, c = a-b mod p503. + // Inputs: a, b in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((const digit_t*)p503x2)[i] & mask, borrow, c[i]); + } } - - -void fpsub503(const digit_t* a, const digit_t* b, digit_t* c) -{ // Modular subtraction, c = a-b mod p503. - // Inputs: a, b in [0, 2*p503-1] - // Output: c in [0, 2*p503-1] - unsigned int i, borrow = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, a[i], b[i], borrow, c[i]); - } - mask = 0 - (digit_t)borrow; - - borrow = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(borrow, c[i], ((const digit_t*)p503x2)[i] & mask, borrow, c[i]); - } -} - - -void fpneg503(digit_t* a) -{ // Modular negation, a = -a mod p503. 
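
Annotation: fpadd503 and fpsub503 above avoid a data-dependent branch for the final correction step: the carry/borrow bit is stretched into an all-ones or all-zero mask, and 2*p503 is added back only where the mask is set. A standalone single-word sketch of the same trick, with a hypothetical small modulus m standing in for 2*p503:

#include <stdint.h>
#include <assert.h>

/* Branch-free "subtract m, then add it back if that underflowed":
   reduces a in [0, 2m-1] to [0, m-1] without a conditional branch. */
static uint64_t cond_correct(uint64_t a, uint64_t m)
{
    uint64_t borrow = (a < m);     /* 1 if a - m would underflow        */
    uint64_t t      = a - m;       /* wraps around when borrow == 1     */
    uint64_t mask   = 0 - borrow;  /* 0xFF..F if borrow, else 0x00..0   */
    return t + (m & mask);         /* add m back only under the mask    */
}

int main(void)
{
    assert(cond_correct(5, 11) == 5);   /* below m: the subtraction is undone */
    assert(cond_correct(13, 11) == 2);  /* at or above m: reduced             */
    return 0;
}
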
- // Input/output: a in [0, 2*p503-1] - unsigned int i, borrow = 0; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, ((const digit_t*)p503x2)[i], a[i], borrow, a[i]); - } -} - - -void fpdiv2_503(const digit_t* a, digit_t* c) -{ // Modular division by two, c = a/2 mod p503. - // Input : a in [0, 2*p503-1] - // Output: c in [0, 2*p503-1] - unsigned int i, carry = 0; - digit_t mask; - - mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p503 - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, a[i], ((const digit_t*)p503)[i] & mask, carry, c[i]); - } - - mp_shiftr1(c, NWORDS_FIELD); + + +void fpneg503(digit_t* a) +{ // Modular negation, a = -a mod p503. + // Input/output: a in [0, 2*p503-1] + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((const digit_t*)p503x2)[i], a[i], borrow, a[i]); + } } - - -void fpcorrection503(digit_t* a) -{ // Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. - unsigned int i, borrow = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, a[i], ((const digit_t*)p503)[i], borrow, a[i]); - } - mask = 0 - (digit_t)borrow; - - borrow = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(borrow, a[i], ((const digit_t*)p503)[i] & mask, borrow, a[i]); - } -} - - -void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) -{ // Digit multiplication, digit * digit -> 2-digit result - register digit_t al, ah, bl, bh, temp; - digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; - digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); - - al = a & mask_low; // Low part - ah = a >> (sizeof(digit_t) * 4); // High part - bl = b & mask_low; - bh = b >> (sizeof(digit_t) * 4); - - albl = al*bl; - albh = al*bh; - ahbl = ah*bl; - ahbh = ah*bh; - c[0] = albl & mask_low; // C00 - - res1 = albl >> (sizeof(digit_t) * 4); - res2 = ahbl & mask_low; - res3 = albh & mask_low; - temp = res1 + res2 + res3; - carry = temp >> (sizeof(digit_t) * 4); - c[0] ^= temp << (sizeof(digit_t) * 4); // C01 - - res1 = ahbl >> (sizeof(digit_t) * 4); - res2 = albh >> (sizeof(digit_t) * 4); - res3 = ahbh & mask_low; - temp = res1 + res2 + res3 + carry; - c[1] = temp & mask_low; // C10 - carry = temp & mask_high; - c[1] ^= (ahbh & mask_high) + carry; // C11 -} - - -void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) -{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. - unsigned int i, j, carry; - digit_t t = 0, u = 0, v = 0, UV[2]; - - for (i = 0; i < nwords; i++) { - for (j = 0; j <= i; j++) { - MUL(a[j], b[i-j], UV+1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - c[i] = v; - v = u; - u = t; - t = 0; - } - - for (i = nwords; i < 2*nwords-1; i++) { - for (j = i-nwords+1; j < nwords; j++) { - MUL(a[j], b[i-j], UV+1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - c[i] = v; + + +void fpdiv2_503(const digit_t* a, digit_t* c) +{ // Modular division by two, c = a/2 mod p503. + // Input : a in [0, 2*p503-1] + // Output: c in [0, 2*p503-1] + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p503 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((const digit_t*)p503)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + + +void fpcorrection503(digit_t* a) +{ // Modular correction to reduce field element a in [0, 2*p503-1] to [0, p503-1]. 
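
Annotation: fpdiv2_503 above halves a field element without leaving its residue class. p503 is odd, so when a is odd the masked addition makes a + p503 even, and (a + p503)/2 is congruent to a * 2^-1 mod p503 because the added p503 vanishes modulo p503. A worked instance with a toy odd modulus, say p = 11 and a = 7: (7 + 11)/2 = 9, and 2*9 = 18, which is 7 mod 11. When a is even the mask is zero and the routine is a plain right shift.
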
+ unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((const digit_t*)p503)[i], borrow, a[i]); + } + mask = 0 - (digit_t)borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((const digit_t*)p503)[i] & mask, borrow, a[i]); + } +} + + +void digit_x_digit(const digit_t a, const digit_t b, digit_t* c) +{ // Digit multiplication, digit * digit -> 2-digit result + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t)*4), mask_high = (digit_t)(-1) << (sizeof(digit_t)*4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al*bl; + albh = al*bh; + ahbl = ah*bl; + ahbh = ah*bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + + +void mp_mul(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. + unsigned int i, j, carry; + digit_t t = 0, u = 0, v = 0, UV[2]; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2*nwords-1; i++) { + for (j = i-nwords+1; j < nwords; j++) { + MUL(a[j], b[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2*nwords-1] = v; +} + + +void rdc_mont(const dfelm_t ma, felm_t mc) +{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503. + // mc = ma*R^-1 mod p503x2, where R = 2^512. + // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1]. + // ma is assumed to be in Montgomery representation. + unsigned int i, j, carry, count = p503_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i-p503_ZERO_WORDS+1)) { + MUL(mc[j], ((const digit_t*)p503p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; v = u; - u = t; - t = 0; - } - c[2*nwords-1] = v; -} - - -void rdc_mont(const dfelm_t ma, felm_t mc) -{ // Efficient Montgomery reduction using comba and exploiting the special form of the prime p503. - // mc = ma*R^-1 mod p503x2, where R = 2^512. - // If ma < 2^512*p503, the output mc is in the range [0, 2*p503-1]. - // ma is assumed to be in Montgomery representation. 
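
Annotation: digit_x_digit above assembles a full double-width product from four half-width products (al*bl, al*bh, ah*bl, ah*bh), and mp_mul then accumulates full products column by column (the Comba pattern), carrying the three-word accumulator t:u:v across columns. A minimal sketch of the same column accumulation for two 2-digit operands, using 16-bit digits so a single 64-bit accumulator has enough headroom (the real code uses machine-word digits and the MUL/ADDC macros instead):

#include <stdint.h>
#include <assert.h>

/* Column-wise (Comba) product of two 2-digit numbers with 16-bit digits,
   little-endian digit order, producing a 4-digit result. */
static void comba_mul_2x2(const uint16_t a[2], const uint16_t b[2], uint16_t c[4])
{
    uint64_t acc;

    acc  = (uint64_t)a[0] * b[0];                          /* column 0  */
    c[0] = (uint16_t)acc;
    acc >>= 16;

    acc += (uint64_t)a[0] * b[1] + (uint64_t)a[1] * b[0];  /* column 1  */
    c[1] = (uint16_t)acc;
    acc >>= 16;

    acc += (uint64_t)a[1] * b[1];                          /* column 2  */
    c[2] = (uint16_t)acc;
    c[3] = (uint16_t)(acc >> 16);                          /* top digit */
}

int main(void)
{
    /* 0x1234_5678 * 0x9ABC_DEF0 = 0x0B00_EA4E_242D_2080 */
    const uint16_t a[2] = {0x5678, 0x1234}, b[2] = {0xDEF0, 0x9ABC};
    uint16_t c[4];
    comba_mul_2x2(a, b, c);
    assert(c[0] == 0x2080 && c[1] == 0x242D && c[2] == 0xEA4E && c[3] == 0x0B00);
    return 0;
}
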
- unsigned int i, j, carry, count = p503_ZERO_WORDS; - digit_t UV[2], t = 0, u = 0, v = 0; - - for (i = 0; i < NWORDS_FIELD; i++) { - mc[i] = 0; - } - - for (i = 0; i < NWORDS_FIELD; i++) { - for (j = 0; j < i; j++) { - if (j < (i-p503_ZERO_WORDS+1)) { - MUL(mc[j], ((const digit_t*)p503p1)[i-j], UV+1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD-count)) { + MUL(mc[j], ((const digit_t*)p503p1)[i-j], UV+1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); t += carry; - } - } - ADDC(0, v, ma[i], carry, v); - ADDC(carry, u, 0, carry, u); - t += carry; - mc[i] = v; - v = u; - u = t; - t = 0; - } - - for (i = NWORDS_FIELD; i < 2*NWORDS_FIELD-1; i++) { - if (count > 0) { - count -= 1; - } - for (j = i-NWORDS_FIELD+1; j < NWORDS_FIELD; j++) { - if (j < (NWORDS_FIELD-count)) { - MUL(mc[j], ((const digit_t*)p503p1)[i-j], UV+1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - } - ADDC(0, v, ma[i], carry, v); - ADDC(carry, u, 0, carry, u); - t += carry; - mc[i-NWORDS_FIELD] = v; - v = u; - u = t; - t = 0; - } - - /* `carry` isn't read after this, but it's still a necessary argument to the macro */ - /* cppcheck-suppress unreadVariable */ - ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); - mc[NWORDS_FIELD-1] = v; -} + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i-NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + + /* `carry` isn't read after this, but it's still a necessary argument to the macro */ + /* cppcheck-suppress unreadVariable */ + ADDC(0, v, ma[2*NWORDS_FIELD-1], carry, v); + mc[NWORDS_FIELD-1] = v; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fpx_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fpx_r1.c index 9661567985..2734fafdf5 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fpx_r1.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/fpx_r1.c @@ -1,356 +1,356 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: core functions over GF(p) and GF(p^2) -*********************************************************************************************/ - -#include "sike_r1_namespace.h" -#include "P503_internal_r1.h" - -__inline void fpcopy(const felm_t a, felm_t c) -{ // Copy a field element, c = a. - unsigned int i; - - for (i = 0; i < NWORDS_FIELD; i++) - c[i] = a[i]; -} - - -__inline void fpzero(felm_t a) -{ // Zero a field element, a = 0. - unsigned int i; - - for (i = 0; i < NWORDS_FIELD; i++) - a[i] = 0; -} - - -void to_mont(const felm_t a, felm_t mc) -{ // Conversion to Montgomery representation, - // mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1]. - // The Montgomery constant R^2 mod p is the global value "Montgomery_R2". - - fpmul_mont(a, (const digit_t*)&Montgomery_R2, mc); -} - - -void from_mont(const felm_t ma, felm_t c) -{ // Conversion from Montgomery representation to standard representation, - // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. - digit_t one[NWORDS_FIELD] = {0}; - - one[0] = 1; - fpmul_mont(ma, one, c); - fpcorrection(c); -} - - -void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords) -{ // Copy wordsize digits, c = a, where lng(a) = nwords. 
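
Annotation: rdc_mont above is a word-level Montgomery reduction specialised to p503: the low p503_ZERO_WORDS words of p503+1 are zero, so partial products against them are skipped (the count bookkeeping). The generic one-word version of the same idea is sketched below; this is an illustration with an arbitrary odd modulus n and R = 2^32, not the p503 code path.

#include <stdint.h>
#include <assert.h>

/* One-word Montgomery reduction: returns t * R^-1 mod n for R = 2^32,
   assuming n is odd and t < n*R. */
static uint32_t mont_redc32(uint64_t t, uint32_t n)
{
    /* n' = -n^-1 mod 2^32 via Newton iteration on the 2-adic inverse */
    uint32_t inv = n;                        /* correct to 3 bits for odd n  */
    for (int i = 0; i < 5; i++) inv *= 2 - n * inv;
    uint32_t nprime = (uint32_t)(0u - inv);

    uint32_t m = (uint32_t)t * nprime;       /* chosen so t + m*n == 0 mod R */
    uint64_t u = (t + (uint64_t)m * n) >> 32;
    return (uint32_t)(u >= n ? u - n : u);   /* final correction (not constant time) */
}

int main(void)
{
    uint32_t n = 23;
    uint64_t R_mod_n = (1ull << 32) % n;
    uint64_t aR = (7 * R_mod_n) % n;         /* 7 in Montgomery form */
    assert(mont_redc32(aR, n) == 7);         /* REDC(a*R) gives back a */
    return 0;
}

to_mont and from_mont in fpx_r1.c (just above) build directly on this operation: Montgomery-multiplying by the precomputed R^2 yields a*R, the Montgomery form, and Montgomery-multiplying by 1 strips the factor R again.
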
- unsigned int i; - - for (i = 0; i < nwords; i++) { - c[i] = a[i]; - } -} - - -void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) -{ // Multiprecision multiplication, c = a*b mod p. - dfelm_t temp = {0}; - - mp_mul(ma, mb, temp, NWORDS_FIELD); - rdc_mont(temp, mc); -} - - -void fpsqr_mont(const felm_t ma, felm_t mc) -{ // Multiprecision squaring, c = a^2 mod p. - dfelm_t temp = {0}; - - mp_mul(ma, ma, temp, NWORDS_FIELD); - rdc_mont(temp, mc); -} - - -void fpinv_mont(felm_t a) -{ // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. - felm_t tt; - - fpcopy(a, tt); - fpinv_chain_mont(tt); - fpsqr_mont(tt, tt); - fpsqr_mont(tt, tt); - fpmul_mont(a, tt, a); -} - - -void fp2copy(const f2elm_t *a, f2elm_t *c) -{ // Copy a GF(p^2) element, c = a. - fpcopy(a->e[0], c->e[0]); - fpcopy(a->e[1], c->e[1]); -} - -void fp2neg(f2elm_t *a) -{ // GF(p^2) negation, a = -a in GF(p^2). - fpneg(a->e[0]); - fpneg(a->e[1]); -} - - -__inline void fp2add(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) -{ // GF(p^2) addition, c = a+b in GF(p^2). - fpadd(a->e[0], b->e[0], c->e[0]); - fpadd(a->e[1], b->e[1], c->e[1]); -} - - -__inline void fp2sub(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) -{ // GF(p^2) subtraction, c = a-b in GF(p^2). - fpsub(a->e[0], b->e[0], c->e[0]); - fpsub(a->e[1], b->e[1], c->e[1]); -} - - -void fp2div2(const f2elm_t *a, f2elm_t *c) -{ // GF(p^2) division by two, c = a/2 in GF(p^2). - fpdiv2(a->e[0], c->e[0]); - fpdiv2(a->e[1], c->e[1]); -} - - -void fp2correction(f2elm_t *a) -{ // Modular correction, a = a in GF(p^2). - fpcorrection(a->e[0]); - fpcorrection(a->e[1]); -} - - -__inline static void mp_addfast(const digit_t* a, const digit_t* b, digit_t* c) -{ // Multiprecision addition, c = a+b. - - mp_add(a, b, c, NWORDS_FIELD); -} - - -__inline static void mp_addfastx2(const digit_t* a, const digit_t* b, digit_t* c) -{ // Double-length multiprecision addition, c = a+b. - - mp_add(a, b, c, 2*NWORDS_FIELD); -} - - -void fp2sqr_mont(const f2elm_t *a, f2elm_t *c) -{ // GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). - // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] - // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] - felm_t t1, t2, t3; - +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: core functions over GF(p) and GF(p^2) +*********************************************************************************************/ + +#include "sike_r1_namespace.h" +#include "P503_internal_r1.h" + +__inline void fpcopy(const felm_t a, felm_t c) +{ // Copy a field element, c = a. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + c[i] = a[i]; +} + + +__inline void fpzero(felm_t a) +{ // Zero a field element, a = 0. + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + a[i] = 0; +} + + +void to_mont(const felm_t a, felm_t mc) +{ // Conversion to Montgomery representation, + // mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1]. + // The Montgomery constant R^2 mod p is the global value "Montgomery_R2". + + fpmul_mont(a, (const digit_t*)&Montgomery_R2, mc); +} + + +void from_mont(const felm_t ma, felm_t c) +{ // Conversion from Montgomery representation to standard representation, + // c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. 
+ digit_t one[NWORDS_FIELD] = {0}; + + one[0] = 1; + fpmul_mont(ma, one, c); + fpcorrection(c); +} + + +void copy_words(const digit_t* a, digit_t* c, const unsigned int nwords) +{ // Copy wordsize digits, c = a, where lng(a) = nwords. + unsigned int i; + + for (i = 0; i < nwords; i++) { + c[i] = a[i]; + } +} + + +void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) +{ // Multiprecision multiplication, c = a*b mod p. + dfelm_t temp = {0}; + + mp_mul(ma, mb, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + + +void fpsqr_mont(const felm_t ma, felm_t mc) +{ // Multiprecision squaring, c = a^2 mod p. + dfelm_t temp = {0}; + + mp_mul(ma, ma, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + + +void fpinv_mont(felm_t a) +{ // Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. + felm_t tt; + + fpcopy(a, tt); + fpinv_chain_mont(tt); + fpsqr_mont(tt, tt); + fpsqr_mont(tt, tt); + fpmul_mont(a, tt, a); +} + + +void fp2copy(const f2elm_t *a, f2elm_t *c) +{ // Copy a GF(p^2) element, c = a. + fpcopy(a->e[0], c->e[0]); + fpcopy(a->e[1], c->e[1]); +} + +void fp2neg(f2elm_t *a) +{ // GF(p^2) negation, a = -a in GF(p^2). + fpneg(a->e[0]); + fpneg(a->e[1]); +} + + +__inline void fp2add(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) +{ // GF(p^2) addition, c = a+b in GF(p^2). + fpadd(a->e[0], b->e[0], c->e[0]); + fpadd(a->e[1], b->e[1], c->e[1]); +} + + +__inline void fp2sub(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) +{ // GF(p^2) subtraction, c = a-b in GF(p^2). + fpsub(a->e[0], b->e[0], c->e[0]); + fpsub(a->e[1], b->e[1], c->e[1]); +} + + +void fp2div2(const f2elm_t *a, f2elm_t *c) +{ // GF(p^2) division by two, c = a/2 in GF(p^2). + fpdiv2(a->e[0], c->e[0]); + fpdiv2(a->e[1], c->e[1]); +} + + +void fp2correction(f2elm_t *a) +{ // Modular correction, a = a in GF(p^2). + fpcorrection(a->e[0]); + fpcorrection(a->e[1]); +} + + +__inline static void mp_addfast(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision addition, c = a+b. + + mp_add(a, b, c, NWORDS_FIELD); +} + + +__inline static void mp_addfastx2(const digit_t* a, const digit_t* b, digit_t* c) +{ // Double-length multiprecision addition, c = a+b. + + mp_add(a, b, c, 2*NWORDS_FIELD); +} + + +void fp2sqr_mont(const f2elm_t *a, f2elm_t *c) +{ // GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). + // Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] + felm_t t1, t2, t3; + + mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 + fpsub(a->e[0], a->e[1], t2); // t2 = a0-a1 + mp_addfast(a->e[0], a->e[0], t3); // t3 = 2a0 + fpmul_mont(t1, t2, c->e[0]); // c0 = (a0+a1)(a0-a1) + fpmul_mont(t3, a->e[1], c->e[1]); // c1 = 2a0*a1 +} + + +unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + + return borrow; +} + + +__inline static digit_t mp_subfast(const digit_t* a, const digit_t* b, digit_t* c) +{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. + // If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0 + + return (0 - (digit_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); +} + + +void fp2mul_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) +{ // GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). 
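
Annotation: fp2sqr_mont above uses the standard complex-squaring shortcut: with i^2 = -1, (a0 + a1*i)^2 = (a0^2 - a1^2) + 2*a0*a1*i = (a0+a1)*(a0-a1) + 2*a0*a1*i, so two field multiplications replace the three of the naive formula. A tiny plain-integer check of the identity (the Montgomery reduction is orthogonal to it):

#include <assert.h>

int main(void)
{
    long long a0 = 7, a1 = 5;
    long long c0 = (a0 + a1) * (a0 - a1);   /* = a0^2 - a1^2 */
    long long c1 = (a0 + a0) * a1;          /* = 2*a0*a1     */
    assert(c0 == a0 * a0 - a1 * a1);
    assert(c1 == 2 * a0 * a1);
    return 0;
}
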
+ // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] + // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] + felm_t t1, t2; + dfelm_t tt1, tt2, tt3; + digit_t mask; + unsigned int i, borrow = 0; + + mp_mul(a->e[0], b->e[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 + mp_mul(a->e[1], b->e[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 - fpsub(a->e[0], a->e[1], t2); // t2 = a0-a1 - mp_addfast(a->e[0], a->e[0], t3); // t3 = 2a0 - fpmul_mont(t1, t2, c->e[0]); // c0 = (a0+a1)(a0-a1) - fpmul_mont(t3, a->e[1], c->e[1]); // c1 = 2a0*a1 -} - - -unsigned int mp_sub(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) -{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. - unsigned int i, borrow = 0; - - for (i = 0; i < nwords; i++) { - SUBC(borrow, a[i], b[i], borrow, c[i]); - } - - return borrow; -} - - -__inline static digit_t mp_subfast(const digit_t* a, const digit_t* b, digit_t* c) -{ // Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. - // If c < 0 then returns mask = 0xFF..F, else mask = 0x00..0 - - return (0 - (digit_t)mp_sub(a, b, c, 2*NWORDS_FIELD)); -} - - -void fp2mul_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) -{ // GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). - // Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] - // Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] - felm_t t1, t2; - dfelm_t tt1, tt2, tt3; - digit_t mask; - unsigned int i, borrow = 0; - - mp_mul(a->e[0], b->e[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 - mp_mul(a->e[1], b->e[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 - mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 - mp_addfast(b->e[0], b->e[1], t2); // t2 = b0+b1 - mask = mp_subfast(tt1, tt2, tt3); // tt3 = a0*b0 - a1*b1. If tt3 < 0 then mask = 0xFF..F, else if tt3 >= 0 then mask = 0x00..0 - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(borrow, tt3[NWORDS_FIELD+i], ((const digit_t*)PRIME)[i] & mask, borrow, tt3[NWORDS_FIELD+i]); - } - rdc_mont(tt3, c->e[0]); // c[0] = a0*b0 - a1*b1 - mp_addfastx2(tt1, tt2, tt1); // tt1 = a0*b0 + a1*b1 - mp_mul(t1, t2, tt2, NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) - mp_subfast(tt2, tt1, tt2); // tt2 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 - rdc_mont(tt2, c->e[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 -} - - -void fpinv_chain_mont(felm_t a) -{ // Chain to compute a^(p-3)/4 using Montgomery arithmetic. 
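
Annotation: fp2mul_mont above is the Karatsuba-style GF(p^2) product: with i^2 = -1, (a0 + a1*i)*(b0 + b1*i) = (a0*b0 - a1*b1) + ((a0+a1)*(b0+b1) - a0*b0 - a1*b1)*i, so three multiprecision products (tt1 = a0*b0, tt2 = a1*b1, and the product of the two digit sums) replace the four of the schoolbook formula. Because a0*b0 - a1*b1 can be negative, mp_subfast returns an all-ones mask in that case and the loop adds p503 into the upper half of the double-length result, i.e. adds p503 * 2^512; after rdc_mont multiplies by R^-1 = 2^-512 that offset becomes p503, which is zero as a field element, so c->e[0] is unaffected.
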
- unsigned int i, j; - felm_t t[15], tt; - - // Precomputed table - fpsqr_mont(a, tt); - fpmul_mont(a, tt, t[0]); - for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]); - - fpcopy(a, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[8], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[9], tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[0], tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[2], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[8], tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[0], tt, tt); - for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[2], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[3], tt, tt); - for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 12; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[12], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[8], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[12], tt, tt); - for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[11], tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[14], tt, tt); - for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[14], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[8], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[4], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[7], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[0], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[11], tt, tt); - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[13], tt, tt); - for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[1], tt, tt); - for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (j = 0; j < 49; j++) { - for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); - fpmul_mont(t[14], tt, tt); - } - fpcopy(tt, a); -} - - -void fp2inv_mont(f2elm_t *a) -{// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). 
- f2elm_t t1; - - fpsqr_mont(a->e[0], t1.e[0]); // t10 = a0^2 - fpsqr_mont(a->e[1], t1.e[1]); // t11 = a1^2 - fpadd(t1.e[0], t1.e[1], t1.e[0]); // t10 = a0^2+a1^2 - fpinv_mont(t1.e[0]); // t10 = (a0^2+a1^2)^-1 - fpneg(a->e[1]); // a = a0-i*a1 - fpmul_mont(a->e[0], t1.e[0], a->e[0]); - fpmul_mont(a->e[1], t1.e[0], a->e[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 -} - - -void to_fp2mont(const f2elm_t *a, f2elm_t *mc) -{ // Conversion of a GF(p^2) element to Montgomery representation, - // mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). - - to_mont(a->e[0], mc->e[0]); - to_mont(a->e[1], mc->e[1]); -} - - -void from_fp2mont(const f2elm_t *ma, f2elm_t *c) -{ // Conversion of a GF(p^2) element from Montgomery representation to standard representation, - // c_i = ma_i*R^(-1) = a_i in GF(p^2). - - from_mont(ma->e[0], c->e[0]); - from_mont(ma->e[1], c->e[1]); -} - - -unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) -{ // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. - unsigned int i, carry = 0; - - for (i = 0; i < nwords; i++) { - ADDC(carry, a[i], b[i], carry, c[i]); - } - - return carry; -} - -void mp_shiftr1(digit_t* x, const unsigned int nwords) -{ // Multiprecision right shift by one. - unsigned int i; - - for (i = 0; i < nwords-1; i++) { - SHIFTR(x[i+1], x[i], 1, x[i], RADIX); - } - x[nwords-1] >>= 1; -} + mp_addfast(b->e[0], b->e[1], t2); // t2 = b0+b1 + mask = mp_subfast(tt1, tt2, tt3); // tt3 = a0*b0 - a1*b1. If tt3 < 0 then mask = 0xFF..F, else if tt3 >= 0 then mask = 0x00..0 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, tt3[NWORDS_FIELD+i], ((const digit_t*)PRIME)[i] & mask, borrow, tt3[NWORDS_FIELD+i]); + } + rdc_mont(tt3, c->e[0]); // c[0] = a0*b0 - a1*b1 + mp_addfastx2(tt1, tt2, tt1); // tt1 = a0*b0 + a1*b1 + mp_mul(t1, t2, tt2, NWORDS_FIELD); // tt2 = (a0+a1)*(b0+b1) + mp_subfast(tt2, tt1, tt2); // tt2 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + rdc_mont(tt2, c->e[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 +} + + +void fpinv_chain_mont(felm_t a) +{ // Chain to compute a^(p-3)/4 using Montgomery arithmetic. 
+ unsigned int i, j; + felm_t t[15], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 13; i++) fpmul_mont(t[i], tt, t[i+1]); + + fpcopy(a, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 12; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 7; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[8], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[4], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[11], tt, tt); + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 8; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (j = 0; j < 49; j++) { + for (i = 0; i < 5; i++) fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + } + fpcopy(tt, a); +} + + +void fp2inv_mont(f2elm_t *a) +{// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). 
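
Annotation: fpinv_mont earlier in this file turns the chain above into a Fermat inversion. The prime p503 = 2^250 * 3^159 - 1 is 3 mod 4, so a^-1 = a^(p-2) = (a^((p-3)/4))^4 * a. The chain computes tt = a^((p-3)/4); the two fpsqr_mont calls raise that to a^(p-3); the final fpmul_mont by a gives a^(p-2), i.e. the inverse (carrying the Montgomery factor R, exactly as the fpinv_mont comment records).
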
+ f2elm_t t1; + + fpsqr_mont(a->e[0], t1.e[0]); // t10 = a0^2 + fpsqr_mont(a->e[1], t1.e[1]); // t11 = a1^2 + fpadd(t1.e[0], t1.e[1], t1.e[0]); // t10 = a0^2+a1^2 + fpinv_mont(t1.e[0]); // t10 = (a0^2+a1^2)^-1 + fpneg(a->e[1]); // a = a0-i*a1 + fpmul_mont(a->e[0], t1.e[0], a->e[0]); + fpmul_mont(a->e[1], t1.e[0], a->e[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + + +void to_fp2mont(const f2elm_t *a, f2elm_t *mc) +{ // Conversion of a GF(p^2) element to Montgomery representation, + // mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). + + to_mont(a->e[0], mc->e[0]); + to_mont(a->e[1], mc->e[1]); +} + + +void from_fp2mont(const f2elm_t *ma, f2elm_t *c) +{ // Conversion of a GF(p^2) element from Montgomery representation to standard representation, + // c_i = ma_i*R^(-1) = a_i in GF(p^2). + + from_mont(ma->e[0], c->e[0]); + from_mont(ma->e[1], c->e[1]); +} + + +unsigned int mp_add(const digit_t* a, const digit_t* b, digit_t* c, const unsigned int nwords) +{ // Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + return carry; +} + +void mp_shiftr1(digit_t* x, const unsigned int nwords) +{ // Multiprecision right shift by one. + unsigned int i; + + for (i = 0; i < nwords-1; i++) { + SHIFTR(x[i+1], x[i], 1, x[i], RADIX); + } + x[nwords-1] >>= 1; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sidh_r1.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sidh_r1.c index 7f3c63fd85..cc0d0162e2 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sidh_r1.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sidh_r1.c @@ -1,329 +1,329 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH) -*********************************************************************************************/ - -#include "sike_r1_namespace.h" -#include "P503_internal_r1.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_safety.h" - -static void clear_words(void* mem, digit_t nwords) -{ // Clear digits from memory. "nwords" indicates the number of digits to be zeroed. - // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. 
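
Annotation: fp2inv_mont in fpx_r1.c above is the conjugate-over-norm inversion in GF(p^2): because i^2 = -1, (a0 + a1*i)*(a0 - a1*i) = a0^2 + a1^2, so 1/(a0 + a1*i) = (a0 - a1*i)/(a0^2 + a1^2). The code computes the norm a0^2 + a1^2, inverts it once in GF(p) with fpinv_mont, negates a1 to form the conjugate, and scales both components by the inverted norm.
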
- unsigned int i; - volatile digit_t *v = mem; - - for (i = 0; i < nwords; i++) { - v[i] = 0; - } -} - -static void init_basis(const digit_t *gen, f2elm_t *XP, f2elm_t *XQ, f2elm_t *XR) -{ // Initialization of basis points - - fpcopy(gen, XP->e[0]); - fpcopy(gen + NWORDS_FIELD, XP->e[1]); - fpcopy(gen + 2*NWORDS_FIELD, XQ->e[0]); - fpzero(XQ->e[1]); - fpcopy(gen + 3*NWORDS_FIELD, XR->e[0]); - fpcopy(gen + 4*NWORDS_FIELD, XR->e[1]); -} - - -static void fp2_encode(const f2elm_t *x, unsigned char *enc) -{ // Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes - unsigned int i; - f2elm_t t; - - from_fp2mont(x, &t); - for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { - enc[i] = ((unsigned char*)t.e)[i]; - enc[i + FP2_ENCODED_BYTES / 2] = ((unsigned char*)t.e)[i + MAXBITS_FIELD / 8]; - } -} - - -static void fp2_decode(const unsigned char *enc, f2elm_t *x) -{ // Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation - unsigned int i; - - for (i = 0; i < 2*(MAXBITS_FIELD / 8); i++) ((unsigned char *)x->e)[i] = 0; - for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { - ((unsigned char*)x->e)[i] = enc[i]; - ((unsigned char*)x->e)[i + MAXBITS_FIELD / 8] = enc[i + FP2_ENCODED_BYTES / 2]; - } - to_fp2mont(x, x); -} - -int random_mod_order_B(unsigned char* random_digits) -{ // Generation of Bob's secret key - // Outputs random value in [0, 2^Floor(Log(2, oB)) - 1] - unsigned long long nbytes = NBITS_TO_NBYTES(OBOB_BITS-1); - - clear_words((void*)random_digits, MAXWORDS_ORDER); - GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, nbytes)); - random_digits[nbytes-1] &= MASK_BOB; // Masking last byte - - return S2N_SUCCESS; -} - - -int EphemeralKeyGeneration_A(const digit_t* PrivateKeyA, unsigned char* PublicKeyA) -{ // Alice's ephemeral public key generation - // Input: a private key PrivateKeyA in the range [0, 2^eA - 1]. - // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
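
Annotation: clear_words above writes through a volatile pointer so that the zeroisation of secret material cannot be removed as a dead store; a plain memset on a buffer that is never read again is exactly the kind of call an optimiser may delete. A minimal byte-wise equivalent sketch (generic, not the function used here):

#include <stddef.h>

/* Zeroise a buffer in a way the optimiser must keep: every store goes
   through a volatile-qualified pointer and so counts as observable. */
static void secure_zero(void *mem, size_t len)
{
    volatile unsigned char *p = (volatile unsigned char *)mem;
    while (len--) {
        *p++ = 0;
    }
}
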
- point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_ALICE]; - f2elm_t _XPA, _XQA, _XRA, coeff[3], _A24plus = {0}, _C24 = {0}, _A = {0}; - f2elm_t *XPA=&_XPA, *XQA=&_XQA, *XRA=&_XRA, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; - - - // Initialize basis points - init_basis((const digit_t*)A_gen, XPA, XQA, XRA); - init_basis((const digit_t*)B_gen, &phiP->X, &phiQ->X, &phiR->X); - fpcopy((const digit_t*)&Montgomery_one, (phiP->Z.e)[0]); - fpcopy((const digit_t*)&Montgomery_one, (phiQ->Z.e)[0]); - fpcopy((const digit_t*)&Montgomery_one, (phiR->Z.e)[0]); - - // Initialize constants - fpcopy((const digit_t*)&Montgomery_one, A24plus->e[0]); - fp2add(A24plus, A24plus, C24); - - // Retrieve kernel point - LADDER3PT(XPA, XQA, XRA, PrivateKeyA, ALICE, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Alice; row++) { - while (index < MAX_Alice-row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Alice[ii++]; - xDBLe(R, R, A24plus, C24, (int)(2*m)); - index += m; - } - get_4_isog(R, A24plus, C24, coeff); - - for (i = 0; i < npts; i++) { - eval_4_isog(pts[i], coeff); - } - eval_4_isog(phiP, coeff); - eval_4_isog(phiQ, coeff); - eval_4_isog(phiR, coeff); - - fp2copy(&pts[npts-1]->X, &R->X); - fp2copy(&pts[npts-1]->Z, &R->Z); - index = pts_index[npts-1]; - npts -= 1; - } - - get_4_isog(R, A24plus, C24, coeff); - eval_4_isog(phiP, coeff); - eval_4_isog(phiQ, coeff); - eval_4_isog(phiR, coeff); - - inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); - fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); - fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); - fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); - - // Format public key - fp2_encode(&phiP->X, PublicKeyA); - fp2_encode(&phiQ->X, PublicKeyA + FP2_ENCODED_BYTES); - fp2_encode(&phiR->X, PublicKeyA + 2*FP2_ENCODED_BYTES); - - return S2N_SUCCESS; -} - - -int EphemeralKeyGeneration_B(const digit_t* PrivateKeyB, unsigned char* PublicKeyB) -{ // Bob's ephemeral public key generation - // Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. - // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
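
Annotation: EphemeralKeyGeneration_A above walks the isogeny computation as a tree: strat_Alice says how many doublings (xDBLe) to apply before pushing the current point onto the pts stack, each leaf yields a 4-isogeny (get_4_isog) that is then evaluated on every stacked point, and the most recently pushed point is popped to continue. Below is a control-flow-only sketch of that pattern; plain integers stand in for projective points, the curve arithmetic is reduced to comments, and the strategy values are made up purely so the indices stay in range for E = 8 rows.

#include <stdio.h>

#define E        8
#define MAX_PTS  8

int main(void)
{
    static const unsigned strat[] = {4, 2, 1, 1, 2, 1, 1};  /* illustrative only */
    unsigned pts[MAX_PTS], pts_index[MAX_PTS];
    unsigned npts = 0, ii = 0, index = 0, row, i, m, R = 0;

    for (row = 1; row < E; row++) {
        while (index < E - row) {
            pts[npts] = R;                 /* push current point             */
            pts_index[npts++] = index;
            m = strat[ii++];
            R += m;                        /* stands in for xDBLe(.., 2*m)   */
            index += m;
        }
        /* at a leaf: get_4_isog(R, ...), then eval_4_isog on every pts[i]   */
        for (i = 0; i < npts; i++) { /* eval_4_isog(pts[i], coeff) */ }

        R = pts[npts - 1];                 /* pop the most recent point      */
        index = pts_index[npts - 1];
        npts -= 1;
    }
    /* the final get_4_isog / eval_4_isog calls happen after the loop        */
    printf("traversal finished, npts = %u\n", npts);
    return 0;
}
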
- point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB]; - f2elm_t _XPB, _XQB, _XRB, coeff[3], _A24plus = {0}, _A24minus = {0}, _A = {0}; - f2elm_t *XPB=&_XPB, *XQB=&_XQB, *XRB=&_XRB, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; - - // Initialize basis points - init_basis((const digit_t*)B_gen, XPB, XQB, XRB); - init_basis((const digit_t*)A_gen, &phiP->X, &phiQ->X, &phiR->X); - fpcopy((const digit_t*)&Montgomery_one, (phiP->Z.e)[0]); - fpcopy((const digit_t*)&Montgomery_one, (phiQ->Z.e)[0]); - fpcopy((const digit_t*)&Montgomery_one, (phiR->Z.e)[0]); - - // Initialize constants - fpcopy((const digit_t*)&Montgomery_one, A24plus->e[0]); - fp2add(A24plus, A24plus, A24plus); - fp2copy(A24plus, A24minus); - fp2neg(A24minus); - - // Retrieve kernel point - LADDER3PT(XPB, XQB, XRB, PrivateKeyB, BOB, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Bob; row++) { - while (index < MAX_Bob-row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Bob[ii++]; - xTPLe(R, R, A24minus, A24plus, (int)m); - index += m; - } - get_3_isog(R, A24minus, A24plus, coeff); - - for (i = 0; i < npts; i++) { - eval_3_isog(pts[i], coeff); - } - eval_3_isog(phiP, coeff); - eval_3_isog(phiQ, coeff); - eval_3_isog(phiR, coeff); - - fp2copy(&pts[npts-1]->X, &R->X); - fp2copy(&pts[npts-1]->Z, &R->Z); - index = pts_index[npts-1]; - npts -= 1; - } - - get_3_isog(R, A24minus, A24plus, coeff); - eval_3_isog(phiP, coeff); - eval_3_isog(phiQ, coeff); - eval_3_isog(phiR, coeff); - - inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); - fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); - fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); - fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); - - // Format public key - fp2_encode(&phiP->X, PublicKeyB); - fp2_encode(&phiQ->X, PublicKeyB + FP2_ENCODED_BYTES); - fp2_encode(&phiR->X, PublicKeyB + 2*FP2_ENCODED_BYTES); - - return S2N_SUCCESS; -} - - -int EphemeralSecretAgreement_A(const digit_t* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA) -{ // Alice's ephemeral shared secret computation - // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB - // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1]. - // Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. - // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes. - point_proj_t R, pts[MAX_INT_POINTS_ALICE]; - f2elm_t coeff[3], PKB[3], _jinv; - f2elm_t _A24plus = {0}, _C24 = {0}, _A = {0}; - f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; - - // Initialize images of Bob's basis - fp2_decode(PublicKeyB, &PKB[0]); - fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, &PKB[1]); - fp2_decode(PublicKeyB + 2*FP2_ENCODED_BYTES, &PKB[2]); - - // Initialize constants - get_A(&PKB[0], &PKB[1], &PKB[2], A); // TODO: Can return projective A? 
- fpadd((const digit_t*)&Montgomery_one, (const digit_t*)&Montgomery_one, C24->e[0]); - fp2add(A, C24, A24plus); - fpadd(C24->e[0], C24->e[0], C24->e[0]); - - // Retrieve kernel point - LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyA, ALICE, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Alice; row++) { - while (index < MAX_Alice-row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Alice[ii++]; - xDBLe(R, R, A24plus, C24, (int)(2*m)); - index += m; - } - get_4_isog(R, A24plus, C24, coeff); - - for (i = 0; i < npts; i++) { - eval_4_isog(pts[i], coeff); - } - - fp2copy(&pts[npts-1]->X, &R->X); - fp2copy(&pts[npts-1]->Z, &R->Z); - index = pts_index[npts-1]; - npts -= 1; - } - - get_4_isog(R, A24plus, C24, coeff); - fp2div2(C24, C24); - fp2sub(A24plus, C24, A24plus); - fp2div2(C24, C24); - j_inv(A24plus, C24, jinv); - fp2_encode(jinv, SharedSecretA); // Format shared secret - - return S2N_SUCCESS; -} - - -int EphemeralSecretAgreement_B(const digit_t* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB) -{ // Bob's ephemeral shared secret computation - // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA - // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1]. - // Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. - // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes. - point_proj_t R, pts[MAX_INT_POINTS_BOB]; - f2elm_t coeff[3], PKB[3], _jinv; - f2elm_t _A24plus = {0}, _A24minus = {0}, _A = {0}; - f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; - - // Initialize images of Alice's basis - fp2_decode(PublicKeyA, &PKB[0]); - fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, &PKB[1]); - fp2_decode(PublicKeyA + 2*FP2_ENCODED_BYTES, &PKB[2]); - - // Initialize constants - get_A(&PKB[0], &PKB[1], &PKB[2], A); // TODO: Can return projective A? 
- fpadd((const digit_t*)&Montgomery_one, (const digit_t*)&Montgomery_one, A24minus->e[0]); - fp2add(A, A24minus, A24plus); - fp2sub(A, A24minus, A24minus); - - // Retrieve kernel point - LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyB, BOB, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Bob; row++) { - while (index < MAX_Bob-row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Bob[ii++]; - xTPLe(R, R, A24minus, A24plus, (int)m); - index += m; - } - get_3_isog(R, A24minus, A24plus, coeff); - - for (i = 0; i < npts; i++) { - eval_3_isog(pts[i], coeff); - } - - fp2copy(&pts[npts-1]->X, &R->X); - fp2copy(&pts[npts-1]->Z, &R->Z); - index = pts_index[npts-1]; - npts -= 1; - } - - get_3_isog(R, A24minus, A24plus, coeff); - fp2add(A24plus, A24minus, A); - fp2add(A, A, A); - fp2sub(A24plus, A24minus, A24plus); - j_inv(A, A24plus, jinv); - fp2_encode(jinv, SharedSecretB); // Format shared secret - - return S2N_SUCCESS; -} +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH) +*********************************************************************************************/ + +#include "sike_r1_namespace.h" +#include "P503_internal_r1.h" +#include "pq-crypto/s2n_pq_random.h" +#include "utils/s2n_safety.h" + +static void clear_words(void* mem, digit_t nwords) +{ // Clear digits from memory. "nwords" indicates the number of digits to be zeroed. + // This function uses the volatile type qualifier to inform the compiler not to optimize out the memory clearing. + unsigned int i; + volatile digit_t *v = mem; + + for (i = 0; i < nwords; i++) { + v[i] = 0; + } +} + +static void init_basis(const digit_t *gen, f2elm_t *XP, f2elm_t *XQ, f2elm_t *XR) +{ // Initialization of basis points + + fpcopy(gen, XP->e[0]); + fpcopy(gen + NWORDS_FIELD, XP->e[1]); + fpcopy(gen + 2*NWORDS_FIELD, XQ->e[0]); + fpzero(XQ->e[1]); + fpcopy(gen + 3*NWORDS_FIELD, XR->e[0]); + fpcopy(gen + 4*NWORDS_FIELD, XR->e[1]); +} + + +static void fp2_encode(const f2elm_t *x, unsigned char *enc) +{ // Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes + unsigned int i; + f2elm_t t; + + from_fp2mont(x, &t); + for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { + enc[i] = ((unsigned char*)t.e)[i]; + enc[i + FP2_ENCODED_BYTES / 2] = ((unsigned char*)t.e)[i + MAXBITS_FIELD / 8]; + } +} + + +static void fp2_decode(const unsigned char *enc, f2elm_t *x) +{ // Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation + unsigned int i; + + for (i = 0; i < 2*(MAXBITS_FIELD / 8); i++) ((unsigned char *)x->e)[i] = 0; + for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { + ((unsigned char*)x->e)[i] = enc[i]; + ((unsigned char*)x->e)[i + MAXBITS_FIELD / 8] = enc[i + FP2_ENCODED_BYTES / 2]; + } + to_fp2mont(x, x); +} + +int random_mod_order_B(unsigned char* random_digits) +{ // Generation of Bob's secret key + // Outputs random value in [0, 2^Floor(Log(2, oB)) - 1] + unsigned long long nbytes = NBITS_TO_NBYTES(OBOB_BITS-1); + + clear_words((void*)random_digits, MAXWORDS_ORDER); + GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, nbytes)); + random_digits[nbytes-1] &= MASK_BOB; // Masking last byte + + return S2N_SUCCESS; +} + + +int EphemeralKeyGeneration_A(const digit_t* PrivateKeyA, unsigned char* PublicKeyA) +{ // Alice's 
ephemeral public key generation + // Input: a private key PrivateKeyA in the range [0, 2^eA - 1]. + // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. + point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_ALICE]; + f2elm_t _XPA, _XQA, _XRA, coeff[3], _A24plus = {0}, _C24 = {0}, _A = {0}; + f2elm_t *XPA=&_XPA, *XQA=&_XQA, *XRA=&_XRA, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + + // Initialize basis points + init_basis((const digit_t*)A_gen, XPA, XQA, XRA); + init_basis((const digit_t*)B_gen, &phiP->X, &phiQ->X, &phiR->X); + fpcopy((const digit_t*)&Montgomery_one, (phiP->Z.e)[0]); + fpcopy((const digit_t*)&Montgomery_one, (phiQ->Z.e)[0]); + fpcopy((const digit_t*)&Montgomery_one, (phiR->Z.e)[0]); + + // Initialize constants + fpcopy((const digit_t*)&Montgomery_one, A24plus->e[0]); + fp2add(A24plus, A24plus, C24); + + // Retrieve kernel point + LADDER3PT(XPA, XQA, XRA, PrivateKeyA, ALICE, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Alice; row++) { + while (index < MAX_Alice-row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Alice[ii++]; + xDBLe(R, R, A24plus, C24, (int)(2*m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + fp2copy(&pts[npts-1]->X, &R->X); + fp2copy(&pts[npts-1]->Z, &R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); + fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); + fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); + fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); + + // Format public key + fp2_encode(&phiP->X, PublicKeyA); + fp2_encode(&phiQ->X, PublicKeyA + FP2_ENCODED_BYTES); + fp2_encode(&phiR->X, PublicKeyA + 2*FP2_ENCODED_BYTES); + + return S2N_SUCCESS; +} + + +int EphemeralKeyGeneration_B(const digit_t* PrivateKeyB, unsigned char* PublicKeyB) +{ // Bob's ephemeral public key generation + // Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. + // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
+ point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB]; + f2elm_t _XPB, _XQB, _XRB, coeff[3], _A24plus = {0}, _A24minus = {0}, _A = {0}; + f2elm_t *XPB=&_XPB, *XQB=&_XQB, *XRB=&_XRB, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize basis points + init_basis((const digit_t*)B_gen, XPB, XQB, XRB); + init_basis((const digit_t*)A_gen, &phiP->X, &phiQ->X, &phiR->X); + fpcopy((const digit_t*)&Montgomery_one, (phiP->Z.e)[0]); + fpcopy((const digit_t*)&Montgomery_one, (phiQ->Z.e)[0]); + fpcopy((const digit_t*)&Montgomery_one, (phiR->Z.e)[0]); + + // Initialize constants + fpcopy((const digit_t*)&Montgomery_one, A24plus->e[0]); + fp2add(A24plus, A24plus, A24plus); + fp2copy(A24plus, A24minus); + fp2neg(A24minus); + + // Retrieve kernel point + LADDER3PT(XPB, XQB, XRB, PrivateKeyB, BOB, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Bob; row++) { + while (index < MAX_Bob-row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Bob[ii++]; + xTPLe(R, R, A24minus, A24plus, (int)m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + fp2copy(&pts[npts-1]->X, &R->X); + fp2copy(&pts[npts-1]->Z, &R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); + fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); + fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); + fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); + + // Format public key + fp2_encode(&phiP->X, PublicKeyB); + fp2_encode(&phiQ->X, PublicKeyB + FP2_ENCODED_BYTES); + fp2_encode(&phiR->X, PublicKeyB + 2*FP2_ENCODED_BYTES); + + return S2N_SUCCESS; +} + + +int EphemeralSecretAgreement_A(const digit_t* PrivateKeyA, const unsigned char* PublicKeyB, unsigned char* SharedSecretA) +{ // Alice's ephemeral shared secret computation + // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB + // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1]. + // Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. + // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes. + point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + f2elm_t coeff[3], PKB[3], _jinv; + f2elm_t _A24plus = {0}, _C24 = {0}, _A = {0}; + f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize images of Bob's basis + fp2_decode(PublicKeyB, &PKB[0]); + fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, &PKB[1]); + fp2_decode(PublicKeyB + 2*FP2_ENCODED_BYTES, &PKB[2]); + + // Initialize constants + get_A(&PKB[0], &PKB[1], &PKB[2], A); // TODO: Can return projective A? 
+ fpadd((const digit_t*)&Montgomery_one, (const digit_t*)&Montgomery_one, C24->e[0]); + fp2add(A, C24, A24plus); + fpadd(C24->e[0], C24->e[0], C24->e[0]); + + // Retrieve kernel point + LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyA, ALICE, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Alice; row++) { + while (index < MAX_Alice-row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Alice[ii++]; + xDBLe(R, R, A24plus, C24, (int)(2*m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + + fp2copy(&pts[npts-1]->X, &R->X); + fp2copy(&pts[npts-1]->Z, &R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + fp2div2(C24, C24); + fp2sub(A24plus, C24, A24plus); + fp2div2(C24, C24); + j_inv(A24plus, C24, jinv); + fp2_encode(jinv, SharedSecretA); // Format shared secret + + return S2N_SUCCESS; +} + + +int EphemeralSecretAgreement_B(const digit_t* PrivateKeyB, const unsigned char* PublicKeyA, unsigned char* SharedSecretB) +{ // Bob's ephemeral shared secret computation + // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA + // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1]. + // Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. + // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes. + point_proj_t R, pts[MAX_INT_POINTS_BOB]; + f2elm_t coeff[3], PKB[3], _jinv; + f2elm_t _A24plus = {0}, _A24minus = {0}, _A = {0}; + f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize images of Alice's basis + fp2_decode(PublicKeyA, &PKB[0]); + fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, &PKB[1]); + fp2_decode(PublicKeyA + 2*FP2_ENCODED_BYTES, &PKB[2]); + + // Initialize constants + get_A(&PKB[0], &PKB[1], &PKB[2], A); // TODO: Can return projective A? 
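
Annotation: EphemeralSecretAgreement_A above finishes by undoing the (A+2C : 4C) projective form (halving C24, subtracting it from A24plus, and halving C24 again recovers (A : C)) and then hands the curve to j_inv. The j-invariant is the natural shared secret: Alice's walk from Bob's public curve and Bob's walk from Alice's public curve land on isomorphic curves, and isomorphic curves have equal j-invariants, so fp2_encode(jinv) is a value both sides can reproduce.
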
+ fpadd((const digit_t*)&Montgomery_one, (const digit_t*)&Montgomery_one, A24minus->e[0]); + fp2add(A, A24minus, A24plus); + fp2sub(A, A24minus, A24minus); + + // Retrieve kernel point + LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyB, BOB, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Bob; row++) { + while (index < MAX_Bob-row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Bob[ii++]; + xTPLe(R, R, A24minus, A24plus, (int)m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + + fp2copy(&pts[npts-1]->X, &R->X); + fp2copy(&pts[npts-1]->Z, &R->Z); + index = pts_index[npts-1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + fp2add(A24plus, A24minus, A); + fp2add(A, A, A); + fp2sub(A24plus, A24minus, A24plus); + j_inv(A, A24plus, jinv); + fp2_encode(jinv, SharedSecretB); // Format shared secret + + return S2N_SUCCESS; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_kem.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_kem.c index 858676fe67..00a98353d0 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_kem.c @@ -1,121 +1,121 @@ -/******************************************************************************************** -* Supersingular Isogeny Key Encapsulation Library -* -* Abstract: supersingular isogeny key encapsulation (SIKE) protocol -*********************************************************************************************/ - -#include <string.h> -#include "P503_internal_r1.h" -#include "fips202_r1.h" -#include "pq-crypto/s2n_pq_random.h" -#include "utils/s2n_safety.h" -#include "tls/s2n_kem.h" - -int SIKE_P503_r1_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) -{ // SIKE's key generation - // Outputs: secret key sk (SIKE_P503_R1_SECRET_KEY_BYTES = MSG_BYTES + SECRETKEY_B_BYTES + SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) - // public key pk (SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) - - digit_t _sk[SECRETKEY_B_BYTES/sizeof(digit_t)]; - - // Generate lower portion of secret key sk <- s||SK - GUARD_AS_POSIX(s2n_get_random_bytes(sk, MSG_BYTES)); - GUARD(random_mod_order_B((unsigned char*)_sk)); - - // Generate public key pk - EphemeralKeyGeneration_B(_sk, pk); - - memcpy(sk + MSG_BYTES, _sk, SECRETKEY_B_BYTES); - // Append public key pk to secret key sk - memcpy(&sk[MSG_BYTES + SECRETKEY_B_BYTES], pk, SIKE_P503_R1_PUBLIC_KEY_BYTES); - - return 0; -} - - -int SIKE_P503_r1_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) -{ // SIKE's encapsulation - // Input: public key pk (SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) - // Outputs: shared secret ss (SIKE_P503_R1_SHARED_SECRET_BYTES bytes) - // ciphertext message ct (SIKE_P503_R1_CIPHERTEXT_BYTES = SIKE_P503_R1_PUBLIC_KEY_BYTES + MSG_BYTES bytes) - const uint16_t G = 0; - const uint16_t H = 1; - const uint16_t P = 2; - union { - unsigned char b[SECRETKEY_A_BYTES]; - digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; - } ephemeralsk; - unsigned char jinvariant[FP2_ENCODED_BYTES]; - unsigned char h[MSG_BYTES]; - unsigned char temp[SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES]; - unsigned int i; - - // Generate ephemeralsk <- G(m||pk) mod oA - GUARD_AS_POSIX(s2n_get_random_bytes(temp, MSG_BYTES)); - memcpy(&temp[MSG_BYTES], pk, SIKE_P503_R1_PUBLIC_KEY_BYTES); - cshake256_simple(ephemeralsk.b, SECRETKEY_A_BYTES, G, temp, 
SIKE_P503_R1_PUBLIC_KEY_BYTES+MSG_BYTES); - - /* ephemeralsk is a union; the memory set here through .b will get accessed through the .d member later */ - /* cppcheck-suppress unreadVariable */ - ephemeralsk.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; - - // Encrypt - EphemeralKeyGeneration_A(ephemeralsk.d, ct); - EphemeralSecretAgreement_A(ephemeralsk.d, pk, jinvariant); - cshake256_simple(h, MSG_BYTES, P, jinvariant, FP2_ENCODED_BYTES); - for (i = 0; i < MSG_BYTES; i++) ct[i + SIKE_P503_R1_PUBLIC_KEY_BYTES] = temp[i] ^ h[i]; - - // Generate shared secret ss <- H(m||ct) - memcpy(&temp[MSG_BYTES], ct, SIKE_P503_R1_CIPHERTEXT_BYTES); - cshake256_simple(ss, SIKE_P503_R1_SHARED_SECRET_BYTES, H, temp, SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES); - - return 0; -} - - -int SIKE_P503_r1_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) -{ // SIKE's decapsulation - // Input: secret key sk (SIKE_P503_R1_SECRET_KEY_BYTES = MSG_BYTES + SECRETKEY_B_BYTES + SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) - // ciphertext message ct (SIKE_P503_R1_CIPHERTEXT_BYTES = SIKE_P503_R1_PUBLIC_KEY_BYTES + MSG_BYTES bytes) - // Outputs: shared secret ss (SIKE_P503_R1_SHARED_SECRET_BYTES bytes) - const uint16_t G = 0; - const uint16_t H = 1; - const uint16_t P = 2; - union { - unsigned char b[SECRETKEY_A_BYTES]; - digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; - } ephemeralsk_; - unsigned char jinvariant_[FP2_ENCODED_BYTES]; - unsigned char h_[MSG_BYTES]; - unsigned char c0_[SIKE_P503_R1_PUBLIC_KEY_BYTES]; - unsigned char temp[SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES]; - unsigned int i; - - digit_t _sk[SECRETKEY_B_BYTES/sizeof(digit_t)]; - - memcpy(_sk, sk + MSG_BYTES, SECRETKEY_B_BYTES); - - // Decrypt - EphemeralSecretAgreement_B(_sk, ct, jinvariant_); - cshake256_simple(h_, MSG_BYTES, P, jinvariant_, FP2_ENCODED_BYTES); - for (i = 0; i < MSG_BYTES; i++) temp[i] = ct[i + SIKE_P503_R1_PUBLIC_KEY_BYTES] ^ h_[i]; - - // Generate ephemeralsk_ <- G(m||pk) mod oA - memcpy(&temp[MSG_BYTES], &sk[MSG_BYTES + SECRETKEY_B_BYTES], SIKE_P503_R1_PUBLIC_KEY_BYTES); - cshake256_simple(ephemeralsk_.b, SECRETKEY_A_BYTES, G, temp, SIKE_P503_R1_PUBLIC_KEY_BYTES+MSG_BYTES); - - /* ephemeralsk_ is a union; the memory set here through .b will get accessed through the .d member later */ - /* cppcheck-suppress unreadVariable */ - /* cppcheck-suppress uninitvar */ - ephemeralsk_.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; - - // Generate shared secret ss <- H(m||ct) or output ss <- H(s||ct) - EphemeralKeyGeneration_A(ephemeralsk_.d, c0_); - if (memcmp(c0_, ct, SIKE_P503_R1_PUBLIC_KEY_BYTES) != 0) { - memcpy(temp, sk, MSG_BYTES); - } - memcpy(&temp[MSG_BYTES], ct, SIKE_P503_R1_CIPHERTEXT_BYTES); - cshake256_simple(ss, SIKE_P503_R1_SHARED_SECRET_BYTES, H, temp, SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES); - - return 0; -} +/******************************************************************************************** +* Supersingular Isogeny Key Encapsulation Library +* +* Abstract: supersingular isogeny key encapsulation (SIKE) protocol +*********************************************************************************************/ + +#include <string.h> +#include "P503_internal_r1.h" +#include "fips202_r1.h" +#include "pq-crypto/s2n_pq_random.h" +#include "utils/s2n_safety.h" +#include "tls/s2n_kem.h" + +int SIKE_P503_r1_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) +{ // SIKE's key generation + // Outputs: secret key sk (SIKE_P503_R1_SECRET_KEY_BYTES = MSG_BYTES + SECRETKEY_B_BYTES + SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) + 
// public key pk (SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) + + digit_t _sk[SECRETKEY_B_BYTES/sizeof(digit_t)]; + + // Generate lower portion of secret key sk <- s||SK + GUARD_AS_POSIX(s2n_get_random_bytes(sk, MSG_BYTES)); + GUARD(random_mod_order_B((unsigned char*)_sk)); + + // Generate public key pk + EphemeralKeyGeneration_B(_sk, pk); + + memcpy(sk + MSG_BYTES, _sk, SECRETKEY_B_BYTES); + // Append public key pk to secret key sk + memcpy(&sk[MSG_BYTES + SECRETKEY_B_BYTES], pk, SIKE_P503_R1_PUBLIC_KEY_BYTES); + + return 0; +} + + +int SIKE_P503_r1_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) +{ // SIKE's encapsulation + // Input: public key pk (SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) + // Outputs: shared secret ss (SIKE_P503_R1_SHARED_SECRET_BYTES bytes) + // ciphertext message ct (SIKE_P503_R1_CIPHERTEXT_BYTES = SIKE_P503_R1_PUBLIC_KEY_BYTES + MSG_BYTES bytes) + const uint16_t G = 0; + const uint16_t H = 1; + const uint16_t P = 2; + union { + unsigned char b[SECRETKEY_A_BYTES]; + digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; + } ephemeralsk; + unsigned char jinvariant[FP2_ENCODED_BYTES]; + unsigned char h[MSG_BYTES]; + unsigned char temp[SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES]; + unsigned int i; + + // Generate ephemeralsk <- G(m||pk) mod oA + GUARD_AS_POSIX(s2n_get_random_bytes(temp, MSG_BYTES)); + memcpy(&temp[MSG_BYTES], pk, SIKE_P503_R1_PUBLIC_KEY_BYTES); + cshake256_simple(ephemeralsk.b, SECRETKEY_A_BYTES, G, temp, SIKE_P503_R1_PUBLIC_KEY_BYTES+MSG_BYTES); + + /* ephemeralsk is a union; the memory set here through .b will get accessed through the .d member later */ + /* cppcheck-suppress unreadVariable */ + ephemeralsk.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + // Encrypt + EphemeralKeyGeneration_A(ephemeralsk.d, ct); + EphemeralSecretAgreement_A(ephemeralsk.d, pk, jinvariant); + cshake256_simple(h, MSG_BYTES, P, jinvariant, FP2_ENCODED_BYTES); + for (i = 0; i < MSG_BYTES; i++) ct[i + SIKE_P503_R1_PUBLIC_KEY_BYTES] = temp[i] ^ h[i]; + + // Generate shared secret ss <- H(m||ct) + memcpy(&temp[MSG_BYTES], ct, SIKE_P503_R1_CIPHERTEXT_BYTES); + cshake256_simple(ss, SIKE_P503_R1_SHARED_SECRET_BYTES, H, temp, SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES); + + return 0; +} + + +int SIKE_P503_r1_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) +{ // SIKE's decapsulation + // Input: secret key sk (SIKE_P503_R1_SECRET_KEY_BYTES = MSG_BYTES + SECRETKEY_B_BYTES + SIKE_P503_R1_PUBLIC_KEY_BYTES bytes) + // ciphertext message ct (SIKE_P503_R1_CIPHERTEXT_BYTES = SIKE_P503_R1_PUBLIC_KEY_BYTES + MSG_BYTES bytes) + // Outputs: shared secret ss (SIKE_P503_R1_SHARED_SECRET_BYTES bytes) + const uint16_t G = 0; + const uint16_t H = 1; + const uint16_t P = 2; + union { + unsigned char b[SECRETKEY_A_BYTES]; + digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; + } ephemeralsk_; + unsigned char jinvariant_[FP2_ENCODED_BYTES]; + unsigned char h_[MSG_BYTES]; + unsigned char c0_[SIKE_P503_R1_PUBLIC_KEY_BYTES]; + unsigned char temp[SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES]; + unsigned int i; + + digit_t _sk[SECRETKEY_B_BYTES/sizeof(digit_t)]; + + memcpy(_sk, sk + MSG_BYTES, SECRETKEY_B_BYTES); + + // Decrypt + EphemeralSecretAgreement_B(_sk, ct, jinvariant_); + cshake256_simple(h_, MSG_BYTES, P, jinvariant_, FP2_ENCODED_BYTES); + for (i = 0; i < MSG_BYTES; i++) temp[i] = ct[i + SIKE_P503_R1_PUBLIC_KEY_BYTES] ^ h_[i]; + + // Generate ephemeralsk_ <- G(m||pk) mod oA + memcpy(&temp[MSG_BYTES], &sk[MSG_BYTES + SECRETKEY_B_BYTES], SIKE_P503_R1_PUBLIC_KEY_BYTES); + 
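+ // What follows is the re-encryption check of the Fujisaki-Okamoto-style transform:
+ // the ephemeral secret is re-derived from the recovered message m' and the public
+ // key stored inside sk, Alice's key generation is re-run, and the result c0_ is
+ // compared with the first ciphertext component. On mismatch, the random value s
+ // (the first MSG_BYTES of sk) replaces m', so the output becomes ss = H(s||ct)
+ // instead of H(m||ct) (implicit rejection).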
cshake256_simple(ephemeralsk_.b, SECRETKEY_A_BYTES, G, temp, SIKE_P503_R1_PUBLIC_KEY_BYTES+MSG_BYTES); + + /* ephemeralsk_ is a union; the memory set here through .b will get accessed through the .d member later */ + /* cppcheck-suppress unreadVariable */ + /* cppcheck-suppress uninitvar */ + ephemeralsk_.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + // Generate shared secret ss <- H(m||ct) or output ss <- H(s||ct) + EphemeralKeyGeneration_A(ephemeralsk_.d, c0_); + if (memcmp(c0_, ct, SIKE_P503_R1_PUBLIC_KEY_BYTES) != 0) { + memcpy(temp, sk, MSG_BYTES); + } + memcpy(&temp[MSG_BYTES], ct, SIKE_P503_R1_CIPHERTEXT_BYTES); + cshake256_simple(ss, SIKE_P503_R1_SHARED_SECRET_BYTES, H, temp, SIKE_P503_R1_CIPHERTEXT_BYTES+MSG_BYTES); + + return 0; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_namespace.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_namespace.h index 68d9b40c4b..915272575f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_namespace.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r1/sike_r1_namespace.h @@ -1,60 +1,60 @@ -#ifndef SIKE_R1_NAMESPACE_H -#define SIKE_R1_NAMESPACE_H - -#define random_mod_order_B random_mod_order_B_r1 -#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_r1 -#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_r1 -#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_r1 -#define EphemeralSecretAgreement_B EphemeralSecretAgreement_B_r1 -#define xDBL xDBL_r1 -#define xDBLe xDBLe_r1 -#define get_4_isog get_4_isog_r1 -#define eval_4_isog eval_4_isog_r1 -#define xTPL xTPL_r1 -#define xTPLe xTPLe_r1 -#define get_3_isog get_3_isog_r1 -#define eval_3_isog eval_3_isog_r1 -#define inv_3_way inv_3_way_r1 -#define get_A get_A_r1 -#define j_inv j_inv_r1 -#define xDBLADD xDBLADD_r1 -#define swap_points swap_points_r1 -#define LADDER3PT LADDER3PT_r1 -#define load64 load64_r1 -#define store64 store64_r1 -#define KeccakF1600_StatePermute KeccakF1600_StatePermute_r1 -#define keccak_absorb keccak_absorb_r1 -#define keccak_squeezeblocks keccak_squeezeblocks_r1 -#define cshake256_simple_absorb cshake256_simple_absorb_r1 -#define cshake256_simple cshake256_simple_r1 -#define digit_x_digit digit_x_digit_r1 -#define mp_mul mp_mul_r1 -#define rdc_mont rdc_mont_r1 -#define to_mont to_mont_r1 -#define from_mont from_mont_r1 -#define copy_words copy_words_r1 -#define mp_addfast mp_addfast_r1 -#define mp_addfastx2 mp_addfastx2_r1 -#define mp_sub mp_sub_r1 -#define mp_subfast mp_subfast_r1 -#define to_fp2mont to_fp2mont_r1 -#define from_fp2mont from_fp2mont_r1 -#define mp_add mp_add_r1 -#define mp_shiftr1 mp_shiftr1_r1 -#define Alice_order Alice_order_r1 -#define Bob_order Bob_order_r1 -#define A_gen A_gen_r1 -#define B_gen B_gen_r1 -#define Montgomery_R2 Montgomery_R2_r1 -#define Montgomery_one Montgomery_one_r1 -#define Montgomery_Rprime Montgomery_Rprime_r1 -#define Montgomery_rprime Montgomery_rprime_r1 -#define Border_div3 Border_div3_r1 -#define strat_Alice strat_Alice_r1 -#define strat_Bob strat_Bob_r1 -#define clear_words clear_words_r1 -#define init_basis init_basis_r1 -#define fp2_encode fp2_encode_r1 -#define fp2_decode fp2_decode_r1 - -#endif // SIKE_R1_NAMESPACE_H +#ifndef SIKE_R1_NAMESPACE_H +#define SIKE_R1_NAMESPACE_H + +#define random_mod_order_B random_mod_order_B_r1 +#define EphemeralKeyGeneration_A EphemeralKeyGeneration_A_r1 +#define EphemeralKeyGeneration_B EphemeralKeyGeneration_B_r1 +#define EphemeralSecretAgreement_A EphemeralSecretAgreement_A_r1 +#define EphemeralSecretAgreement_B 
EphemeralSecretAgreement_B_r1 +#define xDBL xDBL_r1 +#define xDBLe xDBLe_r1 +#define get_4_isog get_4_isog_r1 +#define eval_4_isog eval_4_isog_r1 +#define xTPL xTPL_r1 +#define xTPLe xTPLe_r1 +#define get_3_isog get_3_isog_r1 +#define eval_3_isog eval_3_isog_r1 +#define inv_3_way inv_3_way_r1 +#define get_A get_A_r1 +#define j_inv j_inv_r1 +#define xDBLADD xDBLADD_r1 +#define swap_points swap_points_r1 +#define LADDER3PT LADDER3PT_r1 +#define load64 load64_r1 +#define store64 store64_r1 +#define KeccakF1600_StatePermute KeccakF1600_StatePermute_r1 +#define keccak_absorb keccak_absorb_r1 +#define keccak_squeezeblocks keccak_squeezeblocks_r1 +#define cshake256_simple_absorb cshake256_simple_absorb_r1 +#define cshake256_simple cshake256_simple_r1 +#define digit_x_digit digit_x_digit_r1 +#define mp_mul mp_mul_r1 +#define rdc_mont rdc_mont_r1 +#define to_mont to_mont_r1 +#define from_mont from_mont_r1 +#define copy_words copy_words_r1 +#define mp_addfast mp_addfast_r1 +#define mp_addfastx2 mp_addfastx2_r1 +#define mp_sub mp_sub_r1 +#define mp_subfast mp_subfast_r1 +#define to_fp2mont to_fp2mont_r1 +#define from_fp2mont from_fp2mont_r1 +#define mp_add mp_add_r1 +#define mp_shiftr1 mp_shiftr1_r1 +#define Alice_order Alice_order_r1 +#define Bob_order Bob_order_r1 +#define A_gen A_gen_r1 +#define B_gen B_gen_r1 +#define Montgomery_R2 Montgomery_R2_r1 +#define Montgomery_one Montgomery_one_r1 +#define Montgomery_Rprime Montgomery_Rprime_r1 +#define Montgomery_rprime Montgomery_rprime_r1 +#define Border_div3 Border_div3_r1 +#define strat_Alice strat_Alice_r1 +#define strat_Bob strat_Bob_r1 +#define clear_words clear_words_r1 +#define init_basis init_basis_r1 +#define fp2_encode fp2_encode_r1 +#define fp2_decode fp2_decode_r1 + +#endif // SIKE_R1_NAMESPACE_H diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434.c index 4288a5d186..c276f9a716 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434.c @@ -1,117 +1,117 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: supersingular isogeny parameters and generation of functions for P434 -*********************************************************************************************/ - -#include "P434_api.h" -#include "P434_internal.h" -#include "pq-crypto/s2n_pq.h" - -// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: -// -------------------------------------------------------------------------------------------------- -// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). -// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. -// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. -// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. -// For example, a 434-bit field element is represented with Ceil(434 / 64) = 7 64-bit digits or Ceil(434 / 32) = 14 32-bit digits. - -// -// Curve isogeny system "SIDHp434". 
Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1 -// - - -// The constants p434, p434p1, and p434x2 have been duplicated in -// sikep434r2_fp_x64_asm.S. If, for any reason, the constants are changed in -// one file, they should be updated in the other file as well. -const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, - 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344}; -const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000, - 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344}; -const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF, - 0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688}; -// Order of Alice's subgroup -const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000}; -// Order of Bob's subgroup -const uint64_t Bob_order[NWORDS64_ORDER] = {0x58AEA3FDC1767AE3, 0xC520567BC65C7831, 0x1773446CFC5FD681, 0x0000000002341F27}; -// Alice's generator values {XPA0 + XPA1*i, XQA0 + xQA1*i, XRA0 + XRA1*i} in GF(p434^2), expressed in Montgomery representation -const uint64_t A_gen[6 * NWORDS64_FIELD] = {0x05ADF455C5C345BF, 0x91935C5CC767AC2B, 0xAFE4E879951F0257, 0x70E792DC89FA27B1, - 0xF797F526BB48C8CD, 0x2181DB6131AF621F, 0x00000A1C08B1ECC4, // XPA0 - 0x74840EB87CDA7788, 0x2971AA0ECF9F9D0B, 0xCB5732BDF41715D5, 0x8CD8E51F7AACFFAA, - 0xA7F424730D7E419F, 0xD671EB919A179E8C, 0x0000FFA26C5A924A, // XPA1 - 0xFEC6E64588B7273B, 0xD2A626D74CBBF1C6, 0xF8F58F07A78098C7, 0xE23941F470841B03, - 0x1B63EDA2045538DD, 0x735CFEB0FFD49215, 0x0001C4CB77542876, // XQA0 - 0xADB0F733C17FFDD6, 0x6AFFBD037DA0A050, 0x680EC43DB144E02F, 0x1E2E5D5FF524E374, - 0xE2DDA115260E2995, 0xA6E4B552E2EDE508, 0x00018ECCDDF4B53E, // XQA1 - 0x01BA4DB518CD6C7D, 0x2CB0251FE3CC0611, 0x259B0C6949A9121B, 0x60E17AC16D2F82AD, - 0x3AA41F1CE175D92D, 0x413FBE6A9B9BC4F3, 0x00022A81D8D55643, // XRA0 - 0xB8ADBC70FC82E54A, 0xEF9CDDB0D5FADDED, 0x5820C734C80096A0, 0x7799994BAA96E0E4, - 0x044961599E379AF8, 0xDB2B94FBF09F27E2, 0x0000B87FC716C0C6}; // XRA1 -// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p434^2), expressed in Montgomery representation -const uint64_t B_gen[6 * NWORDS64_FIELD] = {0x6E5497556EDD48A3, 0x2A61B501546F1C05, 0xEB919446D049887D, 0x5864A4A69D450C4F, - 0xB883F276A6490D2B, 0x22CC287022D5F5B9, 0x0001BED4772E551F, // XPB0 - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 - 0xFAE2A3F93D8B6B8E, 0x494871F51700FE1C, 0xEF1A94228413C27C, 0x498FF4A4AF60BD62, - 0xB00AD2A708267E8A, 0xF4328294E017837F, 0x000034080181D8AE, // XQB0 - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, - 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 - 0x283B34FAFEFDC8E4, 0x9208F44977C3E647, 0x7DEAE962816F4E9A, 0x68A2BA8AA262EC9D, - 0x8176F112EA43F45B, 0x02106D022634F504, 0x00007E8A50F02E37, // XRB0 - 0xB378B7C1DA22CCB1, 0x6D089C99AD1D9230, 0xEBE15711813E2369, 0x2B35A68239D48A53, - 0x445F6FD138407C93, 0xBEF93B29A3F6B54B, 0x000173FA910377D3}; // XRB1 -// Montgomery constant Montgomery_R2 = (2^448)^2 mod p434 -const uint64_t Montgomery_R2[NWORDS64_FIELD] = {0x28E55B65DCD69B30, 0xACEC7367768798C2, 0xAB27973F8311688D, 0x175CC6AF8D6C7C0B, - 
0xABCD92BF2DDE347E, 0x69E16A61C7686D9A, 0x000025A89BCDD12A}; -// Value one in Montgomery representation -const uint64_t Montgomery_one[NWORDS64_FIELD] = {0x000000000000742C, 0x0000000000000000, 0x0000000000000000, 0xB90FF404FC000000, - 0xD801A4FB559FACD4, 0xE93254545F77410C, 0x0000ECEEA7BD2EDA}; - -// Fixed parameters for isogeny tree computation -const unsigned int strat_Alice[MAX_Alice - 1] = { - 48, 28, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, - 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, - 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1}; - -const unsigned int strat_Bob[MAX_Bob - 1] = { - 66, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, - 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 3, 1, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, - 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1}; - -// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions -#define fpcopy fpcopy434 -#define fpzero fpzero434 -#define fpadd fpadd434 -#define fpsub fpsub434 -#define fpneg fpneg434 -#define fpdiv2 fpdiv2_434 -#define fpcorrection fpcorrection434 -#define fpmul_mont fpmul434_mont -#define fpsqr_mont fpsqr434_mont -#define fpinv_mont fpinv434_mont -#define fpinv_chain_mont fpinv434_chain_mont -#define fp2copy fp2copy434 -#define fp2zero fp2zero434 -#define fp2add fp2add434 -#define fp2sub fp2sub434 -#define fp2neg fp2neg434 -#define fp2div2 fp2div2_434 -#define fp2correction fp2correction434 -#define fp2mul_mont fp2mul434_mont -#define fp2sqr_mont fp2sqr434_mont -#define fp2inv_mont fp2inv434_mont -#define mp_add_asm mp_add434_asm -#define mp_subaddx2_asm mp_subadd434x2_asm -#define mp_dblsubx2_asm mp_dblsub434x2_asm -#define random_mod_order_A oqs_kem_sidh_p434_random_mod_order_A -#define random_mod_order_B oqs_kem_sidh_p434_random_mod_order_B -#define EphemeralKeyGeneration_A oqs_kem_sidh_p434_EphemeralKeyGeneration_A -#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_EphemeralKeyGeneration_B -#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_EphemeralSecretAgreement_A -#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_EphemeralSecretAgreement_B - -#include "fp.c" -#include "fpx.c" -#include "ec_isogeny.c" -#include "sidh.c" -#include "sike_r2_kem.c" +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: supersingular isogeny parameters and generation of functions for P434 +*********************************************************************************************/ + +#include "P434_api.h" +#include "P434_internal.h" +#include "pq-crypto/s2n_pq.h" + +// Encoding of field elements, elements over Z_order, elements over GF(p^2) and elliptic curve points: +// -------------------------------------------------------------------------------------------------- +// Elements over GF(p) and Z_order are encoded with the least significant octet (and digit) located at the leftmost position (i.e., little endian format). +// Elements (a+b*i) over GF(p^2), where a and b are defined over GF(p), are encoded as {a, b}, with a in the least significant position. 
+// Elliptic curve points P = (x,y) are encoded as {x, y}, with x in the least significant position. +// Internally, the number of digits used to represent all these elements is obtained by approximating the number of bits to the immediately greater multiple of 32. +// For example, a 434-bit field element is represented with Ceil(434 / 64) = 7 64-bit digits or Ceil(434 / 32) = 14 32-bit digits. + +// +// Curve isogeny system "SIDHp434". Base curve: Montgomery curve By^2 = Cx^3 + Ax^2 + Cx defined over GF(p434^2), where A=6, B=1, C=1 and p434 = 2^216*3^137-1 +// + + +// The constants p434, p434p1, and p434x2 have been duplicated in +// sikep434r2_fp_x64_asm.S. If, for any reason, the constants are changed in +// one file, they should be updated in the other file as well. +const uint64_t p434[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFDC1767AE2FFFFFF, + 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344}; +const uint64_t p434p1[NWORDS64_FIELD] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0xFDC1767AE3000000, + 0x7BC65C783158AEA3, 0x6CFC5FD681C52056, 0x0002341F27177344}; +const uint64_t p434x2[NWORDS64_FIELD] = {0xFFFFFFFFFFFFFFFE, 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF, 0xFB82ECF5C5FFFFFF, + 0xF78CB8F062B15D47, 0xD9F8BFAD038A40AC, 0x0004683E4E2EE688}; +// Order of Alice's subgroup +const uint64_t Alice_order[NWORDS64_ORDER] = {0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000001000000}; +// Order of Bob's subgroup +const uint64_t Bob_order[NWORDS64_ORDER] = {0x58AEA3FDC1767AE3, 0xC520567BC65C7831, 0x1773446CFC5FD681, 0x0000000002341F27}; +// Alice's generator values {XPA0 + XPA1*i, XQA0 + xQA1*i, XRA0 + XRA1*i} in GF(p434^2), expressed in Montgomery representation +const uint64_t A_gen[6 * NWORDS64_FIELD] = {0x05ADF455C5C345BF, 0x91935C5CC767AC2B, 0xAFE4E879951F0257, 0x70E792DC89FA27B1, + 0xF797F526BB48C8CD, 0x2181DB6131AF621F, 0x00000A1C08B1ECC4, // XPA0 + 0x74840EB87CDA7788, 0x2971AA0ECF9F9D0B, 0xCB5732BDF41715D5, 0x8CD8E51F7AACFFAA, + 0xA7F424730D7E419F, 0xD671EB919A179E8C, 0x0000FFA26C5A924A, // XPA1 + 0xFEC6E64588B7273B, 0xD2A626D74CBBF1C6, 0xF8F58F07A78098C7, 0xE23941F470841B03, + 0x1B63EDA2045538DD, 0x735CFEB0FFD49215, 0x0001C4CB77542876, // XQA0 + 0xADB0F733C17FFDD6, 0x6AFFBD037DA0A050, 0x680EC43DB144E02F, 0x1E2E5D5FF524E374, + 0xE2DDA115260E2995, 0xA6E4B552E2EDE508, 0x00018ECCDDF4B53E, // XQA1 + 0x01BA4DB518CD6C7D, 0x2CB0251FE3CC0611, 0x259B0C6949A9121B, 0x60E17AC16D2F82AD, + 0x3AA41F1CE175D92D, 0x413FBE6A9B9BC4F3, 0x00022A81D8D55643, // XRA0 + 0xB8ADBC70FC82E54A, 0xEF9CDDB0D5FADDED, 0x5820C734C80096A0, 0x7799994BAA96E0E4, + 0x044961599E379AF8, 0xDB2B94FBF09F27E2, 0x0000B87FC716C0C6}; // XRA1 +// Bob's generator values {XPB0, XQB0, XRB0 + XRB1*i} in GF(p434^2), expressed in Montgomery representation +const uint64_t B_gen[6 * NWORDS64_FIELD] = {0x6E5497556EDD48A3, 0x2A61B501546F1C05, 0xEB919446D049887D, 0x5864A4A69D450C4F, + 0xB883F276A6490D2B, 0x22CC287022D5F5B9, 0x0001BED4772E551F, // XPB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XPB1 + 0xFAE2A3F93D8B6B8E, 0x494871F51700FE1C, 0xEF1A94228413C27C, 0x498FF4A4AF60BD62, + 0xB00AD2A708267E8A, 0xF4328294E017837F, 0x000034080181D8AE, // XQB0 + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, + 0x0000000000000000, 0x0000000000000000, 0x0000000000000000, // XQB1 + 0x283B34FAFEFDC8E4, 0x9208F44977C3E647, 0x7DEAE962816F4E9A, 
0x68A2BA8AA262EC9D, + 0x8176F112EA43F45B, 0x02106D022634F504, 0x00007E8A50F02E37, // XRB0 + 0xB378B7C1DA22CCB1, 0x6D089C99AD1D9230, 0xEBE15711813E2369, 0x2B35A68239D48A53, + 0x445F6FD138407C93, 0xBEF93B29A3F6B54B, 0x000173FA910377D3}; // XRB1 +// Montgomery constant Montgomery_R2 = (2^448)^2 mod p434 +const uint64_t Montgomery_R2[NWORDS64_FIELD] = {0x28E55B65DCD69B30, 0xACEC7367768798C2, 0xAB27973F8311688D, 0x175CC6AF8D6C7C0B, + 0xABCD92BF2DDE347E, 0x69E16A61C7686D9A, 0x000025A89BCDD12A}; +// Value one in Montgomery representation +const uint64_t Montgomery_one[NWORDS64_FIELD] = {0x000000000000742C, 0x0000000000000000, 0x0000000000000000, 0xB90FF404FC000000, + 0xD801A4FB559FACD4, 0xE93254545F77410C, 0x0000ECEEA7BD2EDA}; + +// Fixed parameters for isogeny tree computation +const unsigned int strat_Alice[MAX_Alice - 1] = { + 48, 28, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 13, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, + 1, 1, 5, 4, 2, 1, 1, 2, 1, 1, 2, 1, 1, 1, 21, 12, 7, 4, 2, 1, 1, 2, 1, 1, 3, 2, 1, 1, 1, 1, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 9, 5, 3, 2, 1, 1, + 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1}; + +const unsigned int strat_Bob[MAX_Bob - 1] = { + 66, 33, 17, 9, 5, 3, 2, 1, 1, 1, 1, 2, 1, 1, 1, 4, 2, 1, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 1, + 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 32, 16, 8, 4, 3, 1, 1, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, + 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 16, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1, 8, 4, 2, 1, 1, 2, 1, 1, 4, 2, 1, 1, 2, 1, 1}; + +// Setting up macro defines and including GF(p), GF(p^2), curve, isogeny and kex functions +#define fpcopy fpcopy434 +#define fpzero fpzero434 +#define fpadd fpadd434 +#define fpsub fpsub434 +#define fpneg fpneg434 +#define fpdiv2 fpdiv2_434 +#define fpcorrection fpcorrection434 +#define fpmul_mont fpmul434_mont +#define fpsqr_mont fpsqr434_mont +#define fpinv_mont fpinv434_mont +#define fpinv_chain_mont fpinv434_chain_mont +#define fp2copy fp2copy434 +#define fp2zero fp2zero434 +#define fp2add fp2add434 +#define fp2sub fp2sub434 +#define fp2neg fp2neg434 +#define fp2div2 fp2div2_434 +#define fp2correction fp2correction434 +#define fp2mul_mont fp2mul434_mont +#define fp2sqr_mont fp2sqr434_mont +#define fp2inv_mont fp2inv434_mont +#define mp_add_asm mp_add434_asm +#define mp_subaddx2_asm mp_subadd434x2_asm +#define mp_dblsubx2_asm mp_dblsub434x2_asm +#define random_mod_order_A oqs_kem_sidh_p434_random_mod_order_A +#define random_mod_order_B oqs_kem_sidh_p434_random_mod_order_B +#define EphemeralKeyGeneration_A oqs_kem_sidh_p434_EphemeralKeyGeneration_A +#define EphemeralKeyGeneration_B oqs_kem_sidh_p434_EphemeralKeyGeneration_B +#define EphemeralSecretAgreement_A oqs_kem_sidh_p434_EphemeralSecretAgreement_A +#define EphemeralSecretAgreement_B oqs_kem_sidh_p434_EphemeralSecretAgreement_B + +#include "fp.c" +#include "fpx.c" +#include "ec_isogeny.c" +#include "sidh.c" +#include "sike_r2_kem.c" diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_api.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_api.h index bdf3eee8cd..6a61acf3cf 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_api.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_api.h @@ -1,70 +1,70 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: API header file 
for P434 -*********************************************************************************************/ - -#ifndef P434_API_H -#define P434_API_H - -#include "P434_internal.h" - -/*********************** Key encapsulation mechanism API ***********************/ - -#define CRYPTO_SECRETKEYBYTES 374 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes -#define CRYPTO_PUBLICKEYBYTES 330 -#define CRYPTO_BYTES 16 -#define CRYPTO_CIPHERTEXTBYTES 346 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes - -// Encoding of keys for KEM-based isogeny system "SIKEp434" (wire format): -// ---------------------------------------------------------------------- -// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). -// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion. -// -// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^217-1] and the public key pk. In the SIKE API, -// private keys are encoded in 374 octets in little endian format. -// Public keys pk consist of 3 elements in GF(p434^2). In the SIKE API, pk is encoded in 330 octets. -// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 330 + 16 = 346 octets. -// Shared keys ss consist of a value of 16 octets. - -/*********************** Ephemeral key exchange API ***********************/ - -// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. -// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. -// Extended version available at: http://eprint.iacr.org/2016/859 - -// Generation of Alice's secret key -// Outputs random value in [0, 2^216 - 1] to be used as Alice's private key -int oqs_kem_sidh_p434_random_mod_order_A(unsigned char *random_digits); - -// Generation of Bob's secret key -// Outputs random value in [0, 2^Floor(Log(2,3^137)) - 1] to be used as Bob's private key -int oqs_kem_sidh_p434_random_mod_order_B(unsigned char *random_digits); - -// Alice's ephemeral public key generation -// Input: a private key PrivateKeyA in the range [0, 2^216 - 1], stored in 27 bytes. -// Output: the public key PublicKeyA consisting of 3 GF(p434^2) elements encoded in 330 bytes. -int oqs_kem_sidh_p434_EphemeralKeyGeneration_A(const digit_t *PrivateKeyA, unsigned char *PublicKeyA); - -// Bob's ephemeral key-pair generation -// It produces a private key PrivateKeyB and computes the public key PublicKeyB. -// The private key is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. -// The public key consists of 3 GF(p434^2) elements encoded in 330 bytes. -int oqs_kem_sidh_p434_EphemeralKeyGeneration_B(const digit_t *PrivateKeyB, unsigned char *PublicKeyB); - -// Alice's ephemeral shared secret computation -// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB -// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^216 - 1], stored in 27 bytes. -// Bob's PublicKeyB consists of 3 GF(p434^2) elements encoded in 330 bytes. -// Output: a shared secret SharedSecretA that consists of one element in GF(p434^2) encoded in 110 bytes. 
-int oqs_kem_sidh_p434_EphemeralSecretAgreement_A(const digit_t *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA); - -// Bob's ephemeral shared secret computation -// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA -// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. -// Alice's PublicKeyA consists of 3 GF(p434^2) elements encoded in 330 bytes. -// Output: a shared secret SharedSecretB that consists of one element in GF(p434^2) encoded in 110 bytes. -int oqs_kem_sidh_p434_EphemeralSecretAgreement_B(const digit_t *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB); - - -#endif +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: API header file for P434 +*********************************************************************************************/ + +#ifndef P434_API_H +#define P434_API_H + +#include "P434_internal.h" + +/*********************** Key encapsulation mechanism API ***********************/ + +#define CRYPTO_SECRETKEYBYTES 374 // MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes +#define CRYPTO_PUBLICKEYBYTES 330 +#define CRYPTO_BYTES 16 +#define CRYPTO_CIPHERTEXTBYTES 346 // CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes + +// Encoding of keys for KEM-based isogeny system "SIKEp434" (wire format): +// ---------------------------------------------------------------------- +// Elements over GF(p434) are encoded in 55 octets in little endian format (i.e., the least significant octet is located in the lowest memory address). +// Elements (a+b*i) over GF(p434^2), where a and b are defined over GF(p434), are encoded as {a, b}, with a in the lowest memory portion. +// +// Private keys sk consist of the concatenation of a 16-byte random value, a value in the range [0, 2^217-1] and the public key pk. In the SIKE API, +// private keys are encoded in 374 octets in little endian format. +// Public keys pk consist of 3 elements in GF(p434^2). In the SIKE API, pk is encoded in 330 octets. +// Ciphertexts ct consist of the concatenation of a public key value and a 16-byte value. In the SIKE API, ct is encoded in 330 + 16 = 346 octets. +// Shared keys ss consist of a value of 16 octets. + +/*********************** Ephemeral key exchange API ***********************/ + +// SECURITY NOTE: SIDH supports ephemeral Diffie-Hellman key exchange. It is NOT secure to use it with static keys. +// See "On the Security of Supersingular Isogeny Cryptosystems", S.D. Galbraith, C. Petit, B. Shani and Y.B. Ti, in ASIACRYPT 2016, 2016. +// Extended version available at: http://eprint.iacr.org/2016/859 + +// Generation of Alice's secret key +// Outputs random value in [0, 2^216 - 1] to be used as Alice's private key +int oqs_kem_sidh_p434_random_mod_order_A(unsigned char *random_digits); + +// Generation of Bob's secret key +// Outputs random value in [0, 2^Floor(Log(2,3^137)) - 1] to be used as Bob's private key +int oqs_kem_sidh_p434_random_mod_order_B(unsigned char *random_digits); + +// Alice's ephemeral public key generation +// Input: a private key PrivateKeyA in the range [0, 2^216 - 1], stored in 27 bytes. +// Output: the public key PublicKeyA consisting of 3 GF(p434^2) elements encoded in 330 bytes. 
+int oqs_kem_sidh_p434_EphemeralKeyGeneration_A(const digit_t *PrivateKeyA, unsigned char *PublicKeyA); + +// Bob's ephemeral key-pair generation +// It produces a private key PrivateKeyB and computes the public key PublicKeyB. +// The private key is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. +// The public key consists of 3 GF(p434^2) elements encoded in 330 bytes. +int oqs_kem_sidh_p434_EphemeralKeyGeneration_B(const digit_t *PrivateKeyB, unsigned char *PublicKeyB); + +// Alice's ephemeral shared secret computation +// It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB +// Inputs: Alice's PrivateKeyA is an integer in the range [0, 2^216 - 1], stored in 27 bytes. +// Bob's PublicKeyB consists of 3 GF(p434^2) elements encoded in 330 bytes. +// Output: a shared secret SharedSecretA that consists of one element in GF(p434^2) encoded in 110 bytes. +int oqs_kem_sidh_p434_EphemeralSecretAgreement_A(const digit_t *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA); + +// Bob's ephemeral shared secret computation +// It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA +// Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,3^137)) - 1], stored in 28 bytes. +// Alice's PublicKeyA consists of 3 GF(p434^2) elements encoded in 330 bytes. +// Output: a shared secret SharedSecretB that consists of one element in GF(p434^2) encoded in 110 bytes. +int oqs_kem_sidh_p434_EphemeralSecretAgreement_B(const digit_t *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB); + + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_internal.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_internal.h index 30056d455b..5167e143c5 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_internal.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/P434_internal.h @@ -1,225 +1,225 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: internal header file for P434 -*********************************************************************************************/ - -#ifndef P434_INTERNAL_H -#define P434_INTERNAL_H - -#include "config.h" - -#if (TARGET == TARGET_AMD64) -#define NWORDS_FIELD 7 // Number of words of a 434-bit field element -#define p434_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p434 + 1 -#elif (TARGET == TARGET_x86) -#define NWORDS_FIELD 14 -#define p434_ZERO_WORDS 6 -#elif (TARGET == TARGET_ARM) -#define NWORDS_FIELD 14 -#define p434_ZERO_WORDS 6 -#elif (TARGET == TARGET_ARM64) -#define NWORDS_FIELD 7 -#define p434_ZERO_WORDS 3 -#endif - -// Basic constants - -#define NBITS_FIELD 434 -#define MAXBITS_FIELD 448 -#define MAXWORDS_FIELD ((MAXBITS_FIELD + RADIX - 1) / RADIX) // Max. number of words to represent field elements -#define NWORDS64_FIELD ((NBITS_FIELD + 63) / 64) // Number of 64-bit words of a 434-bit field element -#define NBITS_ORDER 256 -#define NWORDS_ORDER ((NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
-#define NWORDS64_ORDER ((NBITS_ORDER + 63) / 64) // Number of 64-bit words of a 224-bit element -#define MAXBITS_ORDER NBITS_ORDER -#define ALICE 0 -#define BOB 1 -#define OALICE_BITS 216 -#define OBOB_BITS 218 -#define OBOB_EXPON 137 -#define MASK_ALICE 0xFF -#define MASK_BOB 0x01 -#define PRIME p434 -#define PARAM_A 6 -#define PARAM_C 1 -// Fixed parameters for isogeny tree computation -#define MAX_INT_POINTS_ALICE 7 -#define MAX_INT_POINTS_BOB 8 -#define MAX_Alice 108 -#define MAX_Bob 137 -#define MSG_BYTES 16 -#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) -#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) -#define FP2_ENCODED_BYTES 2 * ((NBITS_FIELD + 7) / 8) - -// SIDH's basic element definitions and point representations - -typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 434-bit field elements (448-bit max.) -typedef digit_t dfelm_t[2 * NWORDS_FIELD]; // Datatype for representing double-precision 2x434-bit field elements (448-bit max.) -typedef struct felm_s { - felm_t e[2]; -} f2elm_t; // Datatype for representing quadratic extension field elements GF(p434^2) - -typedef struct { - f2elm_t X; - f2elm_t Z; -} point_proj; // Point representation in projective XZ Montgomery coordinates. -typedef point_proj point_proj_t[1]; - -/**************** Function prototypes ****************/ -/************* Multiprecision functions **************/ - -// Copy wordsize digits, c = a, where lng(a) = nwords -void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords); - -// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit -unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); - -// 434-bit multiprecision addition, c = a+b -void mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c); - -// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit -unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); - -// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b -void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c); -void mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c); - -// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b -void mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c); - -// Multiprecision right shift by one -void mp_shiftr1(digit_t *x, const unsigned int nwords); - -// Digit multiplication, digit * digit -> 2-digit result -void digit_x_digit(const digit_t a, const digit_t b, digit_t *c); - -// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. 
-void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); - -/************ Field arithmetic functions *************/ - -// Copy of a field element, c = a -void fpcopy434(const digit_t *a, digit_t *c); - -// Zeroing a field element, a = 0 -void fpzero434(digit_t *a); - -// Modular addition, c = a+b mod p434 -extern void fpadd434(const digit_t *a, const digit_t *b, digit_t *c); -extern void fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c); - -// Modular subtraction, c = a-b mod p434 -extern void fpsub434(const digit_t *a, const digit_t *b, digit_t *c); -extern void fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c); - -// Modular negation, a = -a mod p434 -extern void fpneg434(digit_t *a); - -// Modular division by two, c = a/2 mod p434. -void fpdiv2_434(const digit_t *a, digit_t *c); - -// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. -void fpcorrection434(digit_t *a); - -// 434-bit Montgomery reduction, c = a mod p -void rdc_mont(const digit_t *a, digit_t *c); - -// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768 -void fpmul434_mont(const digit_t *a, const digit_t *b, digit_t *c); -void mul434_asm(const digit_t *a, const digit_t *b, digit_t *c); -void rdc434_asm(const digit_t *ma, digit_t *mc); - -// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768 -void fpsqr434_mont(const digit_t *ma, digit_t *mc); - -// Conversion to Montgomery representation -void to_mont(const digit_t *a, digit_t *mc); - -// Conversion from Montgomery representation to standard representation -void from_mont(const digit_t *ma, digit_t *c); - -// Field inversion, a = a^-1 in GF(p434) -void fpinv434_mont(digit_t *a); - -// Chain to compute (p434-3)/4 using Montgomery arithmetic -void fpinv434_chain_mont(digit_t *a); - -/************ GF(p^2) arithmetic functions *************/ - -// Copy of a GF(p434^2) element, c = a -void fp2copy434(const f2elm_t *a, f2elm_t *c); - -// Zeroing a GF(p434^2) element, a = 0 -void fp2zero434(f2elm_t *a); - -// GF(p434^2) negation, a = -a in GF(p434^2) -void fp2neg434(f2elm_t *a); - -// GF(p434^2) addition, c = a+b in GF(p434^2) -void fp2add434(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// GF(p434^2) subtraction, c = a-b in GF(p434^2) -extern void fp2sub434(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// GF(p434^2) division by two, c = a/2 in GF(p434^2) -void fp2div2_434(const f2elm_t *a, f2elm_t *c); - -// Modular correction, a = a in GF(p434^2) -void fp2correction434(f2elm_t *a); - -// GF(p434^2) squaring using Montgomery arithmetic, c = a^2 in GF(p434^2) -void fp2sqr434_mont(const f2elm_t *a, f2elm_t *c); - -// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2) -void fp2mul434_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); - -// Conversion of a GF(p434^2) element to Montgomery representation -void to_fp2mont(const f2elm_t *a, f2elm_t *mc); - -// Conversion of a GF(p434^2) element from Montgomery representation to standard representation -void from_fp2mont(const f2elm_t *ma, f2elm_t *c); - -// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) -void fp2inv434_mont(f2elm_t *a); - -/************ Elliptic curve and isogeny functions *************/ - -// Computes the j-invariant of a Montgomery curve with projective constant. -void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv); - -// Simultaneous doubling and differential addition. 
-void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24); - -// Doubling of a Montgomery point in projective coordinates (X:Z). -void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24); - -// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. -void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e); - -// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. -void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t *coeff); - -// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. -void eval_4_isog(point_proj_t P, f2elm_t *coeff); - -// Tripling of a Montgomery point in projective coordinates (X:Z). -void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus); - -// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. -void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e); - -// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. -void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t *coeff); - -// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P with coefficients given in coeff. -void eval_3_isog(point_proj_t Q, const f2elm_t *coeff); - -// 3-way simultaneous inversion -void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3); - -// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. -void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A); - -#endif +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: internal header file for P434 +*********************************************************************************************/ + +#ifndef P434_INTERNAL_H +#define P434_INTERNAL_H + +#include "config.h" + +#if (TARGET == TARGET_AMD64) +#define NWORDS_FIELD 7 // Number of words of a 434-bit field element +#define p434_ZERO_WORDS 3 // Number of "0" digits in the least significant part of p434 + 1 +#elif (TARGET == TARGET_x86) +#define NWORDS_FIELD 14 +#define p434_ZERO_WORDS 6 +#elif (TARGET == TARGET_ARM) +#define NWORDS_FIELD 14 +#define p434_ZERO_WORDS 6 +#elif (TARGET == TARGET_ARM64) +#define NWORDS_FIELD 7 +#define p434_ZERO_WORDS 3 +#endif + +// Basic constants + +#define NBITS_FIELD 434 +#define MAXBITS_FIELD 448 +#define MAXWORDS_FIELD ((MAXBITS_FIELD + RADIX - 1) / RADIX) // Max. number of words to represent field elements +#define NWORDS64_FIELD ((NBITS_FIELD + 63) / 64) // Number of 64-bit words of a 434-bit field element +#define NBITS_ORDER 256 +#define NWORDS_ORDER ((NBITS_ORDER + RADIX - 1) / RADIX) // Number of words of oA and oB, where oA and oB are the subgroup orders of Alice and Bob, resp. 
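+// On the 64-bit targets (RADIX = 64) the size constants in this block evaluate to:
+//   NWORDS_FIELD = 7                        (Ceil(434 / 64) 64-bit digits per field element)
+//   NWORDS_ORDER = 4                        (Ceil(256 / 64) digits for the subgroup orders)
+//   SECRETKEY_A_BYTES = Ceil(216 / 8) = 27  (Alice's private key)
+//   SECRETKEY_B_BYTES = Ceil(217 / 8) = 28  (Bob's private key)
+//   FP2_ENCODED_BYTES = 2 * Ceil(434 / 8) = 110, so a public key of 3 GF(p434^2)
+//   elements occupies 3 * 110 = 330 bytes, matching CRYPTO_PUBLICKEYBYTES in P434_api.h.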
+#define NWORDS64_ORDER ((NBITS_ORDER + 63) / 64) // Number of 64-bit words of a 224-bit element +#define MAXBITS_ORDER NBITS_ORDER +#define ALICE 0 +#define BOB 1 +#define OALICE_BITS 216 +#define OBOB_BITS 218 +#define OBOB_EXPON 137 +#define MASK_ALICE 0xFF +#define MASK_BOB 0x01 +#define PRIME p434 +#define PARAM_A 6 +#define PARAM_C 1 +// Fixed parameters for isogeny tree computation +#define MAX_INT_POINTS_ALICE 7 +#define MAX_INT_POINTS_BOB 8 +#define MAX_Alice 108 +#define MAX_Bob 137 +#define MSG_BYTES 16 +#define SECRETKEY_A_BYTES ((OALICE_BITS + 7) / 8) +#define SECRETKEY_B_BYTES ((OBOB_BITS - 1 + 7) / 8) +#define FP2_ENCODED_BYTES 2 * ((NBITS_FIELD + 7) / 8) + +// SIDH's basic element definitions and point representations + +typedef digit_t felm_t[NWORDS_FIELD]; // Datatype for representing 434-bit field elements (448-bit max.) +typedef digit_t dfelm_t[2 * NWORDS_FIELD]; // Datatype for representing double-precision 2x434-bit field elements (448-bit max.) +typedef struct felm_s { + felm_t e[2]; +} f2elm_t; // Datatype for representing quadratic extension field elements GF(p434^2) + +typedef struct { + f2elm_t X; + f2elm_t Z; +} point_proj; // Point representation in projective XZ Montgomery coordinates. +typedef point_proj point_proj_t[1]; + +/**************** Function prototypes ****************/ +/************* Multiprecision functions **************/ + +// Copy wordsize digits, c = a, where lng(a) = nwords +void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords); + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit +unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); + +// 434-bit multiprecision addition, c = a+b +void mp_add434_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit +unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); + +// 2x434-bit multiprecision subtraction followed by addition with p434*2^448, c = a-b+(p434*2^448) if a-b < 0, otherwise c=a-b +void mp_subaddx2_asm(const digit_t *a, const digit_t *b, digit_t *c); +void mp_subadd434x2_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Double 2x434-bit multiprecision subtraction, c = c-a-b, where c > a and c > b +void mp_dblsub434x2_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Multiprecision right shift by one +void mp_shiftr1(digit_t *x, const unsigned int nwords); + +// Digit multiplication, digit * digit -> 2-digit result +void digit_x_digit(const digit_t a, const digit_t b, digit_t *c); + +// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. 
+void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords); + +/************ Field arithmetic functions *************/ + +// Copy of a field element, c = a +void fpcopy434(const digit_t *a, digit_t *c); + +// Zeroing a field element, a = 0 +void fpzero434(digit_t *a); + +// Modular addition, c = a+b mod p434 +extern void fpadd434(const digit_t *a, const digit_t *b, digit_t *c); +extern void fpadd434_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Modular subtraction, c = a-b mod p434 +extern void fpsub434(const digit_t *a, const digit_t *b, digit_t *c); +extern void fpsub434_asm(const digit_t *a, const digit_t *b, digit_t *c); + +// Modular negation, a = -a mod p434 +extern void fpneg434(digit_t *a); + +// Modular division by two, c = a/2 mod p434. +void fpdiv2_434(const digit_t *a, digit_t *c); + +// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. +void fpcorrection434(digit_t *a); + +// 434-bit Montgomery reduction, c = a mod p +void rdc_mont(const digit_t *a, digit_t *c); + +// Field multiplication using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768 +void fpmul434_mont(const digit_t *a, const digit_t *b, digit_t *c); +void mul434_asm(const digit_t *a, const digit_t *b, digit_t *c); +void rdc434_asm(const digit_t *ma, digit_t *mc); + +// Field squaring using Montgomery arithmetic, c = a*b*R^-1 mod p434, where R=2^768 +void fpsqr434_mont(const digit_t *ma, digit_t *mc); + +// Conversion to Montgomery representation +void to_mont(const digit_t *a, digit_t *mc); + +// Conversion from Montgomery representation to standard representation +void from_mont(const digit_t *ma, digit_t *c); + +// Field inversion, a = a^-1 in GF(p434) +void fpinv434_mont(digit_t *a); + +// Chain to compute (p434-3)/4 using Montgomery arithmetic +void fpinv434_chain_mont(digit_t *a); + +/************ GF(p^2) arithmetic functions *************/ + +// Copy of a GF(p434^2) element, c = a +void fp2copy434(const f2elm_t *a, f2elm_t *c); + +// Zeroing a GF(p434^2) element, a = 0 +void fp2zero434(f2elm_t *a); + +// GF(p434^2) negation, a = -a in GF(p434^2) +void fp2neg434(f2elm_t *a); + +// GF(p434^2) addition, c = a+b in GF(p434^2) +void fp2add434(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// GF(p434^2) subtraction, c = a-b in GF(p434^2) +extern void fp2sub434(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// GF(p434^2) division by two, c = a/2 in GF(p434^2) +void fp2div2_434(const f2elm_t *a, f2elm_t *c); + +// Modular correction, a = a in GF(p434^2) +void fp2correction434(f2elm_t *a); + +// GF(p434^2) squaring using Montgomery arithmetic, c = a^2 in GF(p434^2) +void fp2sqr434_mont(const f2elm_t *a, f2elm_t *c); + +// GF(p434^2) multiplication using Montgomery arithmetic, c = a*b in GF(p434^2) +void fp2mul434_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c); + +// Conversion of a GF(p434^2) element to Montgomery representation +void to_fp2mont(const f2elm_t *a, f2elm_t *mc); + +// Conversion of a GF(p434^2) element from Montgomery representation to standard representation +void from_fp2mont(const f2elm_t *ma, f2elm_t *c); + +// GF(p434^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2) +void fp2inv434_mont(f2elm_t *a); + +/************ Elliptic curve and isogeny functions *************/ + +// Computes the j-invariant of a Montgomery curve with projective constant. +void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv); + +// Simultaneous doubling and differential addition. 
+void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24); + +// Doubling of a Montgomery point in projective coordinates (X:Z). +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24); + +// Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e); + +// Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. +void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t *coeff); + +// Evaluates the isogeny at the point (X:Z) in the domain of the isogeny. +void eval_4_isog(point_proj_t P, f2elm_t *coeff); + +// Tripling of a Montgomery point in projective coordinates (X:Z). +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus); + +// Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e); + +// Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. +void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t *coeff); + +// Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and a point P with coefficients given in coeff. +void eval_3_isog(point_proj_t Q, const f2elm_t *coeff); + +// 3-way simultaneous inversion +void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3); + +// Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. 
+void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A); + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/config.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/config.h index 6199e5a708..a87268757f 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/config.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/config.h @@ -1,218 +1,218 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: configuration file and platform-dependent macros -*********************************************************************************************/ - -#ifndef SIKE_CONFIG_H -#define SIKE_CONFIG_H - -#include <stdint.h> -#include <stdbool.h> -#include <stddef.h> - -// Definition of operating system - -#define OS_WIN 1 -#define OS_LINUX 2 - -#if defined(_WIN32) // Microsoft Windows OS -#define OS_TARGET OS_WIN -#else -#define OS_TARGET OS_LINUX // default to Linux -#endif - -// Definition of compiler (removed in OQS) - -#define COMPILER_GCC 1 -#define COMPILER_CLANG 2 - -#if defined(__GNUC__) // GNU GCC compiler -#define COMPILER COMPILER_GCC -#elif defined(__clang__) // Clang compiler -#define COMPILER COMPILER_CLANG -#else -#error -- "Unsupported COMPILER" -#endif - -// Definition of the targeted architecture and basic data types -#define TARGET_AMD64 1 -#define TARGET_x86 2 -#define TARGET_ARM 3 -#define TARGET_ARM64 4 - -#if defined(__x86_64__) -#define TARGET TARGET_AMD64 -#define RADIX 64 -#define LOG2RADIX 6 -typedef uint64_t digit_t; // Unsigned 64-bit digit -typedef uint32_t hdigit_t; // Unsigned 32-bit digit -#elif defined(__i386__) -#define TARGET TARGET_x86 -#define RADIX 32 -#define LOG2RADIX 5 -typedef uint32_t digit_t; // Unsigned 32-bit digit -typedef uint16_t hdigit_t; // Unsigned 16-bit digit -#elif defined(__arm__) -#define TARGET TARGET_ARM -#define RADIX 32 -#define LOG2RADIX 5 -typedef uint32_t digit_t; // Unsigned 32-bit digit -typedef uint16_t hdigit_t; // Unsigned 16-bit digit -#elif defined(__aarch64__) -#define TARGET TARGET_ARM64 -#define RADIX 64 -#define LOG2RADIX 6 -typedef uint64_t digit_t; // Unsigned 64-bit digit -typedef uint32_t hdigit_t; // Unsigned 32-bit digit -#else -#error-- "Unsupported ARCHITECTURE" -#endif - -#define RADIX64 64 - -// Extended datatype support -#if !defined(S2N_SIKEP434R2_ASM) -typedef uint64_t uint128_t[2]; -#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) -typedef unsigned uint128_t __attribute__((mode(TI))); -#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) -typedef unsigned uint128_t __attribute__((mode(TI))); -#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) -typedef uint64_t uint128_t[2]; -#endif - -// Macro definitions - -#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8) // Conversion macro from number of bits to number of bytes -#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words -#define NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words - -// Macro to avoid compiler warnings when detecting unreferenced parameters -#define UNREFERENCED_PARAMETER(PAR) ((void) (PAR)) - -/********************** Constant-time unsigned comparisons ***********************/ - -// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise - -unsigned 
int is_digit_nonzero_ct(digit_t x) { // Is x != 0? - return (unsigned int) ((x | (0 - x)) >> (RADIX - 1)); -} - -unsigned int is_digit_zero_ct(digit_t x) { // Is x = 0? - return (unsigned int) (1 ^ is_digit_nonzero_ct(x)); -} - -unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) { // Is x < y? - return (unsigned int) ((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); -} - -/********************** Macros for platform-dependent operations **********************/ - -#if (!defined(S2N_SIKEP434R2_ASM)) || (TARGET == TARGET_ARM) - -// Digit multiplication -#define MUL(multiplier, multiplicand, hi, lo) \ - digit_x_digit((multiplier), (multiplicand), &(lo)); - -// Digit addition with carry -#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ - { \ - digit_t tempReg = (addend1) + (digit_t)(carryIn); \ - (sumOut) = (addend2) + tempReg; \ - (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ - } - -// Digit subtraction with borrow -#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ - { \ - digit_t tempReg = (minuend) - (subtrahend); \ - unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) &is_digit_zero_ct(tempReg))); \ - (differenceOut) = tempReg - (digit_t)(borrowIn); \ - (borrowOut) = borrowReg; \ - } - -// Shift right with flexible datatype -#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); - -// Shift left with flexible datatype -#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); - -#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) - -// Digit multiplication -#define MUL(multiplier, multiplicand, hi, lo) \ - (lo) = _umul128((multiplier), (multiplicand), (hi)); - -// Digit addition with carry -#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ - (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut)); - -// Digit subtraction with borrow -#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ - (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut)); - -// Digit shift right -#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = __shiftright128((lowIn), (highIn), (shift)); - -// Digit shift left -#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = __shiftleft128((lowIn), (highIn), (shift)); - -// 64x64-bit multiplication -#define MUL128(multiplier, multiplicand, product) \ - (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]); - -// 128-bit addition with output carry -#define ADC128(addend1, addend2, carry, addition) \ - (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ - (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]); - -#define MULADD128(multiplier, multiplicand, addend, carry, result) \ - ; \ - { \ - uint128_t product; \ - MUL128(multiplier, multiplicand, product); \ - ADC128(addend, product, carry, result); \ - } - -#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX) - -// Digit multiplication -#define MUL(multiplier, multiplicand, hi, lo) \ - { \ - uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \ - *(hi) = (digit_t)(tempReg >> RADIX); \ - (lo) = (digit_t) tempReg; \ - } - -// Digit addition with carry -#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ - { 
\ - uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \ - (carryOut) = (digit_t)(tempReg >> RADIX); \ - (sumOut) = (digit_t) tempReg; \ - } - -// Digit subtraction with borrow -#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ - { \ - uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \ - (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1)); \ - (differenceOut) = (digit_t) tempReg; \ - } - -// Digit shift right -#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift))); - -// Digit shift left -#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ - (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); - -#endif - -#endif +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: configuration file and platform-dependent macros +*********************************************************************************************/ + +#ifndef SIKE_CONFIG_H +#define SIKE_CONFIG_H + +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +// Definition of operating system + +#define OS_WIN 1 +#define OS_LINUX 2 + +#if defined(_WIN32) // Microsoft Windows OS +#define OS_TARGET OS_WIN +#else +#define OS_TARGET OS_LINUX // default to Linux +#endif + +// Definition of compiler (removed in OQS) + +#define COMPILER_GCC 1 +#define COMPILER_CLANG 2 + +#if defined(__GNUC__) // GNU GCC compiler +#define COMPILER COMPILER_GCC +#elif defined(__clang__) // Clang compiler +#define COMPILER COMPILER_CLANG +#else +#error -- "Unsupported COMPILER" +#endif + +// Definition of the targeted architecture and basic data types +#define TARGET_AMD64 1 +#define TARGET_x86 2 +#define TARGET_ARM 3 +#define TARGET_ARM64 4 + +#if defined(__x86_64__) +#define TARGET TARGET_AMD64 +#define RADIX 64 +#define LOG2RADIX 6 +typedef uint64_t digit_t; // Unsigned 64-bit digit +typedef uint32_t hdigit_t; // Unsigned 32-bit digit +#elif defined(__i386__) +#define TARGET TARGET_x86 +#define RADIX 32 +#define LOG2RADIX 5 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef uint16_t hdigit_t; // Unsigned 16-bit digit +#elif defined(__arm__) +#define TARGET TARGET_ARM +#define RADIX 32 +#define LOG2RADIX 5 +typedef uint32_t digit_t; // Unsigned 32-bit digit +typedef uint16_t hdigit_t; // Unsigned 16-bit digit +#elif defined(__aarch64__) +#define TARGET TARGET_ARM64 +#define RADIX 64 +#define LOG2RADIX 6 +typedef uint64_t digit_t; // Unsigned 64-bit digit +typedef uint32_t hdigit_t; // Unsigned 32-bit digit +#else +#error-- "Unsupported ARCHITECTURE" +#endif + +#define RADIX64 64 + +// Extended datatype support +#if !defined(S2N_SIKEP434R2_ASM) +typedef uint64_t uint128_t[2]; +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_LINUX) +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_ARM64 && OS_TARGET == OS_LINUX) +typedef unsigned uint128_t __attribute__((mode(TI))); +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) +typedef uint64_t uint128_t[2]; +#endif + +// Macro definitions + +#define NBITS_TO_NBYTES(nbits) (((nbits) + 7) / 8) // Conversion macro from number of bits to number of bytes +#define NBITS_TO_NWORDS(nbits) (((nbits) + (sizeof(digit_t) * 8) - 1) / (sizeof(digit_t) * 8)) // Conversion macro from number of bits to number of computer words +#define 
NBYTES_TO_NWORDS(nbytes) (((nbytes) + sizeof(digit_t) - 1) / sizeof(digit_t)) // Conversion macro from number of bytes to number of computer words + +// Macro to avoid compiler warnings when detecting unreferenced parameters +#define UNREFERENCED_PARAMETER(PAR) ((void) (PAR)) + +/********************** Constant-time unsigned comparisons ***********************/ + +// The following functions return 1 (TRUE) if condition is true, 0 (FALSE) otherwise + +unsigned int is_digit_nonzero_ct(digit_t x) { // Is x != 0? + return (unsigned int) ((x | (0 - x)) >> (RADIX - 1)); +} + +unsigned int is_digit_zero_ct(digit_t x) { // Is x = 0? + return (unsigned int) (1 ^ is_digit_nonzero_ct(x)); +} + +unsigned int is_digit_lessthan_ct(digit_t x, digit_t y) { // Is x < y? + return (unsigned int) ((x ^ ((x ^ y) | ((x - y) ^ y))) >> (RADIX - 1)); +} + +/********************** Macros for platform-dependent operations **********************/ + +#if (!defined(S2N_SIKEP434R2_ASM)) || (TARGET == TARGET_ARM) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + digit_x_digit((multiplier), (multiplicand), &(lo)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + digit_t tempReg = (addend1) + (digit_t)(carryIn); \ + (sumOut) = (addend2) + tempReg; \ + (carryOut) = (is_digit_lessthan_ct(tempReg, (digit_t)(carryIn)) | is_digit_lessthan_ct((sumOut), tempReg)); \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + digit_t tempReg = (minuend) - (subtrahend); \ + unsigned int borrowReg = (is_digit_lessthan_ct((minuend), (subtrahend)) | ((borrowIn) &is_digit_zero_ct(tempReg))); \ + (differenceOut) = tempReg - (digit_t)(borrowIn); \ + (borrowOut) = borrowReg; \ + } + +// Shift right with flexible datatype +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (DigitSize - (shift))); + +// Shift left with flexible datatype +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (DigitSize - (shift))); + +#elif (TARGET == TARGET_AMD64 && OS_TARGET == OS_WIN) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + (lo) = _umul128((multiplier), (multiplicand), (hi)); + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + (carryOut) = _addcarry_u64((carryIn), (addend1), (addend2), &(sumOut)); + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + (borrowOut) = _subborrow_u64((borrowIn), (minuend), (subtrahend), &(differenceOut)); + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftright128((lowIn), (highIn), (shift)); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = __shiftleft128((lowIn), (highIn), (shift)); + +// 64x64-bit multiplication +#define MUL128(multiplier, multiplicand, product) \ + (product)[0] = _umul128((multiplier), (multiplicand), &(product)[1]); + +// 128-bit addition with output carry +#define ADC128(addend1, addend2, carry, addition) \ + (carry) = _addcarry_u64(0, (addend1)[0], (addend2)[0], &(addition)[0]); \ + (carry) = _addcarry_u64((carry), (addend1)[1], (addend2)[1], &(addition)[1]); + +#define MULADD128(multiplier, multiplicand, addend, carry, result) \ + ; \ + { \ + uint128_t product; \ + MUL128(multiplier, multiplicand, product); \ + 
ADC128(addend, product, carry, result); \ + } + +#elif ((TARGET == TARGET_AMD64 || TARGET == TARGET_ARM64) && OS_TARGET == OS_LINUX) + +// Digit multiplication +#define MUL(multiplier, multiplicand, hi, lo) \ + { \ + uint128_t tempReg = (uint128_t)(multiplier) * (uint128_t)(multiplicand); \ + *(hi) = (digit_t)(tempReg >> RADIX); \ + (lo) = (digit_t) tempReg; \ + } + +// Digit addition with carry +#define ADDC(carryIn, addend1, addend2, carryOut, sumOut) \ + { \ + uint128_t tempReg = (uint128_t)(addend1) + (uint128_t)(addend2) + (uint128_t)(carryIn); \ + (carryOut) = (digit_t)(tempReg >> RADIX); \ + (sumOut) = (digit_t) tempReg; \ + } + +// Digit subtraction with borrow +#define SUBC(borrowIn, minuend, subtrahend, borrowOut, differenceOut) \ + { \ + uint128_t tempReg = (uint128_t)(minuend) - (uint128_t)(subtrahend) - (uint128_t)(borrowIn); \ + (borrowOut) = (digit_t)(tempReg >> (sizeof(uint128_t) * 8 - 1)); \ + (differenceOut) = (digit_t) tempReg; \ + } + +// Digit shift right +#define SHIFTR(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((lowIn) >> (shift)) ^ ((highIn) << (RADIX - (shift))); + +// Digit shift left +#define SHIFTL(highIn, lowIn, shift, shiftOut, DigitSize) \ + (shiftOut) = ((highIn) << (shift)) ^ ((lowIn) >> (RADIX - (shift))); + +#endif + +#endif diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/ec_isogeny.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/ec_isogeny.c index 8a3f85e92b..278bb14b58 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/ec_isogeny.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/ec_isogeny.c @@ -1,313 +1,313 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: elliptic curve and isogeny functions -*********************************************************************************************/ - -void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24) { // Doubling of a Montgomery point in projective coordinates (X:Z). - // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. - // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). - f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2sub(&P->X, &P->Z, t0); // t0 = X1-Z1 - fp2add(&P->X, &P->Z, t1); // t1 = X1+Z1 - fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 - fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 - fp2mul_mont(C24, t0, &Q->Z); // Z2 = C24*(X1-Z1)^2 - fp2mul_mont(t1, &Q->Z, &Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 - fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 - fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] - fp2add(&Q->Z, t0, &Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 - fp2mul_mont(&Q->Z, t1, &Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] -} - -void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e) { // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. - // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. - // Output: projective Montgomery x-coordinates Q <- (2^e)*P. 
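For reference, chaining the inline comments of xDBL() above gives the closed form it computes (with A24plus = A+2C and C24 = 4C, as stated in those comments): X2 = C24*(X1-Z1)^2*(X1+Z1)^2 and Z2 = [A24plus*((X1+Z1)^2-(X1-Z1)^2) + C24*(X1-Z1)^2] * [(X1+Z1)^2-(X1-Z1)^2]. xDBLe() then simply applies this map e times to obtain [2^e]P. This is a restatement of the comments, not an independent derivation.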
- int i; - - copy_words((const digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); - - for (i = 0; i < e; i++) { - xDBL(Q, Q, A24plus, C24); - } -} - -void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t *coeff) { // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. - // Input: projective point of order four P = (X4:Z4). - // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients - // that are used to evaluate the isogeny at a point in eval_4_isog(). - - fp2sub(&P->X, &P->Z, &coeff[1]); // coeff[1] = X4-Z4 - fp2add(&P->X, &P->Z, &coeff[2]); // coeff[2] = X4+Z4 - fp2sqr_mont(&P->Z, &coeff[0]); // coeff[0] = Z4^2 - fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 2*Z4^2 - fp2sqr_mont(&coeff[0], C24); // C24 = 4*Z4^4 - fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 4*Z4^2 - fp2sqr_mont(&P->X, A24plus); // A24plus = X4^2 - fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 - fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 -} - -void eval_4_isog(point_proj_t P, f2elm_t *coeff) { // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined - // by the 3 coefficients in coeff (computed in the function get_4_isog()). - // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). - // Output: the projective point P = phi(P) = (X:Z) in the codomain. - f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2add(&P->X, &P->Z, t0); // t0 = X+Z - fp2sub(&P->X, &P->Z, t1); // t1 = X-Z - fp2mul_mont(t0, &coeff[1], &P->X); // X = (X+Z)*coeff[1] - fp2mul_mont(t1, &coeff[2], &P->Z); // Z = (X-Z)*coeff[2] - fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) - fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) - fp2add(&P->X, &P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] - fp2sub(&P->X, &P->Z, &P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1] - fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 - fp2sqr_mont(&P->Z, &P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - fp2add(t1, t0, &P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 - fp2sub(&P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) - fp2mul_mont(&P->X, t1, &P->X); // Xfinal - fp2mul_mont(&P->Z, t0, &P->Z); // Zfinal -} - -void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus) { // Tripling of a Montgomery point in projective coordinates (X:Z). - // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. - // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
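Summarizing the inline comments of get_4_isog() above: for an order-4 point (X4:Z4) it returns the image-curve constants A24plus = 4*X4^4 and C24 = 4*Z4^4 together with coeff[0] = 4*Z4^2, coeff[1] = X4-Z4 and coeff[2] = X4+Z4, which eval_4_isog() then consumes to push a point (X:Z) through the isogeny. Again, this only restates the comments in one place.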
- f2elm_t _t0, _t1, _t2, _t3, _t4, _t5, _t6; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3, *t4=&_t4, *t5=&_t5, *t6=&_t6; - - fp2sub(&P->X, &P->Z, t0); // t0 = X-Z - fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 - fp2add(&P->X, &P->Z, t1); // t1 = X+Z - fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 - fp2add(t0, t1, t4); // t4 = 2*X - fp2sub(t1, t0, t0); // t0 = 2*Z - fp2sqr_mont(t4, t1); // t1 = 4*X^2 - fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 - fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 - fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 - fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3 - fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 - fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3 - fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3 - fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 - fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] - fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3 - fp2sqr_mont(t2, t2); // t2 = t2^2 - fp2mul_mont(t4, t2, &Q->X); // X3 = 2*X*t2 - fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] - fp2sqr_mont(t1, t1); // t1 = t1^2 - fp2mul_mont(t0, t1, &Q->Z); // Z3 = 2*Z*t1 -} - -void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e) { // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. - // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. - // Output: projective Montgomery x-coordinates Q <- (3^e)*P. - int i; - - copy_words((const digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); - - for (i = 0; i < e; i++) { - xTPL(Q, Q, A24minus, A24plus); - } -} - -void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t *coeff) { // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. - // Input: projective point of order three P = (X3:Z3). - // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. 
- f2elm_t _t0, _t1, _t2, _t3, _t4; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3, *t4=&_t4; - - fp2sub(&P->X, &P->Z, &coeff[0]); // coeff0 = X-Z - fp2sqr_mont(&coeff[0], t0); // t0 = (X-Z)^2 - fp2add(&P->X, &P->Z, &coeff[1]); // coeff1 = X+Z - fp2sqr_mont(&coeff[1], t1); // t1 = (X+Z)^2 - fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 - fp2add(&coeff[0], &coeff[1], t3); // t3 = 2*X - fp2sqr_mont(t3, t3); // t3 = 4*X^2 - fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 - fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 - fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 - fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 - fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) - fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 - fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] - fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 - fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) - fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 - fp2mul_mont(t3, t4, A24plus); // A24plus = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] -} - -void eval_3_isog(point_proj_t Q, const f2elm_t *coeff) { // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and - // a point P with 2 coefficients in coeff (computed in the function get_3_isog()). - // Inputs: projective points P = (X3:Z3) and Q = (X:Z). - // Output: the projective point Q <- phi(Q) = (X3:Z3). - f2elm_t _t0, _t1, _t2; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; - - fp2add(&Q->X, &Q->Z, t0); // t0 = X+Z - fp2sub(&Q->X, &Q->Z, t1); // t1 = X-Z - fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff0*(X+Z) - fp2mul_mont(t1, &coeff[1], t1); // t1 = coeff1*(X-Z) - fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z) - fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z) - fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2 - fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2 - fp2mul_mont(&Q->X, t2, &Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2 - fp2mul_mont(&Q->Z, t0, &Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2 -} - -void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3) { // 3-way simultaneous inversion - // Input: z1,z2,z3 - // Output: 1/z1,1/z2,1/z3 (override inputs). - f2elm_t _t0, _t1, _t2, _t3; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3; - - fp2mul_mont(z1, z2, t0); // t0 = z1*z2 - fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 - fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) - fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) - fp2mul_mont(t2, z2, t3); // t3 = 1/z1 - fp2mul_mont(t2, z1, z2); // z2 = 1/z2 - fp2mul_mont(t0, t1, z3); // z3 = 1/z3 - fp2copy(t3, z1); // z1 = 1/z1 -} - -void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A) { // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. - // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. - // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
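Unwinding the temporaries in the body that follows, the returned coefficient is A = (xP*xQ + xP*xR + xQ*xR - 1)^2 / (4*xP*xQ*xR) - (xP + xQ + xR), the usual Montgomery-coefficient recovery formula; this merely collects the step-by-step comments below into closed form.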
- f2elm_t _t0, _t1, one = {0}; - f2elm_t *t0=&_t0, *t1=&_t1; - - fpcopy((const digit_t *) &Montgomery_one, one.e[0]); - fp2add(xP, xQ, t1); // t1 = xP+xQ - fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ - fp2mul_mont(xR, t1, A); // A = xR*t1 - fp2add(t0, A, A); // A = A+t0 - fp2mul_mont(t0, xR, t0); // t0 = t0*xR - fp2sub(A, &one, A); // A = A-1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t1, xR, t1); // t1 = t1+xR - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2sqr_mont(A, A); // A = A^2 - fp2inv_mont(t0); // t0 = 1/t0 - fp2mul_mont(A, t0, A); // A = A*t0 - fp2sub(A, t1, A); // Afinal = A-t1 -} - -void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv) { // Computes the j-invariant of a Montgomery curve with projective constant. - // Input: A,C in GF(p^2). - // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. - f2elm_t _t0, _t1; - f2elm_t *t0=&_t0, *t1=&_t1; - - fp2sqr_mont(A, jinv); // jinv = A^2 - fp2sqr_mont(C, t1); // t1 = C^2 - fp2add(t1, t1, t0); // t0 = t1+t1 - fp2sub(jinv, t0, t0); // t0 = jinv-t0 - fp2sub(t0, t1, t0); // t0 = t0-t1 - fp2sub(t0, t1, jinv); // jinv = t0-t1 - fp2sqr_mont(t1, t1); // t1 = t1^2 - fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2sqr_mont(t0, t1); // t1 = t0^2 - fp2mul_mont(t0, t1, t0); // t0 = t0*t1 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2add(t0, t0, t0); // t0 = t0+t0 - fp2inv_mont(jinv); // jinv = 1/jinv - fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv -} - -void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24) { // Simultaneous doubling and differential addition. - // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. - // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. - f2elm_t _t0, _t1, _t2; - f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; - - fp2add(&P->X, &P->Z, t0); // t0 = XP+ZP - fp2sub(&P->X, &P->Z, t1); // t1 = XP-ZP - fp2sqr_mont(t0, &P->X); // XP = (XP+ZP)^2 - fp2sub(&Q->X, &Q->Z, t2); // t2 = XQ-ZQ - fp2correction(t2); - fp2add(&Q->X, &Q->Z, &Q->X); // XQ = XQ+ZQ - fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) - fp2sqr_mont(t1, &P->Z); // ZP = (XP-ZP)^2 - fp2mul_mont(t1, &Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) - fp2sub(&P->X, &P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 - fp2mul_mont(&P->X, &P->Z, &P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 - fp2mul_mont(t2, A24, &Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] - fp2sub(t0, t1, &Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) - fp2add(&Q->X, &P->Z, &P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 - fp2add(t0, t1, &Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) - fp2mul_mont(&P->Z, t2, &P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] - fp2sqr_mont(&Q->Z, &Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 - fp2sqr_mont(&Q->X, &Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 - fp2mul_mont(&Q->Z, xPQ, &Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 -} - -static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) { // Swap points. 
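The swap below is the standard masked XOR swap: the same word operations run whether or not the points are exchanged, with the decision carried only by an all-zero or all-one mask. A minimal standalone sketch on plain 64-bit words (the NWORDS value and test data here are illustrative, not taken from the library):

/* Standalone sketch of the masked constant-time swap idiom used by
 * swap_points(); compile separately, not part of the library sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t digit_t;
#define NWORDS 4

static void cond_swap(digit_t *a, digit_t *b, digit_t option) {
    /* option is either 0 (keep) or 0xFF...FF (swap). */
    for (unsigned int i = 0; i < NWORDS; i++) {
        digit_t temp = option & (a[i] ^ b[i]);
        a[i] ^= temp;
        b[i] ^= temp;
    }
}

int main(void) {
    digit_t a[NWORDS] = {1, 2, 3, 4}, b[NWORDS] = {5, 6, 7, 8};
    unsigned int swap = 1;              /* decision bit, e.g. from a scalar */
    digit_t mask = 0 - (digit_t) swap;  /* 0 -> 0, 1 -> 0xFF...FF */

    cond_swap(a, b, mask);
    assert(a[0] == 5 && b[0] == 1);     /* swapped */

    cond_swap(a, b, (digit_t) 0);       /* zero mask leaves both untouched */
    assert(a[0] == 5 && b[0] == 1);

    puts("masked swap check passed");
    return 0;
}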
- // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P - for (unsigned int i = 0; i < NWORDS_FIELD; i++) { - digit_t temp = option & (P->X.e[0][i] ^ Q->X.e[0][i]); - P->X.e[0][i] = temp ^ P->X.e[0][i]; - Q->X.e[0][i] = temp ^ Q->X.e[0][i]; - temp = option & (P->Z.e[0][i] ^ Q->Z.e[0][i]); - P->Z.e[0][i] = temp ^ P->Z.e[0][i]; - Q->Z.e[0][i] = temp ^ Q->Z.e[0][i]; - temp = option & (P->X.e[1][i] ^ Q->X.e[1][i]); - P->X.e[1][i] = temp ^ P->X.e[1][i]; - Q->X.e[1][i] = temp ^ Q->X.e[1][i]; - temp = option & (P->Z.e[1][i] ^ Q->Z.e[1][i]); - P->Z.e[1][i] = temp ^ P->Z.e[1][i]; - Q->Z.e[1][i] = temp ^ Q->Z.e[1][i]; - } -} - -void LADDER3PT(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xPQ, const digit_t *m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t *A) { - point_proj_t R0 = {0}, R2 = {0}; - f2elm_t _A24 = {0}; - f2elm_t *A24=&_A24; - digit_t mask; - int i, nbits, swap, prevbit = 0; - - if (AliceOrBob == ALICE) { - nbits = OALICE_BITS; - } else { - nbits = OBOB_BITS - 1; - } - - // Initializing constant - fpcopy((const digit_t *) &Montgomery_one, A24->e[0]); - fp2add(A24, A24, A24); - fp2add(A, A24, A24); - fp2div2(A24, A24); - fp2div2(A24, A24); // A24 = (A+2)/4 - - // Initializing points - fp2copy(xQ, &R0->X); - fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R0->Z.e); - fp2copy(xPQ, &R2->X); - fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R2->Z.e); - fp2copy(xP, &R->X); - fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R->Z.e); - fpzero((digit_t *) (R->Z.e)[1]); - - // Main loop - for (i = 0; i < nbits; i++) { - int bit = (m[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1; - swap = bit ^ prevbit; - prevbit = bit; - mask = 0 - (digit_t) swap; - - swap_points(R, R2, mask); - xDBLADD(R0, R2, &R->X, A24); - fp2mul_mont(&R2->X, &R->Z, &R2->X); - } - swap = 0 ^ prevbit; - mask = 0 - (digit_t) swap; - swap_points(R, R2, mask); -} +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: elliptic curve and isogeny functions +*********************************************************************************************/ + +void xDBL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24) { // Doubling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X1:Z1), where x1=X1/Z1 and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q = 2*P = (X2:Z2). + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2sub(&P->X, &P->Z, t0); // t0 = X1-Z1 + fp2add(&P->X, &P->Z, t1); // t1 = X1+Z1 + fp2sqr_mont(t0, t0); // t0 = (X1-Z1)^2 + fp2sqr_mont(t1, t1); // t1 = (X1+Z1)^2 + fp2mul_mont(C24, t0, &Q->Z); // Z2 = C24*(X1-Z1)^2 + fp2mul_mont(t1, &Q->Z, &Q->X); // X2 = C24*(X1-Z1)^2*(X1+Z1)^2 + fp2sub(t1, t0, t1); // t1 = (X1+Z1)^2-(X1-Z1)^2 + fp2mul_mont(A24plus, t1, t0); // t0 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + fp2add(&Q->Z, t0, &Q->Z); // Z2 = A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2 + fp2mul_mont(&Q->Z, t1, &Q->Z); // Z2 = [A24plus*[(X1+Z1)^2-(X1-Z1)^2] + C24*(X1-Z1)^2]*[(X1+Z1)^2-(X1-Z1)^2] +} + +void xDBLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24plus, const f2elm_t *C24, const int e) { // Computes [2^e](X:Z) on Montgomery curve with projective constant via e repeated doublings. 
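LADDER3PT() above walks the scalar m one bit at a time with the expression (m[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1, i.e. word index then bit index within a little-endian digit array. A standalone sketch of that indexing, assuming 64-bit digits (RADIX = 64, LOG2RADIX = 6); the two-word test value is arbitrary:

/* Standalone check of the scalar bit indexing used in LADDER3PT();
 * compile separately, not part of the library sources. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

typedef uint64_t digit_t;
#define RADIX 64
#define LOG2RADIX 6

int main(void) {
    /* A 128-bit scalar stored as two little-endian 64-bit digits. */
    const digit_t m[2] = {0x0123456789abcdefULL, 0xfedcba9876543210ULL};

    for (int i = 0; i < 128; i++) {
        int bit = (int) ((m[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1); /* as in LADDER3PT */
        int ref = (i < 64) ? (int) ((m[0] >> i) & 1)
                           : (int) ((m[1] >> (i - 64)) & 1);
        assert(bit == ref);
    }
    puts("scalar bit indexing check passed");
    return 0;
}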
+ // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A+2C and 4C. + // Output: projective Montgomery x-coordinates Q <- (2^e)*P. + int i; + + copy_words((const digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xDBL(Q, Q, A24plus, C24); + } +} + +void get_4_isog(const point_proj_t P, f2elm_t *A24plus, f2elm_t *C24, f2elm_t *coeff) { // Computes the corresponding 4-isogeny of a projective Montgomery point (X4:Z4) of order 4. + // Input: projective point of order four P = (X4:Z4). + // Output: the 4-isogenous Montgomery curve with projective coefficients A+2C/4C and the 3 coefficients + // that are used to evaluate the isogeny at a point in eval_4_isog(). + + fp2sub(&P->X, &P->Z, &coeff[1]); // coeff[1] = X4-Z4 + fp2add(&P->X, &P->Z, &coeff[2]); // coeff[2] = X4+Z4 + fp2sqr_mont(&P->Z, &coeff[0]); // coeff[0] = Z4^2 + fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 2*Z4^2 + fp2sqr_mont(&coeff[0], C24); // C24 = 4*Z4^4 + fp2add(&coeff[0], &coeff[0], &coeff[0]); // coeff[0] = 4*Z4^2 + fp2sqr_mont(&P->X, A24plus); // A24plus = X4^2 + fp2add(A24plus, A24plus, A24plus); // A24plus = 2*X4^2 + fp2sqr_mont(A24plus, A24plus); // A24plus = 4*X4^4 +} + +void eval_4_isog(point_proj_t P, f2elm_t *coeff) { // Evaluates the isogeny at the point (X:Z) in the domain of the isogeny, given a 4-isogeny phi defined + // by the 3 coefficients in coeff (computed in the function get_4_isog()). + // Inputs: the coefficients defining the isogeny, and the projective point P = (X:Z). + // Output: the projective point P = phi(P) = (X:Z) in the codomain. + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2add(&P->X, &P->Z, t0); // t0 = X+Z + fp2sub(&P->X, &P->Z, t1); // t1 = X-Z + fp2mul_mont(t0, &coeff[1], &P->X); // X = (X+Z)*coeff[1] + fp2mul_mont(t1, &coeff[2], &P->Z); // Z = (X-Z)*coeff[2] + fp2mul_mont(t0, t1, t0); // t0 = (X+Z)*(X-Z) + fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff[0]*(X+Z)*(X-Z) + fp2add(&P->X, &P->Z, t1); // t1 = (X-Z)*coeff[2] + (X+Z)*coeff[1] + fp2sub(&P->X, &P->Z, &P->Z); // Z = (X-Z)*coeff[2] - (X+Z)*coeff[1] + fp2sqr_mont(t1, t1); // t1 = [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + fp2sqr_mont(&P->Z, &P->Z); // Z = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 + fp2add(t1, t0, &P->X); // X = coeff[0]*(X+Z)*(X-Z) + [(X-Z)*coeff[2] + (X+Z)*coeff[1]]^2 + fp2sub(&P->Z, t0, t0); // t0 = [(X-Z)*coeff[2] - (X+Z)*coeff[1]]^2 - coeff[0]*(X+Z)*(X-Z) + fp2mul_mont(&P->X, t1, &P->X); // Xfinal + fp2mul_mont(&P->Z, t0, &P->Z); // Zfinal +} + +void xTPL(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus) { // Tripling of a Montgomery point in projective coordinates (X:Z). + // Input: projective Montgomery x-coordinates P = (X:Z), where x=X/Z and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q = 3*P = (X3:Z3). 
+ f2elm_t _t0, _t1, _t2, _t3, _t4, _t5, _t6; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3, *t4=&_t4, *t5=&_t5, *t6=&_t6; + + fp2sub(&P->X, &P->Z, t0); // t0 = X-Z + fp2sqr_mont(t0, t2); // t2 = (X-Z)^2 + fp2add(&P->X, &P->Z, t1); // t1 = X+Z + fp2sqr_mont(t1, t3); // t3 = (X+Z)^2 + fp2add(t0, t1, t4); // t4 = 2*X + fp2sub(t1, t0, t0); // t0 = 2*Z + fp2sqr_mont(t4, t1); // t1 = 4*X^2 + fp2sub(t1, t3, t1); // t1 = 4*X^2 - (X+Z)^2 + fp2sub(t1, t2, t1); // t1 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + fp2mul_mont(t3, A24plus, t5); // t5 = A24plus*(X+Z)^2 + fp2mul_mont(t3, t5, t3); // t3 = A24plus*(X+Z)^3 + fp2mul_mont(A24minus, t2, t6); // t6 = A24minus*(X-Z)^2 + fp2mul_mont(t2, t6, t2); // t2 = A24minus*(X-Z)^3 + fp2sub(t2, t3, t3); // t3 = A24minus*(X-Z)^3 - coeff*(X+Z)^3 + fp2sub(t5, t6, t2); // t2 = A24plus*(X+Z)^2 - A24minus*(X-Z)^2 + fp2mul_mont(t1, t2, t1); // t1 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2add(t3, t1, t2); // t2 = [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + A24minus*(X-Z)^3 - coeff*(X+Z)^3 + fp2sqr_mont(t2, t2); // t2 = t2^2 + fp2mul_mont(t4, t2, &Q->X); // X3 = 2*X*t2 + fp2sub(t3, t1, t1); // t1 = A24minus*(X-Z)^3 - A24plus*(X+Z)^3 - [4*X^2 - (X+Z)^2 - (X-Z)^2]*[A24plus*(X+Z)^2 - A24minus*(X-Z)^2] + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(t0, t1, &Q->Z); // Z3 = 2*Z*t1 +} + +void xTPLe(const point_proj_t P, point_proj_t Q, const f2elm_t *A24minus, const f2elm_t *A24plus, const int e) { // Computes [3^e](X:Z) on Montgomery curve with projective constant via e repeated triplings. + // Input: projective Montgomery x-coordinates P = (XP:ZP), such that xP=XP/ZP and Montgomery curve constants A24plus = A+2C and A24minus = A-2C. + // Output: projective Montgomery x-coordinates Q <- (3^e)*P. + int i; + + copy_words((const digit_t *) P, (digit_t *) Q, 2 * 2 * NWORDS_FIELD); + + for (i = 0; i < e; i++) { + xTPL(Q, Q, A24minus, A24plus); + } +} + +void get_3_isog(const point_proj_t P, f2elm_t *A24minus, f2elm_t *A24plus, f2elm_t *coeff) { // Computes the corresponding 3-isogeny of a projective Montgomery point (X3:Z3) of order 3. + // Input: projective point of order three P = (X3:Z3). + // Output: the 3-isogenous Montgomery curve with projective coefficient A/C. 
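Following the step-by-step comments in the body below, get_3_isog() outputs coeff[0] = X-Z, coeff[1] = X+Z, A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] and A24plus = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2]; eval_3_isog() then uses the two coeff values to evaluate the isogeny at a point. This only restates the inline comments.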
+ f2elm_t _t0, _t1, _t2, _t3, _t4; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3, *t4=&_t4; + + fp2sub(&P->X, &P->Z, &coeff[0]); // coeff0 = X-Z + fp2sqr_mont(&coeff[0], t0); // t0 = (X-Z)^2 + fp2add(&P->X, &P->Z, &coeff[1]); // coeff1 = X+Z + fp2sqr_mont(&coeff[1], t1); // t1 = (X+Z)^2 + fp2add(t0, t1, t2); // t2 = (X+Z)^2 + (X-Z)^2 + fp2add(&coeff[0], &coeff[1], t3); // t3 = 2*X + fp2sqr_mont(t3, t3); // t3 = 4*X^2 + fp2sub(t3, t2, t3); // t3 = 4*X^2 - (X+Z)^2 - (X-Z)^2 + fp2add(t1, t3, t2); // t2 = 4*X^2 - (X-Z)^2 + fp2add(t3, t0, t3); // t3 = 4*X^2 - (X+Z)^2 + fp2add(t0, t3, t4); // t4 = 4*X^2 - (X+Z)^2 + (X-Z)^2 + fp2add(t4, t4, t4); // t4 = 2(4*X^2 - (X+Z)^2 + (X-Z)^2) + fp2add(t1, t4, t4); // t4 = 8*X^2 - (X+Z)^2 + 2*(X-Z)^2 + fp2mul_mont(t2, t4, A24minus); // A24minus = [4*X^2 - (X-Z)^2]*[8*X^2 - (X+Z)^2 + 2*(X-Z)^2] + fp2add(t1, t2, t4); // t4 = 4*X^2 + (X+Z)^2 - (X-Z)^2 + fp2add(t4, t4, t4); // t4 = 2(4*X^2 + (X+Z)^2 - (X-Z)^2) + fp2add(t0, t4, t4); // t4 = 8*X^2 + 2*(X+Z)^2 - (X-Z)^2 + fp2mul_mont(t3, t4, A24plus); // A24plus = [4*X^2 - (X+Z)^2]*[8*X^2 + 2*(X+Z)^2 - (X-Z)^2] +} + +void eval_3_isog(point_proj_t Q, const f2elm_t *coeff) { // Computes the 3-isogeny R=phi(X:Z), given projective point (X3:Z3) of order 3 on a Montgomery curve and + // a point P with 2 coefficients in coeff (computed in the function get_3_isog()). + // Inputs: projective points P = (X3:Z3) and Q = (X:Z). + // Output: the projective point Q <- phi(Q) = (X3:Z3). + f2elm_t _t0, _t1, _t2; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; + + fp2add(&Q->X, &Q->Z, t0); // t0 = X+Z + fp2sub(&Q->X, &Q->Z, t1); // t1 = X-Z + fp2mul_mont(t0, &coeff[0], t0); // t0 = coeff0*(X+Z) + fp2mul_mont(t1, &coeff[1], t1); // t1 = coeff1*(X-Z) + fp2add(t0, t1, t2); // t2 = coeff0*(X+Z) + coeff1*(X-Z) + fp2sub(t1, t0, t0); // t0 = coeff1*(X-Z) - coeff0*(X+Z) + fp2sqr_mont(t2, t2); // t2 = [coeff0*(X+Z) + coeff1*(X-Z)]^2 + fp2sqr_mont(t0, t0); // t0 = [coeff1*(X-Z) - coeff0*(X+Z)]^2 + fp2mul_mont(&Q->X, t2, &Q->X); // X3final = X*[coeff0*(X+Z) + coeff1*(X-Z)]^2 + fp2mul_mont(&Q->Z, t0, &Q->Z); // Z3final = Z*[coeff1*(X-Z) - coeff0*(X+Z)]^2 +} + +void inv_3_way(f2elm_t *z1, f2elm_t *z2, f2elm_t *z3) { // 3-way simultaneous inversion + // Input: z1,z2,z3 + // Output: 1/z1,1/z2,1/z3 (override inputs). + f2elm_t _t0, _t1, _t2, _t3; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2, *t3=&_t3; + + fp2mul_mont(z1, z2, t0); // t0 = z1*z2 + fp2mul_mont(z3, t0, t1); // t1 = z1*z2*z3 + fp2inv_mont(t1); // t1 = 1/(z1*z2*z3) + fp2mul_mont(z3, t1, t2); // t2 = 1/(z1*z2) + fp2mul_mont(t2, z2, t3); // t3 = 1/z1 + fp2mul_mont(t2, z1, z2); // z2 = 1/z2 + fp2mul_mont(t0, t1, z3); // z3 = 1/z3 + fp2copy(t3, z1); // z1 = 1/z1 +} + +void get_A(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xR, f2elm_t *A) { // Given the x-coordinates of P, Q, and R, returns the value A corresponding to the Montgomery curve E_A: y^2=x^3+A*x^2+x such that R=Q-P on E_A. + // Input: the x-coordinates xP, xQ, and xR of the points P, Q and R. + // Output: the coefficient A corresponding to the curve E_A: y^2=x^3+A*x^2+x. 
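The trick in inv_3_way() above (invert the product of all three values once, then recover each individual inverse with multiplications) can be checked in isolation. A standalone sketch over the integers modulo a small prime, with an illustrative modulus and a Fermat-exponentiation inverse standing in for fp2inv_mont():

/* Standalone sketch of 3-way simultaneous inversion; the prime P and the
 * mulmod/invmod helpers are illustrative only, not part of the library. */
#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define P 2147483647ULL /* 2^31 - 1, prime; keeps all products within 64 bits */

static uint64_t mulmod(uint64_t a, uint64_t b) { return (a * b) % P; }

static uint64_t invmod(uint64_t a) { /* a^(P-2) mod P by square-and-multiply */
    uint64_t r = 1, e = P - 2;
    while (e) {
        if (e & 1) r = mulmod(r, a);
        a = mulmod(a, a);
        e >>= 1;
    }
    return r;
}

int main(void) {
    uint64_t z1 = 12345, z2 = 6789, z3 = 424242;

    /* One inversion instead of three, mirroring inv_3_way(). */
    uint64_t t0 = mulmod(z1, z2);   /* z1*z2        */
    uint64_t t1 = mulmod(z3, t0);   /* z1*z2*z3     */
    t1 = invmod(t1);                /* 1/(z1*z2*z3) */
    uint64_t t2 = mulmod(z3, t1);   /* 1/(z1*z2)    */
    uint64_t inv1 = mulmod(t2, z2); /* 1/z1         */
    uint64_t inv2 = mulmod(t2, z1); /* 1/z2         */
    uint64_t inv3 = mulmod(t0, t1); /* 1/z3         */

    assert(mulmod(inv1, z1) == 1 && mulmod(inv2, z2) == 1 && mulmod(inv3, z3) == 1);
    puts("3-way inversion check passed");
    return 0;
}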
+ f2elm_t _t0, _t1, one = {0}; + f2elm_t *t0=&_t0, *t1=&_t1; + + fpcopy((const digit_t *) &Montgomery_one, one.e[0]); + fp2add(xP, xQ, t1); // t1 = xP+xQ + fp2mul_mont(xP, xQ, t0); // t0 = xP*xQ + fp2mul_mont(xR, t1, A); // A = xR*t1 + fp2add(t0, A, A); // A = A+t0 + fp2mul_mont(t0, xR, t0); // t0 = t0*xR + fp2sub(A, &one, A); // A = A-1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t1, xR, t1); // t1 = t1+xR + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(A, A); // A = A^2 + fp2inv_mont(t0); // t0 = 1/t0 + fp2mul_mont(A, t0, A); // A = A*t0 + fp2sub(A, t1, A); // Afinal = A-t1 +} + +void j_inv(const f2elm_t *A, const f2elm_t *C, f2elm_t *jinv) { // Computes the j-invariant of a Montgomery curve with projective constant. + // Input: A,C in GF(p^2). + // Output: j=256*(A^2-3*C^2)^3/(C^4*(A^2-4*C^2)), which is the j-invariant of the Montgomery curve B*y^2=x^3+(A/C)*x^2+x or (equivalently) j-invariant of B'*y^2=C*x^3+A*x^2+C*x. + f2elm_t _t0, _t1; + f2elm_t *t0=&_t0, *t1=&_t1; + + fp2sqr_mont(A, jinv); // jinv = A^2 + fp2sqr_mont(C, t1); // t1 = C^2 + fp2add(t1, t1, t0); // t0 = t1+t1 + fp2sub(jinv, t0, t0); // t0 = jinv-t0 + fp2sub(t0, t1, t0); // t0 = t0-t1 + fp2sub(t0, t1, jinv); // jinv = t0-t1 + fp2sqr_mont(t1, t1); // t1 = t1^2 + fp2mul_mont(jinv, t1, jinv); // jinv = jinv*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2sqr_mont(t0, t1); // t1 = t0^2 + fp2mul_mont(t0, t1, t0); // t0 = t0*t1 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2add(t0, t0, t0); // t0 = t0+t0 + fp2inv_mont(jinv); // jinv = 1/jinv + fp2mul_mont(jinv, t0, jinv); // jinv = t0*jinv +} + +void xDBLADD(point_proj_t P, point_proj_t Q, const f2elm_t *xPQ, const f2elm_t *A24) { // Simultaneous doubling and differential addition. + // Input: projective Montgomery points P=(XP:ZP) and Q=(XQ:ZQ) such that xP=XP/ZP and xQ=XQ/ZQ, affine difference xPQ=x(P-Q) and Montgomery curve constant A24=(A+2)/4. + // Output: projective Montgomery points P <- 2*P = (X2P:Z2P) such that x(2P)=X2P/Z2P, and Q <- P+Q = (XQP:ZQP) such that = x(Q+P)=XQP/ZQP. + f2elm_t _t0, _t1, _t2; + f2elm_t *t0=&_t0, *t1=&_t1, *t2=&_t2; + + fp2add(&P->X, &P->Z, t0); // t0 = XP+ZP + fp2sub(&P->X, &P->Z, t1); // t1 = XP-ZP + fp2sqr_mont(t0, &P->X); // XP = (XP+ZP)^2 + fp2sub(&Q->X, &Q->Z, t2); // t2 = XQ-ZQ + fp2correction(t2); + fp2add(&Q->X, &Q->Z, &Q->X); // XQ = XQ+ZQ + fp2mul_mont(t0, t2, t0); // t0 = (XP+ZP)*(XQ-ZQ) + fp2sqr_mont(t1, &P->Z); // ZP = (XP-ZP)^2 + fp2mul_mont(t1, &Q->X, t1); // t1 = (XP-ZP)*(XQ+ZQ) + fp2sub(&P->X, &P->Z, t2); // t2 = (XP+ZP)^2-(XP-ZP)^2 + fp2mul_mont(&P->X, &P->Z, &P->X); // XP = (XP+ZP)^2*(XP-ZP)^2 + fp2mul_mont(t2, A24, &Q->X); // XQ = A24*[(XP+ZP)^2-(XP-ZP)^2] + fp2sub(t0, t1, &Q->Z); // ZQ = (XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ) + fp2add(&Q->X, &P->Z, &P->Z); // ZP = A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2 + fp2add(t0, t1, &Q->X); // XQ = (XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ) + fp2mul_mont(&P->Z, t2, &P->Z); // ZP = [A24*[(XP+ZP)^2-(XP-ZP)^2]+(XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2] + fp2sqr_mont(&Q->Z, &Q->Z); // ZQ = [(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 + fp2sqr_mont(&Q->X, &Q->X); // XQ = [(XP+ZP)*(XQ-ZQ)+(XP-ZP)*(XQ+ZQ)]^2 + fp2mul_mont(&Q->Z, xPQ, &Q->Z); // ZQ = xPQ*[(XP+ZP)*(XQ-ZQ)-(XP-ZP)*(XQ+ZQ)]^2 +} + +static void swap_points(point_proj_t P, point_proj_t Q, const digit_t option) { // Swap points. 
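Restating the final comments of xDBLADD() above: with inputs P = (XP:ZP), Q = (XQ:ZQ), the affine difference xPQ = x(P-Q) and A24 = (A+2)/4, the differential-addition half returns X(P+Q) = [(XP+ZP)*(XQ-ZQ) + (XP-ZP)*(XQ+ZQ)]^2 and Z(P+Q) = xPQ*[(XP+ZP)*(XQ-ZQ) - (XP-ZP)*(XQ+ZQ)]^2, while the doubling half returns X(2P) = (XP+ZP)^2*(XP-ZP)^2 and Z(2P) = [A24*((XP+ZP)^2-(XP-ZP)^2) + (XP-ZP)^2]*[(XP+ZP)^2-(XP-ZP)^2]. This is only a transcription of the comments for quick reference.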
+ // If option = 0 then P <- P and Q <- Q, else if option = 0xFF...FF then P <- Q and Q <- P + for (unsigned int i = 0; i < NWORDS_FIELD; i++) { + digit_t temp = option & (P->X.e[0][i] ^ Q->X.e[0][i]); + P->X.e[0][i] = temp ^ P->X.e[0][i]; + Q->X.e[0][i] = temp ^ Q->X.e[0][i]; + temp = option & (P->Z.e[0][i] ^ Q->Z.e[0][i]); + P->Z.e[0][i] = temp ^ P->Z.e[0][i]; + Q->Z.e[0][i] = temp ^ Q->Z.e[0][i]; + temp = option & (P->X.e[1][i] ^ Q->X.e[1][i]); + P->X.e[1][i] = temp ^ P->X.e[1][i]; + Q->X.e[1][i] = temp ^ Q->X.e[1][i]; + temp = option & (P->Z.e[1][i] ^ Q->Z.e[1][i]); + P->Z.e[1][i] = temp ^ P->Z.e[1][i]; + Q->Z.e[1][i] = temp ^ Q->Z.e[1][i]; + } +} + +void LADDER3PT(const f2elm_t *xP, const f2elm_t *xQ, const f2elm_t *xPQ, const digit_t *m, const unsigned int AliceOrBob, point_proj_t R, const f2elm_t *A) { + point_proj_t R0 = {0}, R2 = {0}; + f2elm_t _A24 = {0}; + f2elm_t *A24=&_A24; + digit_t mask; + int i, nbits, swap, prevbit = 0; + + if (AliceOrBob == ALICE) { + nbits = OALICE_BITS; + } else { + nbits = OBOB_BITS - 1; + } + + // Initializing constant + fpcopy((const digit_t *) &Montgomery_one, A24->e[0]); + fp2add(A24, A24, A24); + fp2add(A, A24, A24); + fp2div2(A24, A24); + fp2div2(A24, A24); // A24 = (A+2)/4 + + // Initializing points + fp2copy(xQ, &R0->X); + fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R0->Z.e); + fp2copy(xPQ, &R2->X); + fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R2->Z.e); + fp2copy(xP, &R->X); + fpcopy((const digit_t *) &Montgomery_one, (digit_t *) R->Z.e); + fpzero((digit_t *) (R->Z.e)[1]); + + // Main loop + for (i = 0; i < nbits; i++) { + int bit = (m[i >> LOG2RADIX] >> (i & (RADIX - 1))) & 1; + swap = bit ^ prevbit; + prevbit = bit; + mask = 0 - (digit_t) swap; + + swap_points(R, R2, mask); + xDBLADD(R0, R2, &R->X, A24); + fp2mul_mont(&R2->X, &R->Z, &R2->X); + } + swap = 0 ^ prevbit; + mask = 0 - (digit_t) swap; + swap_points(R, R2, mask); +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.c index 8289a526b3..64bc852e07 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.c @@ -1,461 +1,461 @@ -/* Based on the public domain implementation in - * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html - * by Ronny Van Keer - * and the public domain "TweetFips202" implementation - * from https://twitter.com/tweetfips202 - * by Gilles Van Assche, Daniel J. 
Bernstein, and Peter Schwabe */ - -#include <stddef.h> -#include <stdint.h> -#include "fips202.h" - -#define NROUNDS 24 -#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) - -/************************************************* - * Name: load64 - * - * Description: Load 8 bytes into uint64_t in little-endian order - * - * Arguments: - const uint8_t *x: pointer to input byte array - * - * Returns the loaded 64-bit unsigned integer - **************************************************/ -static uint64_t load64(const uint8_t *x) { - uint64_t r = 0; - for (size_t i = 0; i < 8; ++i) { - r |= (uint64_t) x[i] << 8 * i; - } - - return r; -} - -/************************************************* - * Name: store64 - * - * Description: Store a 64-bit integer to a byte array in little-endian order - * - * Arguments: - uint8_t *x: pointer to the output byte array - * - uint64_t u: input 64-bit unsigned integer - **************************************************/ -static void store64(uint8_t *x, uint64_t u) { - for (size_t i = 0; i < 8; ++i) { - x[i] = (uint8_t) (u >> 8 * i); - } -} - -/* Keccak round constants */ -static const uint64_t KeccakF_RoundConstants[NROUNDS] = { - 0x0000000000000001ULL, 0x0000000000008082ULL, - 0x800000000000808aULL, 0x8000000080008000ULL, - 0x000000000000808bULL, 0x0000000080000001ULL, - 0x8000000080008081ULL, 0x8000000000008009ULL, - 0x000000000000008aULL, 0x0000000000000088ULL, - 0x0000000080008009ULL, 0x000000008000000aULL, - 0x000000008000808bULL, 0x800000000000008bULL, - 0x8000000000008089ULL, 0x8000000000008003ULL, - 0x8000000000008002ULL, 0x8000000000000080ULL, - 0x000000000000800aULL, 0x800000008000000aULL, - 0x8000000080008081ULL, 0x8000000000008080ULL, - 0x0000000080000001ULL, 0x8000000080008008ULL}; - -/************************************************* - * Name: KeccakF1600_StatePermute - * - * Description: The Keccak F1600 Permutation - * - * Arguments: - uint64_t *state: pointer to input/output Keccak state - **************************************************/ -static void KeccakF1600_StatePermute(uint64_t *state) { - int round; - - uint64_t Aba, Abe, Abi, Abo, Abu; - uint64_t Aga, Age, Agi, Ago, Agu; - uint64_t Aka, Ake, Aki, Ako, Aku; - uint64_t Ama, Ame, Ami, Amo, Amu; - uint64_t Asa, Ase, Asi, Aso, Asu; - uint64_t BCa, BCe, BCi, BCo, BCu; - - // copyFromState(A, state) - Aba = state[0]; - Abe = state[1]; - Abi = state[2]; - Abo = state[3]; - Abu = state[4]; - Aga = state[5]; - Age = state[6]; - Agi = state[7]; - Ago = state[8]; - Agu = state[9]; - Aka = state[10]; - Ake = state[11]; - Aki = state[12]; - Ako = state[13]; - Aku = state[14]; - Ama = state[15]; - Ame = state[16]; - Ami = state[17]; - Amo = state[18]; - Amu = state[19]; - Asa = state[20]; - Ase = state[21]; - Asi = state[22]; - Aso = state[23]; - Asu = state[24]; - - for (round = 0; round < NROUNDS; round += 2) { - uint64_t Da, De, Di, Do, Du; - uint64_t Eba, Ebe, Ebi, Ebo, Ebu; - uint64_t Ega, Ege, Egi, Ego, Egu; - uint64_t Eka, Eke, Eki, Eko, Eku; - uint64_t Ema, Eme, Emi, Emo, Emu; - uint64_t Esa, Ese, Esi, Eso, Esu; - - // prepareTheta - BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; - BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; - BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; - BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; - BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; - - // thetaRhoPiChiIotaPrepareTheta(round , A, E) - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Aba ^= Da; - BCa = Aba; - Age ^= De; - BCe = ROL(Age, 44); - Aki ^= Di; - BCi = 
ROL(Aki, 43); - Amo ^= Do; - BCo = ROL(Amo, 21); - Asu ^= Du; - BCu = ROL(Asu, 14); - Eba = BCa ^ ((~BCe) & BCi); - Eba ^= KeccakF_RoundConstants[round]; - Ebe = BCe ^ ((~BCi) & BCo); - Ebi = BCi ^ ((~BCo) & BCu); - Ebo = BCo ^ ((~BCu) & BCa); - Ebu = BCu ^ ((~BCa) & BCe); - - Abo ^= Do; - BCa = ROL(Abo, 28); - Agu ^= Du; - BCe = ROL(Agu, 20); - Aka ^= Da; - BCi = ROL(Aka, 3); - Ame ^= De; - BCo = ROL(Ame, 45); - Asi ^= Di; - BCu = ROL(Asi, 61); - Ega = BCa ^ ((~BCe) & BCi); - Ege = BCe ^ ((~BCi) & BCo); - Egi = BCi ^ ((~BCo) & BCu); - Ego = BCo ^ ((~BCu) & BCa); - Egu = BCu ^ ((~BCa) & BCe); - - Abe ^= De; - BCa = ROL(Abe, 1); - Agi ^= Di; - BCe = ROL(Agi, 6); - Ako ^= Do; - BCi = ROL(Ako, 25); - Amu ^= Du; - BCo = ROL(Amu, 8); - Asa ^= Da; - BCu = ROL(Asa, 18); - Eka = BCa ^ ((~BCe) & BCi); - Eke = BCe ^ ((~BCi) & BCo); - Eki = BCi ^ ((~BCo) & BCu); - Eko = BCo ^ ((~BCu) & BCa); - Eku = BCu ^ ((~BCa) & BCe); - - Abu ^= Du; - BCa = ROL(Abu, 27); - Aga ^= Da; - BCe = ROL(Aga, 36); - Ake ^= De; - BCi = ROL(Ake, 10); - Ami ^= Di; - BCo = ROL(Ami, 15); - Aso ^= Do; - BCu = ROL(Aso, 56); - Ema = BCa ^ ((~BCe) & BCi); - Eme = BCe ^ ((~BCi) & BCo); - Emi = BCi ^ ((~BCo) & BCu); - Emo = BCo ^ ((~BCu) & BCa); - Emu = BCu ^ ((~BCa) & BCe); - - Abi ^= Di; - BCa = ROL(Abi, 62); - Ago ^= Do; - BCe = ROL(Ago, 55); - Aku ^= Du; - BCi = ROL(Aku, 39); - Ama ^= Da; - BCo = ROL(Ama, 41); - Ase ^= De; - BCu = ROL(Ase, 2); - Esa = BCa ^ ((~BCe) & BCi); - Ese = BCe ^ ((~BCi) & BCo); - Esi = BCi ^ ((~BCo) & BCu); - Eso = BCo ^ ((~BCu) & BCa); - Esu = BCu ^ ((~BCa) & BCe); - - // prepareTheta - BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; - BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; - BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; - BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; - BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; - - // thetaRhoPiChiIotaPrepareTheta(round+1, E, A) - Da = BCu ^ ROL(BCe, 1); - De = BCa ^ ROL(BCi, 1); - Di = BCe ^ ROL(BCo, 1); - Do = BCi ^ ROL(BCu, 1); - Du = BCo ^ ROL(BCa, 1); - - Eba ^= Da; - BCa = Eba; - Ege ^= De; - BCe = ROL(Ege, 44); - Eki ^= Di; - BCi = ROL(Eki, 43); - Emo ^= Do; - BCo = ROL(Emo, 21); - Esu ^= Du; - BCu = ROL(Esu, 14); - Aba = BCa ^ ((~BCe) & BCi); - Aba ^= KeccakF_RoundConstants[round + 1]; - Abe = BCe ^ ((~BCi) & BCo); - Abi = BCi ^ ((~BCo) & BCu); - Abo = BCo ^ ((~BCu) & BCa); - Abu = BCu ^ ((~BCa) & BCe); - - Ebo ^= Do; - BCa = ROL(Ebo, 28); - Egu ^= Du; - BCe = ROL(Egu, 20); - Eka ^= Da; - BCi = ROL(Eka, 3); - Eme ^= De; - BCo = ROL(Eme, 45); - Esi ^= Di; - BCu = ROL(Esi, 61); - Aga = BCa ^ ((~BCe) & BCi); - Age = BCe ^ ((~BCi) & BCo); - Agi = BCi ^ ((~BCo) & BCu); - Ago = BCo ^ ((~BCu) & BCa); - Agu = BCu ^ ((~BCa) & BCe); - - Ebe ^= De; - BCa = ROL(Ebe, 1); - Egi ^= Di; - BCe = ROL(Egi, 6); - Eko ^= Do; - BCi = ROL(Eko, 25); - Emu ^= Du; - BCo = ROL(Emu, 8); - Esa ^= Da; - BCu = ROL(Esa, 18); - Aka = BCa ^ ((~BCe) & BCi); - Ake = BCe ^ ((~BCi) & BCo); - Aki = BCi ^ ((~BCo) & BCu); - Ako = BCo ^ ((~BCu) & BCa); - Aku = BCu ^ ((~BCa) & BCe); - - Ebu ^= Du; - BCa = ROL(Ebu, 27); - Ega ^= Da; - BCe = ROL(Ega, 36); - Eke ^= De; - BCi = ROL(Eke, 10); - Emi ^= Di; - BCo = ROL(Emi, 15); - Eso ^= Do; - BCu = ROL(Eso, 56); - Ama = BCa ^ ((~BCe) & BCi); - Ame = BCe ^ ((~BCi) & BCo); - Ami = BCi ^ ((~BCo) & BCu); - Amo = BCo ^ ((~BCu) & BCa); - Amu = BCu ^ ((~BCa) & BCe); - - Ebi ^= Di; - BCa = ROL(Ebi, 62); - Ego ^= Do; - BCe = ROL(Ego, 55); - Eku ^= Du; - BCi = ROL(Eku, 39); - Ema ^= Da; - BCo = ROL(Ema, 41); - Ese ^= De; - BCu = ROL(Ese, 2); - Asa = BCa ^ ((~BCe) & BCi); - Ase = BCe ^ ((~BCi) & BCo); - Asi = BCi ^ 
((~BCo) & BCu); - Aso = BCo ^ ((~BCu) & BCa); - Asu = BCu ^ ((~BCa) & BCe); - } - - // copyToState(state, A) - state[0] = Aba; - state[1] = Abe; - state[2] = Abi; - state[3] = Abo; - state[4] = Abu; - state[5] = Aga; - state[6] = Age; - state[7] = Agi; - state[8] = Ago; - state[9] = Agu; - state[10] = Aka; - state[11] = Ake; - state[12] = Aki; - state[13] = Ako; - state[14] = Aku; - state[15] = Ama; - state[16] = Ame; - state[17] = Ami; - state[18] = Amo; - state[19] = Amu; - state[20] = Asa; - state[21] = Ase; - state[22] = Asi; - state[23] = Aso; - state[24] = Asu; -} - -/************************************************* - * Name: keccak_absorb - * - * Description: Absorb step of Keccak; - * non-incremental, starts by zeroeing the state. - * - * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - * - const uint8_t *m: pointer to input to be absorbed into s - * - size_t mlen: length of input in bytes - * - uint8_t p: domain-separation byte for different - * Keccak-derived functions - **************************************************/ -static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, size_t mlen, uint8_t p) { - size_t i; - uint8_t t[200]; - - /* Zero state */ - for (i = 0; i < 25; ++i) { - s[i] = 0; - } - - while (mlen >= r) { - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(m + 8 * i); - } - - KeccakF1600_StatePermute(s); - mlen -= r; - m += r; - } - - for (i = 0; i < r; ++i) { - t[i] = 0; - } - for (i = 0; i < mlen; ++i) { - t[i] = m[i]; - } - t[i] = p; - t[r - 1] |= 128; - for (i = 0; i < r / 8; ++i) { - s[i] ^= load64(t + 8 * i); - } -} - -/************************************************* - * Name: keccak_squeezeblocks - * - * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. - * Modifies the state. Can be called multiple times to keep - * squeezing, i.e., is incremental. - * - * Arguments: - uint8_t *h: pointer to output blocks - * - size_t nblocks: number of blocks to be - * squeezed (written to h) - * - uint64_t *s: pointer to input/output Keccak state - * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) - **************************************************/ -static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, uint32_t r) { - while (nblocks > 0) { - KeccakF1600_StatePermute(s); - for (size_t i = 0; i < (r >> 3); i++) { - store64(h + 8 * i, s[i]); - } - h += r; - nblocks--; - } -} - -/************************************************* - * Name: shake256_absorb - * - * Description: Absorb step of the SHAKE256 XOF. - * non-incremental, starts by zeroeing the state. - * - * Arguments: - shake256ctx *state: pointer to (uninitialized) output Keccak state - * - const uint8_t *input: pointer to input to be absorbed - * into s - * - size_t inlen: length of input in bytes - **************************************************/ -static void shake256_absorb(shake256_ctx *state, const uint8_t *input, size_t inlen) { - keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F); -} - -/************************************************* - * Name: shake256_squeezeblocks - * - * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of - * SHAKE256_RATE bytes each. Modifies the state. Can be called - * multiple times to keep squeezing, i.e., is incremental. 
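For orientation, the one-shot shake256() defined further down in this file is the only entry point most callers need. A minimal usage sketch, assuming fips202.h declares shake256() with the signature used here and that this translation unit is linked in; the message and the 32-byte output length are arbitrary:

/* Standalone usage sketch for the one-shot SHAKE256 API in this file;
 * compile and link against fips202.c, not part of the library sources. */
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include "fips202.h"

int main(void) {
    const uint8_t msg[] = "s2n pq-crypto";
    uint8_t out[32];

    /* Request 32 bytes of SHAKE256 output for the message (without the NUL). */
    shake256(out, sizeof out, msg, strlen((const char *) msg));

    for (size_t i = 0; i < sizeof out; i++)
        printf("%02x", (unsigned) out[i]);
    printf("\n");
    return 0;
}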
- * - * Arguments: - uint8_t *output: pointer to output blocks - * - size_t nblocks: number of blocks to be squeezed - * (written to output) - * - shake256ctx *state: pointer to input/output Keccak state - **************************************************/ -static void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256_ctx *state) { - keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); -} - -/************************************************* - * Name: shake256 - * - * Description: SHAKE256 XOF with non-incremental API - * - * Arguments: - uint8_t *output: pointer to output - * - size_t outlen: requested output length in bytes - * - const uint8_t *input: pointer to input - * - size_t inlen: length of input in bytes - **************************************************/ -void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen) { - size_t nblocks = outlen / SHAKE256_RATE; - uint8_t t[SHAKE256_RATE]; - shake256_ctx s; - - shake256_absorb(&s, input, inlen); - shake256_squeezeblocks(output, nblocks, &s); - - output += nblocks * SHAKE256_RATE; - outlen -= nblocks * SHAKE256_RATE; - - if (outlen) { - shake256_squeezeblocks(t, 1, &s); - for (size_t i = 0; i < outlen; ++i) { - output[i] = t[i]; - } - } -} - +/* Based on the public domain implementation in + * crypto_hash/keccakc512/simple/ from http://bench.cr.yp.to/supercop.html + * by Ronny Van Keer + * and the public domain "TweetFips202" implementation + * from https://twitter.com/tweetfips202 + * by Gilles Van Assche, Daniel J. Bernstein, and Peter Schwabe */ + +#include <stddef.h> +#include <stdint.h> +#include "fips202.h" + +#define NROUNDS 24 +#define ROL(a, offset) (((a) << (offset)) ^ ((a) >> (64 - (offset)))) + +/************************************************* + * Name: load64 + * + * Description: Load 8 bytes into uint64_t in little-endian order + * + * Arguments: - const uint8_t *x: pointer to input byte array + * + * Returns the loaded 64-bit unsigned integer + **************************************************/ +static uint64_t load64(const uint8_t *x) { + uint64_t r = 0; + for (size_t i = 0; i < 8; ++i) { + r |= (uint64_t) x[i] << 8 * i; + } + + return r; +} + +/************************************************* + * Name: store64 + * + * Description: Store a 64-bit integer to a byte array in little-endian order + * + * Arguments: - uint8_t *x: pointer to the output byte array + * - uint64_t u: input 64-bit unsigned integer + **************************************************/ +static void store64(uint8_t *x, uint64_t u) { + for (size_t i = 0; i < 8; ++i) { + x[i] = (uint8_t) (u >> 8 * i); + } +} + +/* Keccak round constants */ +static const uint64_t KeccakF_RoundConstants[NROUNDS] = { + 0x0000000000000001ULL, 0x0000000000008082ULL, + 0x800000000000808aULL, 0x8000000080008000ULL, + 0x000000000000808bULL, 0x0000000080000001ULL, + 0x8000000080008081ULL, 0x8000000000008009ULL, + 0x000000000000008aULL, 0x0000000000000088ULL, + 0x0000000080008009ULL, 0x000000008000000aULL, + 0x000000008000808bULL, 0x800000000000008bULL, + 0x8000000000008089ULL, 0x8000000000008003ULL, + 0x8000000000008002ULL, 0x8000000000000080ULL, + 0x000000000000800aULL, 0x800000008000000aULL, + 0x8000000080008081ULL, 0x8000000000008080ULL, + 0x0000000080000001ULL, 0x8000000080008008ULL}; + +/************************************************* + * Name: KeccakF1600_StatePermute + * + * Description: The Keccak F1600 Permutation + * + * Arguments: - uint64_t *state: pointer to input/output Keccak state + 
**************************************************/ +static void KeccakF1600_StatePermute(uint64_t *state) { + int round; + + uint64_t Aba, Abe, Abi, Abo, Abu; + uint64_t Aga, Age, Agi, Ago, Agu; + uint64_t Aka, Ake, Aki, Ako, Aku; + uint64_t Ama, Ame, Ami, Amo, Amu; + uint64_t Asa, Ase, Asi, Aso, Asu; + uint64_t BCa, BCe, BCi, BCo, BCu; + + // copyFromState(A, state) + Aba = state[0]; + Abe = state[1]; + Abi = state[2]; + Abo = state[3]; + Abu = state[4]; + Aga = state[5]; + Age = state[6]; + Agi = state[7]; + Ago = state[8]; + Agu = state[9]; + Aka = state[10]; + Ake = state[11]; + Aki = state[12]; + Ako = state[13]; + Aku = state[14]; + Ama = state[15]; + Ame = state[16]; + Ami = state[17]; + Amo = state[18]; + Amu = state[19]; + Asa = state[20]; + Ase = state[21]; + Asi = state[22]; + Aso = state[23]; + Asu = state[24]; + + for (round = 0; round < NROUNDS; round += 2) { + uint64_t Da, De, Di, Do, Du; + uint64_t Eba, Ebe, Ebi, Ebo, Ebu; + uint64_t Ega, Ege, Egi, Ego, Egu; + uint64_t Eka, Eke, Eki, Eko, Eku; + uint64_t Ema, Eme, Emi, Emo, Emu; + uint64_t Esa, Ese, Esi, Eso, Esu; + + // prepareTheta + BCa = Aba ^ Aga ^ Aka ^ Ama ^ Asa; + BCe = Abe ^ Age ^ Ake ^ Ame ^ Ase; + BCi = Abi ^ Agi ^ Aki ^ Ami ^ Asi; + BCo = Abo ^ Ago ^ Ako ^ Amo ^ Aso; + BCu = Abu ^ Agu ^ Aku ^ Amu ^ Asu; + + // thetaRhoPiChiIotaPrepareTheta(round , A, E) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Aba ^= Da; + BCa = Aba; + Age ^= De; + BCe = ROL(Age, 44); + Aki ^= Di; + BCi = ROL(Aki, 43); + Amo ^= Do; + BCo = ROL(Amo, 21); + Asu ^= Du; + BCu = ROL(Asu, 14); + Eba = BCa ^ ((~BCe) & BCi); + Eba ^= KeccakF_RoundConstants[round]; + Ebe = BCe ^ ((~BCi) & BCo); + Ebi = BCi ^ ((~BCo) & BCu); + Ebo = BCo ^ ((~BCu) & BCa); + Ebu = BCu ^ ((~BCa) & BCe); + + Abo ^= Do; + BCa = ROL(Abo, 28); + Agu ^= Du; + BCe = ROL(Agu, 20); + Aka ^= Da; + BCi = ROL(Aka, 3); + Ame ^= De; + BCo = ROL(Ame, 45); + Asi ^= Di; + BCu = ROL(Asi, 61); + Ega = BCa ^ ((~BCe) & BCi); + Ege = BCe ^ ((~BCi) & BCo); + Egi = BCi ^ ((~BCo) & BCu); + Ego = BCo ^ ((~BCu) & BCa); + Egu = BCu ^ ((~BCa) & BCe); + + Abe ^= De; + BCa = ROL(Abe, 1); + Agi ^= Di; + BCe = ROL(Agi, 6); + Ako ^= Do; + BCi = ROL(Ako, 25); + Amu ^= Du; + BCo = ROL(Amu, 8); + Asa ^= Da; + BCu = ROL(Asa, 18); + Eka = BCa ^ ((~BCe) & BCi); + Eke = BCe ^ ((~BCi) & BCo); + Eki = BCi ^ ((~BCo) & BCu); + Eko = BCo ^ ((~BCu) & BCa); + Eku = BCu ^ ((~BCa) & BCe); + + Abu ^= Du; + BCa = ROL(Abu, 27); + Aga ^= Da; + BCe = ROL(Aga, 36); + Ake ^= De; + BCi = ROL(Ake, 10); + Ami ^= Di; + BCo = ROL(Ami, 15); + Aso ^= Do; + BCu = ROL(Aso, 56); + Ema = BCa ^ ((~BCe) & BCi); + Eme = BCe ^ ((~BCi) & BCo); + Emi = BCi ^ ((~BCo) & BCu); + Emo = BCo ^ ((~BCu) & BCa); + Emu = BCu ^ ((~BCa) & BCe); + + Abi ^= Di; + BCa = ROL(Abi, 62); + Ago ^= Do; + BCe = ROL(Ago, 55); + Aku ^= Du; + BCi = ROL(Aku, 39); + Ama ^= Da; + BCo = ROL(Ama, 41); + Ase ^= De; + BCu = ROL(Ase, 2); + Esa = BCa ^ ((~BCe) & BCi); + Ese = BCe ^ ((~BCi) & BCo); + Esi = BCi ^ ((~BCo) & BCu); + Eso = BCo ^ ((~BCu) & BCa); + Esu = BCu ^ ((~BCa) & BCe); + + // prepareTheta + BCa = Eba ^ Ega ^ Eka ^ Ema ^ Esa; + BCe = Ebe ^ Ege ^ Eke ^ Eme ^ Ese; + BCi = Ebi ^ Egi ^ Eki ^ Emi ^ Esi; + BCo = Ebo ^ Ego ^ Eko ^ Emo ^ Eso; + BCu = Ebu ^ Egu ^ Eku ^ Emu ^ Esu; + + // thetaRhoPiChiIotaPrepareTheta(round+1, E, A) + Da = BCu ^ ROL(BCe, 1); + De = BCa ^ ROL(BCi, 1); + Di = BCe ^ ROL(BCo, 1); + Do = BCi ^ ROL(BCu, 1); + Du = BCo ^ ROL(BCa, 1); + + Eba ^= Da; + BCa = 
Eba; + Ege ^= De; + BCe = ROL(Ege, 44); + Eki ^= Di; + BCi = ROL(Eki, 43); + Emo ^= Do; + BCo = ROL(Emo, 21); + Esu ^= Du; + BCu = ROL(Esu, 14); + Aba = BCa ^ ((~BCe) & BCi); + Aba ^= KeccakF_RoundConstants[round + 1]; + Abe = BCe ^ ((~BCi) & BCo); + Abi = BCi ^ ((~BCo) & BCu); + Abo = BCo ^ ((~BCu) & BCa); + Abu = BCu ^ ((~BCa) & BCe); + + Ebo ^= Do; + BCa = ROL(Ebo, 28); + Egu ^= Du; + BCe = ROL(Egu, 20); + Eka ^= Da; + BCi = ROL(Eka, 3); + Eme ^= De; + BCo = ROL(Eme, 45); + Esi ^= Di; + BCu = ROL(Esi, 61); + Aga = BCa ^ ((~BCe) & BCi); + Age = BCe ^ ((~BCi) & BCo); + Agi = BCi ^ ((~BCo) & BCu); + Ago = BCo ^ ((~BCu) & BCa); + Agu = BCu ^ ((~BCa) & BCe); + + Ebe ^= De; + BCa = ROL(Ebe, 1); + Egi ^= Di; + BCe = ROL(Egi, 6); + Eko ^= Do; + BCi = ROL(Eko, 25); + Emu ^= Du; + BCo = ROL(Emu, 8); + Esa ^= Da; + BCu = ROL(Esa, 18); + Aka = BCa ^ ((~BCe) & BCi); + Ake = BCe ^ ((~BCi) & BCo); + Aki = BCi ^ ((~BCo) & BCu); + Ako = BCo ^ ((~BCu) & BCa); + Aku = BCu ^ ((~BCa) & BCe); + + Ebu ^= Du; + BCa = ROL(Ebu, 27); + Ega ^= Da; + BCe = ROL(Ega, 36); + Eke ^= De; + BCi = ROL(Eke, 10); + Emi ^= Di; + BCo = ROL(Emi, 15); + Eso ^= Do; + BCu = ROL(Eso, 56); + Ama = BCa ^ ((~BCe) & BCi); + Ame = BCe ^ ((~BCi) & BCo); + Ami = BCi ^ ((~BCo) & BCu); + Amo = BCo ^ ((~BCu) & BCa); + Amu = BCu ^ ((~BCa) & BCe); + + Ebi ^= Di; + BCa = ROL(Ebi, 62); + Ego ^= Do; + BCe = ROL(Ego, 55); + Eku ^= Du; + BCi = ROL(Eku, 39); + Ema ^= Da; + BCo = ROL(Ema, 41); + Ese ^= De; + BCu = ROL(Ese, 2); + Asa = BCa ^ ((~BCe) & BCi); + Ase = BCe ^ ((~BCi) & BCo); + Asi = BCi ^ ((~BCo) & BCu); + Aso = BCo ^ ((~BCu) & BCa); + Asu = BCu ^ ((~BCa) & BCe); + } + + // copyToState(state, A) + state[0] = Aba; + state[1] = Abe; + state[2] = Abi; + state[3] = Abo; + state[4] = Abu; + state[5] = Aga; + state[6] = Age; + state[7] = Agi; + state[8] = Ago; + state[9] = Agu; + state[10] = Aka; + state[11] = Ake; + state[12] = Aki; + state[13] = Ako; + state[14] = Aku; + state[15] = Ama; + state[16] = Ame; + state[17] = Ami; + state[18] = Amo; + state[19] = Amu; + state[20] = Asa; + state[21] = Ase; + state[22] = Asi; + state[23] = Aso; + state[24] = Asu; +} + +/************************************************* + * Name: keccak_absorb + * + * Description: Absorb step of Keccak; + * non-incremental, starts by zeroeing the state. + * + * Arguments: - uint64_t *s: pointer to (uninitialized) output Keccak state + * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) + * - const uint8_t *m: pointer to input to be absorbed into s + * - size_t mlen: length of input in bytes + * - uint8_t p: domain-separation byte for different + * Keccak-derived functions + **************************************************/ +static void keccak_absorb(uint64_t *s, uint32_t r, const uint8_t *m, size_t mlen, uint8_t p) { + size_t i; + uint8_t t[200]; + + /* Zero state */ + for (i = 0; i < 25; ++i) { + s[i] = 0; + } + + while (mlen >= r) { + for (i = 0; i < r / 8; ++i) { + s[i] ^= load64(m + 8 * i); + } + + KeccakF1600_StatePermute(s); + mlen -= r; + m += r; + } + + for (i = 0; i < r; ++i) { + t[i] = 0; + } + for (i = 0; i < mlen; ++i) { + t[i] = m[i]; + } + t[i] = p; + t[r - 1] |= 128; + for (i = 0; i < r / 8; ++i) { + s[i] ^= load64(t + 8 * i); + } +} + +/************************************************* + * Name: keccak_squeezeblocks + * + * Description: Squeeze step of Keccak. Squeezes full blocks of r bytes each. + * Modifies the state. Can be called multiple times to keep + * squeezing, i.e., is incremental. 
+ * + * Arguments: - uint8_t *h: pointer to output blocks + * - size_t nblocks: number of blocks to be + * squeezed (written to h) + * - uint64_t *s: pointer to input/output Keccak state + * - uint32_t r: rate in bytes (e.g., 168 for SHAKE128) + **************************************************/ +static void keccak_squeezeblocks(uint8_t *h, size_t nblocks, uint64_t *s, uint32_t r) { + while (nblocks > 0) { + KeccakF1600_StatePermute(s); + for (size_t i = 0; i < (r >> 3); i++) { + store64(h + 8 * i, s[i]); + } + h += r; + nblocks--; + } +} + +/************************************************* + * Name: shake256_absorb + * + * Description: Absorb step of the SHAKE256 XOF. + * non-incremental, starts by zeroeing the state. + * + * Arguments: - shake256ctx *state: pointer to (uninitialized) output Keccak state + * - const uint8_t *input: pointer to input to be absorbed + * into s + * - size_t inlen: length of input in bytes + **************************************************/ +static void shake256_absorb(shake256_ctx *state, const uint8_t *input, size_t inlen) { + keccak_absorb(state->ctx, SHAKE256_RATE, input, inlen, 0x1F); +} + +/************************************************* + * Name: shake256_squeezeblocks + * + * Description: Squeeze step of SHAKE256 XOF. Squeezes full blocks of + * SHAKE256_RATE bytes each. Modifies the state. Can be called + * multiple times to keep squeezing, i.e., is incremental. + * + * Arguments: - uint8_t *output: pointer to output blocks + * - size_t nblocks: number of blocks to be squeezed + * (written to output) + * - shake256ctx *state: pointer to input/output Keccak state + **************************************************/ +static void shake256_squeezeblocks(uint8_t *output, size_t nblocks, shake256_ctx *state) { + keccak_squeezeblocks(output, nblocks, state->ctx, SHAKE256_RATE); +} + +/************************************************* + * Name: shake256 + * + * Description: SHAKE256 XOF with non-incremental API + * + * Arguments: - uint8_t *output: pointer to output + * - size_t outlen: requested output length in bytes + * - const uint8_t *input: pointer to input + * - size_t inlen: length of input in bytes + **************************************************/ +void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen) { + size_t nblocks = outlen / SHAKE256_RATE; + uint8_t t[SHAKE256_RATE]; + shake256_ctx s; + + shake256_absorb(&s, input, inlen); + shake256_squeezeblocks(output, nblocks, &s); + + output += nblocks * SHAKE256_RATE; + outlen -= nblocks * SHAKE256_RATE; + + if (outlen) { + shake256_squeezeblocks(t, 1, &s); + for (size_t i = 0; i < outlen; ++i) { + output[i] = t[i]; + } + } +} + diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.h b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.h index 1196bff2c0..8aa6d8c7bc 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.h +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fips202.h @@ -1,14 +1,14 @@ -#ifndef FIPS202_H -#define FIPS202_H - -#define SHAKE256_RATE 136 - -/** Data structure for the state of the SHAKE-256 non-incremental hashing API. */ -typedef struct { -/** Internal state. */ - uint64_t ctx[25]; -} shake256_ctx; - -void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); - -#endif // FIPS202_H +#ifndef FIPS202_H +#define FIPS202_H + +#define SHAKE256_RATE 136 + +/** Data structure for the state of the SHAKE-256 non-incremental hashing API. */ +typedef struct { +/** Internal state. 
*/ + uint64_t ctx[25]; +} shake256_ctx; + +void shake256(uint8_t *output, size_t outlen, const uint8_t *input, size_t inlen); + +#endif // FIPS202_H diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fp.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fp.c index 0e09ce25a0..e340e333d0 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fp.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fp.c @@ -1,241 +1,241 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: Portable C and x86_64 ASM functions for modular arithmetic for P434 -*********************************************************************************************/ - -#include "P434_internal.h" - -// Modular addition, c = a+b mod p434. -// Inputs: a, b in [0, 2*p434-1] -// Output: c in [0, 2*p434-1] -void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - fpadd434_asm(a, b, c); - return; - } -#endif - - unsigned int i, carry = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, a[i], b[i], carry, c[i]); - } - - carry = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(carry, c[i], ((const digit_t *) p434x2)[i], carry, c[i]); - } - mask = 0 - (digit_t) carry; - - carry = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, c[i], ((const digit_t *) p434x2)[i] & mask, carry, c[i]); - } -} - -// Modular subtraction, c = a-b mod p434. -// Inputs: a, b in [0, 2*p434-1] -// Output: c in [0, 2*p434-1] -void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - fpsub434_asm(a, b, c); - return; - } -#endif - - unsigned int i, borrow = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, a[i], b[i], borrow, c[i]); - } - mask = 0 - (digit_t) borrow; - - borrow = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(borrow, c[i], ((const digit_t *) p434x2)[i] & mask, borrow, c[i]); - } -} - -// Modular negation, a = -a mod p434. -// Input/output: a in [0, 2*p434-1] -void fpneg434(digit_t *a) { - unsigned int i, borrow = 0; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, ((const digit_t *) p434x2)[i], a[i], borrow, a[i]); - } -} - -// Modular division by two, c = a/2 mod p434. -// Input : a in [0, 2*p434-1] -// Output: c in [0, 2*p434-1] -void fpdiv2_434(const digit_t *a, digit_t *c) { - unsigned int i, carry = 0; - digit_t mask; - - mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p434 - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(carry, a[i], ((const digit_t *) p434)[i] & mask, carry, c[i]); - } - - mp_shiftr1(c, NWORDS_FIELD); -} - -// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. 
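The additions and subtractions above avoid secret-dependent branches: after the tentative subtraction of 2*p434, the final borrow is turned into an all-ones or all-zero mask (mask = 0 - borrow) that decides whether the modulus gets added back. The single-word analogue below, with an arbitrary toy modulus, illustrates the idea; it derives the borrow from a comparison rather than from the SUBC carry chain, so it is illustrative only and not a constant-time guarantee.

#include <assert.h>
#include <stdint.h>

/* Single-word analogue of the masked correction in fpadd434: compute
 * (a + b) mod m, assuming a, b < m and m < 2^63 so a + b cannot wrap. */
static uint64_t add_mod(uint64_t a, uint64_t b, uint64_t m) {
    uint64_t s = a + b;
    uint64_t t = s - m;            /* tentative reduction (may underflow) */
    uint64_t borrow = (s < m);     /* 1 exactly when the subtraction borrowed */
    uint64_t mask = 0 - borrow;    /* all-ones iff the reduction must be undone */
    return t + (m & mask);
}

int main(void) {
    const uint64_t m = 1000003;    /* toy modulus, not p434 */
    assert(add_mod(999999, 7, m) == (999999 + 7) % m);
    assert(add_mod(3, 4, m) == 7);
    return 0;
}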
-void fpcorrection434(digit_t *a) { - unsigned int i, borrow = 0; - digit_t mask; - - for (i = 0; i < NWORDS_FIELD; i++) { - SUBC(borrow, a[i], ((const digit_t *) p434)[i], borrow, a[i]); - } - mask = 0 - (digit_t) borrow; - - borrow = 0; - for (i = 0; i < NWORDS_FIELD; i++) { - ADDC(borrow, a[i], ((const digit_t *) p434)[i] & mask, borrow, a[i]); - } -} - -// Digit multiplication, digit * digit -> 2-digit result -void digit_x_digit(const digit_t a, const digit_t b, digit_t *c) { - register digit_t al, ah, bl, bh, temp; - digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; - digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); - - al = a & mask_low; // Low part - ah = a >> (sizeof(digit_t) * 4); // High part - bl = b & mask_low; - bh = b >> (sizeof(digit_t) * 4); - - albl = al * bl; - albh = al * bh; - ahbl = ah * bl; - ahbh = ah * bh; - c[0] = albl & mask_low; // C00 - - res1 = albl >> (sizeof(digit_t) * 4); - res2 = ahbl & mask_low; - res3 = albh & mask_low; - temp = res1 + res2 + res3; - carry = temp >> (sizeof(digit_t) * 4); - c[0] ^= temp << (sizeof(digit_t) * 4); // C01 - - res1 = ahbl >> (sizeof(digit_t) * 4); - res2 = albh >> (sizeof(digit_t) * 4); - res3 = ahbh & mask_low; - temp = res1 + res2 + res3 + carry; - c[1] = temp & mask_low; // C10 - carry = temp & mask_high; - c[1] ^= (ahbh & mask_high) + carry; // C11 -} - -// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. -void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - UNREFERENCED_PARAMETER(nwords); - mul434_asm(a, b, c); - return; - } -#endif - - unsigned int i, j, carry; - digit_t t = 0, u = 0, v = 0, UV[2]; - - for (i = 0; i < nwords; i++) { - for (j = 0; j <= i; j++) { - MUL(a[j], b[i - j], UV + 1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - c[i] = v; - v = u; - u = t; - t = 0; - } - - for (i = nwords; i < 2 * nwords - 1; i++) { - for (j = i - nwords + 1; j < nwords; j++) { - MUL(a[j], b[i - j], UV + 1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - c[i] = v; - v = u; - u = t; - t = 0; - } - c[2 * nwords - 1] = v; -} - -// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. -// mc = ma*R^-1 mod p434x2, where R = 2^448. -// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. -// ma is assumed to be in Montgomery representation. 
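digit_x_digit above splits each digit into half-words so that every partial product fits in a single digit. The identity it relies on, a*b = al*bl + (al*bh + ah*bl)*2^(w/2) + ah*bh*2^w, can be checked directly on 32-bit toy digits, where the full product still fits in a uint64_t:

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint32_t a = 0xdeadbeef, b = 0x12345678;
    uint32_t al = a & 0xffff, ah = a >> 16;    /* low and high half-digits */
    uint32_t bl = b & 0xffff, bh = b >> 16;

    uint64_t product = (uint64_t) al * bl
                     + (((uint64_t) al * bh + (uint64_t) ah * bl) << 16)
                     + (((uint64_t) ah * bh) << 32);

    assert(product == (uint64_t) a * b);       /* matches the widening multiply */
    return 0;
}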
-void rdc_mont(const digit_t *ma, digit_t *mc) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - rdc434_asm(ma, mc); - return; - } -#endif - - unsigned int i, j, carry, count = p434_ZERO_WORDS; - digit_t UV[2], t = 0, u = 0, v = 0; - - for (i = 0; i < NWORDS_FIELD; i++) { - mc[i] = 0; - } - - for (i = 0; i < NWORDS_FIELD; i++) { - for (j = 0; j < i; j++) { - if (j < (i - p434_ZERO_WORDS + 1)) { - MUL(mc[j], ((const digit_t *) p434p1)[i - j], UV + 1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - } - ADDC(0, v, ma[i], carry, v); - ADDC(carry, u, 0, carry, u); - t += carry; - mc[i] = v; - v = u; - u = t; - t = 0; - } - - for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) { - if (count > 0) { - count -= 1; - } - for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) { - if (j < (NWORDS_FIELD - count)) { - MUL(mc[j], ((const digit_t *) p434p1)[i - j], UV + 1, UV[0]); - ADDC(0, UV[0], v, carry, v); - ADDC(carry, UV[1], u, carry, u); - t += carry; - } - } - ADDC(0, v, ma[i], carry, v); - ADDC(carry, u, 0, carry, u); - t += carry; - mc[i - NWORDS_FIELD] = v; - v = u; - u = t; - t = 0; - } - - /* `carry` isn't read after this, but it's still a necessary argument to the macro */ - /* cppcheck-suppress unreadVariable */ - ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v); - mc[NWORDS_FIELD - 1] = v; -} +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: Portable C and x86_64 ASM functions for modular arithmetic for P434 +*********************************************************************************************/ + +#include "P434_internal.h" + +// Modular addition, c = a+b mod p434. +// Inputs: a, b in [0, 2*p434-1] +// Output: c in [0, 2*p434-1] +void fpadd434(const digit_t *a, const digit_t *b, digit_t *c) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + fpadd434_asm(a, b, c); + return; + } +#endif + + unsigned int i, carry = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], b[i], carry, c[i]); + } + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(carry, c[i], ((const digit_t *) p434x2)[i], carry, c[i]); + } + mask = 0 - (digit_t) carry; + + carry = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, c[i], ((const digit_t *) p434x2)[i] & mask, carry, c[i]); + } +} + +// Modular subtraction, c = a-b mod p434. +// Inputs: a, b in [0, 2*p434-1] +// Output: c in [0, 2*p434-1] +void fpsub434(const digit_t *a, const digit_t *b, digit_t *c) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + fpsub434_asm(a, b, c); + return; + } +#endif + + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], b[i], borrow, c[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, c[i], ((const digit_t *) p434x2)[i] & mask, borrow, c[i]); + } +} + +// Modular negation, a = -a mod p434. +// Input/output: a in [0, 2*p434-1] +void fpneg434(digit_t *a) { + unsigned int i, borrow = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, ((const digit_t *) p434x2)[i], a[i], borrow, a[i]); + } +} + +// Modular division by two, c = a/2 mod p434. 
+// Input : a in [0, 2*p434-1] +// Output: c in [0, 2*p434-1] +void fpdiv2_434(const digit_t *a, digit_t *c) { + unsigned int i, carry = 0; + digit_t mask; + + mask = 0 - (digit_t)(a[0] & 1); // If a is odd compute a+p434 + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(carry, a[i], ((const digit_t *) p434)[i] & mask, carry, c[i]); + } + + mp_shiftr1(c, NWORDS_FIELD); +} + +// Modular correction to reduce field element a in [0, 2*p434-1] to [0, p434-1]. +void fpcorrection434(digit_t *a) { + unsigned int i, borrow = 0; + digit_t mask; + + for (i = 0; i < NWORDS_FIELD; i++) { + SUBC(borrow, a[i], ((const digit_t *) p434)[i], borrow, a[i]); + } + mask = 0 - (digit_t) borrow; + + borrow = 0; + for (i = 0; i < NWORDS_FIELD; i++) { + ADDC(borrow, a[i], ((const digit_t *) p434)[i] & mask, borrow, a[i]); + } +} + +// Digit multiplication, digit * digit -> 2-digit result +void digit_x_digit(const digit_t a, const digit_t b, digit_t *c) { + register digit_t al, ah, bl, bh, temp; + digit_t albl, albh, ahbl, ahbh, res1, res2, res3, carry; + digit_t mask_low = (digit_t)(-1) >> (sizeof(digit_t) * 4), mask_high = (digit_t)(-1) << (sizeof(digit_t) * 4); + + al = a & mask_low; // Low part + ah = a >> (sizeof(digit_t) * 4); // High part + bl = b & mask_low; + bh = b >> (sizeof(digit_t) * 4); + + albl = al * bl; + albh = al * bh; + ahbl = ah * bl; + ahbh = ah * bh; + c[0] = albl & mask_low; // C00 + + res1 = albl >> (sizeof(digit_t) * 4); + res2 = ahbl & mask_low; + res3 = albh & mask_low; + temp = res1 + res2 + res3; + carry = temp >> (sizeof(digit_t) * 4); + c[0] ^= temp << (sizeof(digit_t) * 4); // C01 + + res1 = ahbl >> (sizeof(digit_t) * 4); + res2 = albh >> (sizeof(digit_t) * 4); + res3 = ahbh & mask_low; + temp = res1 + res2 + res3 + carry; + c[1] = temp & mask_low; // C10 + carry = temp & mask_high; + c[1] ^= (ahbh & mask_high) + carry; // C11 +} + +// Multiprecision comba multiply, c = a*b, where lng(a) = lng(b) = nwords. +void mp_mul(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + UNREFERENCED_PARAMETER(nwords); + mul434_asm(a, b, c); + return; + } +#endif + + unsigned int i, j, carry; + digit_t t = 0, u = 0, v = 0, UV[2]; + + for (i = 0; i < nwords; i++) { + for (j = 0; j <= i; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = nwords; i < 2 * nwords - 1; i++) { + for (j = i - nwords + 1; j < nwords; j++) { + MUL(a[j], b[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + c[i] = v; + v = u; + u = t; + t = 0; + } + c[2 * nwords - 1] = v; +} + +// Efficient Montgomery reduction using comba and exploiting the special form of the prime p434. +// mc = ma*R^-1 mod p434x2, where R = 2^448. +// If ma < 2^448*p434, the output mc is in the range [0, 2*p434-1]. +// ma is assumed to be in Montgomery representation. 
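rdc_mont (whose contract is stated just above) implements Montgomery reduction (REDC): for ma < R*p434 with R = 2^448 it returns ma*R^(-1) mod p434, exploiting the special form of p434 (the low words of p434+1 are zero). The one-word toy below, with R = 2^32 and a small odd modulus that is not p434, shows the same contract end to end; the -p^(-1) mod 2^32 constant is obtained with a short Newton iteration.

#include <assert.h>
#include <stdint.h>

/* One-word Montgomery reduction with R = 2^32: returns t * R^-1 mod p
 * for t < p * R, given pinv = -p^-1 mod 2^32. */
static uint32_t redc32(uint64_t t, uint32_t p, uint32_t pinv) {
    uint32_t m = (uint32_t) t * pinv;           /* m = t * (-p^-1) mod R */
    uint64_t u = (t + (uint64_t) m * p) >> 32;  /* numerator is divisible by R */
    return (uint32_t) (u >= p ? u - p : u);     /* final conditional correction */
}

int main(void) {
    const uint32_t p = 2147483647u;   /* toy odd modulus 2^31 - 1, not p434 */

    uint32_t pinv = 1;                /* Newton iteration for p^-1 mod 2^32 */
    for (int i = 0; i < 5; i++) {
        pinv *= 2 - p * pinv;
    }
    pinv = 0u - pinv;                 /* negate to get -p^-1 mod 2^32 */

    uint64_t t = 987654321098765ULL;  /* any value below p * 2^32 */
    uint32_t r = redc32(t, p, pinv);

    /* r == t * R^-1 (mod p)  <=>  r * R == t (mod p) */
    assert(((uint64_t) r << 32) % p == t % p);
    return 0;
}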
+void rdc_mont(const digit_t *ma, digit_t *mc) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + rdc434_asm(ma, mc); + return; + } +#endif + + unsigned int i, j, carry, count = p434_ZERO_WORDS; + digit_t UV[2], t = 0, u = 0, v = 0; + + for (i = 0; i < NWORDS_FIELD; i++) { + mc[i] = 0; + } + + for (i = 0; i < NWORDS_FIELD; i++) { + for (j = 0; j < i; j++) { + if (j < (i - p434_ZERO_WORDS + 1)) { + MUL(mc[j], ((const digit_t *) p434p1)[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i] = v; + v = u; + u = t; + t = 0; + } + + for (i = NWORDS_FIELD; i < 2 * NWORDS_FIELD - 1; i++) { + if (count > 0) { + count -= 1; + } + for (j = i - NWORDS_FIELD + 1; j < NWORDS_FIELD; j++) { + if (j < (NWORDS_FIELD - count)) { + MUL(mc[j], ((const digit_t *) p434p1)[i - j], UV + 1, UV[0]); + ADDC(0, UV[0], v, carry, v); + ADDC(carry, UV[1], u, carry, u); + t += carry; + } + } + ADDC(0, v, ma[i], carry, v); + ADDC(carry, u, 0, carry, u); + t += carry; + mc[i - NWORDS_FIELD] = v; + v = u; + u = t; + t = 0; + } + + /* `carry` isn't read after this, but it's still a necessary argument to the macro */ + /* cppcheck-suppress unreadVariable */ + ADDC(0, v, ma[2 * NWORDS_FIELD - 1], carry, v); + mc[NWORDS_FIELD - 1] = v; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fpx.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fpx.c index e5b356b93b..169fcc95dc 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fpx.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/fpx.c @@ -1,387 +1,387 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: core functions over GF(p) and GF(p^2) -*********************************************************************************************/ - -// Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes -void fp2_encode(const f2elm_t *x, unsigned char *enc) { - unsigned int i; - f2elm_t t; - - from_fp2mont(x, &t); - for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { - enc[i] = ((unsigned char *) t.e)[i]; - enc[i + FP2_ENCODED_BYTES / 2] = ((unsigned char *) t.e)[i + MAXBITS_FIELD / 8]; - } -} - -// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation -void fp2_decode(const unsigned char *enc, f2elm_t *x) { - unsigned int i; - - for (i = 0; i < 2 * (MAXBITS_FIELD / 8); i++) - ((unsigned char *) x->e)[i] = 0; - for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { - ((unsigned char *) x->e)[i] = enc[i]; - ((unsigned char *) x->e)[i + MAXBITS_FIELD / 8] = enc[i + FP2_ENCODED_BYTES / 2]; - } - to_fp2mont(x, x); -} - -// Copy a field element, c = a. -__inline void fpcopy(const felm_t a, felm_t c) { - unsigned int i; - - for (i = 0; i < NWORDS_FIELD; i++) - c[i] = a[i]; -} - -// Zero a field element, a = 0. -__inline void fpzero(felm_t a) { - unsigned int i; - - for (i = 0; i < NWORDS_FIELD; i++) - a[i] = 0; -} - -// Conversion to Montgomery representation, -// mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1]. -// The Montgomery constant R^2 mod p is the global value "Montgomery_R2". 
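fp2_encode/fp2_decode above pack a GF(p^2) element as the real coefficient followed by the imaginary one, keeping only the low FP2_ENCODED_BYTES/2 bytes of each coefficient (the dropped high bytes are always zero for a reduced element); the Montgomery conversions they also perform are omitted here. A byte-level sketch with made-up sizes standing in for MAXBITS_FIELD/8 and FP2_ENCODED_BYTES/2:

#include <assert.h>
#include <stdint.h>
#include <string.h>

#define SRC_BYTES 8   /* stand-in for MAXBITS_FIELD / 8 */
#define ENC_HALF  7   /* stand-in for FP2_ENCODED_BYTES / 2 */

int main(void) {
    uint8_t re[SRC_BYTES] = {1, 2, 3, 4, 5, 6, 7, 0};   /* top byte is zero */
    uint8_t im[SRC_BYTES] = {9, 8, 7, 6, 5, 4, 3, 0};
    uint8_t enc[2 * ENC_HALF];

    memcpy(enc, re, ENC_HALF);                  /* encode: real part first */
    memcpy(enc + ENC_HALF, im, ENC_HALF);       /* then the imaginary part */

    uint8_t re2[SRC_BYTES] = {0}, im2[SRC_BYTES] = {0};
    memcpy(re2, enc, ENC_HALF);                 /* decode restores both halves, */
    memcpy(im2, enc + ENC_HALF, ENC_HALF);      /* zero-padding the high bytes  */

    assert(memcmp(re, re2, SRC_BYTES) == 0 && memcmp(im, im2, SRC_BYTES) == 0);
    return 0;
}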
-void to_mont(const felm_t a, felm_t mc) { - fpmul_mont(a, (const digit_t *) &Montgomery_R2, mc); -} - -// Conversion from Montgomery representation to standard representation, -// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. -void from_mont(const felm_t ma, felm_t c) { - digit_t one[NWORDS_FIELD] = {0}; - - one[0] = 1; - fpmul_mont(ma, one, c); - fpcorrection(c); -} - -// Copy wordsize digits, c = a, where lng(a) = nwords. -void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords) { - unsigned int i; - - for (i = 0; i < nwords; i++) - c[i] = a[i]; -} - -// Multiprecision multiplication, c = a*b mod p. -void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) { - dfelm_t temp = {0}; - - mp_mul(ma, mb, temp, NWORDS_FIELD); - rdc_mont(temp, mc); -} - -// Multiprecision squaring, c = a^2 mod p. -void fpsqr_mont(const felm_t ma, felm_t mc) { - dfelm_t temp = {0}; - - mp_mul(ma, ma, temp, NWORDS_FIELD); - rdc_mont(temp, mc); -} - -// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. -void fpinv_mont(felm_t a) { - felm_t tt; - - fpcopy(a, tt); - fpinv_chain_mont(tt); - fpsqr_mont(tt, tt); - fpsqr_mont(tt, tt); - fpmul_mont(a, tt, a); -} - -// Copy a GF(p^2) element, c = a. -void fp2copy(const f2elm_t *a, f2elm_t *c) { - fpcopy(a->e[0], c->e[0]); - fpcopy(a->e[1], c->e[1]); -} - -// Zero a GF(p^2) element, a = 0. -void fp2zero(f2elm_t *a) { - fpzero(a->e[0]); - fpzero(a->e[1]); -} - -// GF(p^2) negation, a = -a in GF(p^2). -void fp2neg(f2elm_t *a) { - fpneg(a->e[0]); - fpneg(a->e[1]); -} - -// GF(p^2) addition, c = a+b in GF(p^2). -__inline void fp2add(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { - fpadd(a->e[0], b->e[0], c->e[0]); - fpadd(a->e[1], b->e[1], c->e[1]); -} - -// GF(p^2) subtraction, c = a-b in GF(p^2). -__inline void fp2sub(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { - fpsub(a->e[0], b->e[0], c->e[0]); - fpsub(a->e[1], b->e[1], c->e[1]); -} - -// GF(p^2) division by two, c = a/2 in GF(p^2). -void fp2div2(const f2elm_t *a, f2elm_t *c) { - fpdiv2(a->e[0], c->e[0]); - fpdiv2(a->e[1], c->e[1]); -} - -// Modular correction, a = a in GF(p^2). -void fp2correction(f2elm_t *a) { - fpcorrection(a->e[0]); - fpcorrection(a->e[1]); -} - -// Multiprecision addition, c = a+b. -__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - mp_add_asm(a, b, c); - return; - } -#endif - - mp_add(a, b, c, NWORDS_FIELD); -} - -// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). -// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] -// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] -void fp2sqr_mont(const f2elm_t *a, f2elm_t *c) { - felm_t t1, t2, t3; - - mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 - fpsub(a->e[0], a->e[1], t2); // t2 = a0-a1 - mp_addfast(a->e[0], a->e[0], t3); // t3 = 2a0 - fpmul_mont(t1, t2, c->e[0]); // c0 = (a0+a1)(a0-a1) - fpmul_mont(t3, a->e[1], c->e[1]); // c1 = 2a0*a1 -} - -// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. -unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { - unsigned int i, borrow = 0; - - for (i = 0; i < nwords; i++) - SUBC(borrow, a[i], b[i], borrow, c[i]); - - return borrow; -} - -// Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b. 
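fp2sqr_mont above uses the identity (a0 + a1*i)^2 = (a0 + a1)(a0 - a1) + (2*a0*a1)*i, valid when i^2 = -1, so the square costs only two base-field multiplications. A quick integer check modulo a small prime p = 103 (which is 3 mod 4, as p434 is, so i^2 = -1 does define the quadratic extension):

#include <assert.h>
#include <stdint.h>

int main(void) {
    const int64_t p = 103;            /* toy prime, not p434 */
    int64_t a0 = 37, a1 = 64;

    /* direct expansion: (a0^2 - a1^2) + (2*a0*a1)*i */
    int64_t c0_direct = ((a0 * a0 - a1 * a1) % p + p) % p;
    int64_t c1_direct = (2 * a0 * a1) % p;

    /* the two-multiplication form used by fp2sqr_mont */
    int64_t c0_fast = (((a0 + a1) * (a0 - a1)) % p + p) % p;
    int64_t c1_fast = ((2 * a0) * a1) % p;

    assert(c0_direct == c0_fast && c1_direct == c1_fast);
    return 0;
}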
-__inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *c) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - mp_subaddx2_asm(a, b, c); - return; - } -#endif - - felm_t t1; - - digit_t mask = 0 - (digit_t) mp_sub(a, b, c, 2 * NWORDS_FIELD); - for (int i = 0; i < NWORDS_FIELD; i++) - t1[i] = ((const digit_t *) PRIME)[i] & mask; - mp_addfast((digit_t *) &c[NWORDS_FIELD], t1, (digit_t *) &c[NWORDS_FIELD]); -} - -// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. -__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) { -#if defined(S2N_SIKEP434R2_ASM) - if (s2n_sikep434r2_asm_is_enabled()) { - mp_dblsubx2_asm(a, b, c); - return; - } -#endif - - mp_sub(c, a, c, 2 * NWORDS_FIELD); - mp_sub(c, b, c, 2 * NWORDS_FIELD); -} - -// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). -// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] -// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] -void fp2mul_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { - felm_t t1, t2; - dfelm_t tt1, tt2, tt3; - - mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 - mp_addfast(b->e[0], b->e[1], t2); // t2 = b0+b1 - mp_mul(a->e[0], b->e[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 - mp_mul(a->e[1], b->e[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 - mp_mul(t1, t2, tt3, NWORDS_FIELD); // tt3 = (a0+a1)*(b0+b1) - mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 - mp_subaddfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1 + p*2^MAXBITS_FIELD if a0*b0 - a1*b1 < 0, else tt1 = a0*b0 - a1*b1 - rdc_mont(tt3, c->e[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 - rdc_mont(tt1, c->e[0]); // c[0] = a0*b0 - a1*b1 -} - -// Chain to compute a^(p-3)/4 using Montgomery arithmetic. 
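fp2mul_mont above computes a GF(p^2) product with three base-field multiplications, using c0 = a0*b0 - a1*b1 and c1 = (a0 + a1)(b0 + b1) - a0*b0 - a1*b1 instead of the four schoolbook products; mp_dblsubfast and mp_subaddfast perform the two subtractions on the double-length intermediates. A small integer check of the formula modulo a toy prime:

#include <assert.h>
#include <stdint.h>

int main(void) {
    const int64_t p = 103;                      /* toy prime, 3 mod 4 like p434 */
    int64_t a0 = 11, a1 = 58, b0 = 92, b1 = 7;

    int64_t t1 = a0 * b0, t2 = a1 * b1;         /* two of the three products */
    int64_t t3 = (a0 + a1) * (b0 + b1);         /* the third, Karatsuba-style */

    int64_t c0 = ((t1 - t2) % p + p) % p;       /* real part */
    int64_t c1 = (t3 - t1 - t2) % p;            /* imaginary part */

    /* compare against the four-product schoolbook expansion */
    assert(c0 == ((a0 * b0 - a1 * b1) % p + p) % p);
    assert(c1 == (a0 * b1 + a1 * b0) % p);
    return 0;
}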
-void fpinv_chain_mont(felm_t a) { - unsigned int i, j; - - felm_t t[31], tt; - - // Precomputed table - fpsqr_mont(a, tt); - fpmul_mont(a, tt, t[0]); - for (i = 0; i <= 29; i++) - fpmul_mont(t[i], tt, t[i + 1]); - - fpcopy(a, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[5], tt, tt); - for (i = 0; i < 10; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[14], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[3], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[23], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[13], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[24], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[7], tt, tt); - for (i = 0; i < 8; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[12], tt, tt); - for (i = 0; i < 8; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[30], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[1], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[30], tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[21], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[2], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[19], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[1], tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[24], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[26], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[16], tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[0], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[20], tt, tt); - for (i = 0; i < 8; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[9], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[25], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[30], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[26], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(a, tt, tt); - for (i = 0; i < 7; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[28], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[6], tt, tt); - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[10], tt, tt); - for (i = 0; i < 9; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[22], tt, tt); - for (j = 0; j < 35; j++) { - for (i = 0; i < 6; i++) - fpsqr_mont(tt, tt); - fpmul_mont(t[30], tt, tt); - } - fpcopy(tt, a); -} - -// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). -void fp2inv_mont(f2elm_t *a) { - f2elm_t t1; - - fpsqr_mont(a->e[0], t1.e[0]); // t10 = a0^2 - fpsqr_mont(a->e[1], t1.e[1]); // t11 = a1^2 - fpadd(t1.e[0], t1.e[1], t1.e[0]); // t10 = a0^2+a1^2 - fpinv_mont(t1.e[0]); // t10 = (a0^2+a1^2)^-1 - fpneg(a->e[1]); // a = a0-i*a1 - fpmul_mont(a->e[0], t1.e[0], a->e[0]); - fpmul_mont(a->e[1], t1.e[0], a->e[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 -} - -// Conversion of a GF(p^2) element to Montgomery representation, -// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). 
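fpinv_chain_mont above is a fixed addition chain for a^((p434-3)/4); fpinv_mont (earlier in this file) then squares that result twice and multiplies by a once, giving a^(p434-2) = a^(-1) mod p434 by Fermat's little theorem. The toy below checks that exponent bookkeeping with a small prime congruent to 3 mod 4 and plain square-and-multiply in place of the hand-tuned chain.

#include <assert.h>
#include <stdint.h>

static uint64_t powmod(uint64_t b, uint64_t e, uint64_t p) {
    uint64_t r = 1;
    b %= p;
    while (e) {                               /* square-and-multiply */
        if (e & 1) r = r * b % p;
        b = b * b % p;
        e >>= 1;
    }
    return r;
}

int main(void) {
    const uint64_t p = 1000003;               /* toy prime, 1000003 % 4 == 3 */
    uint64_t a = 123456;

    uint64_t tt = powmod(a, (p - 3) / 4, p);  /* role of fpinv_chain_mont */
    tt = tt * tt % p;                         /* first squaring            */
    tt = tt * tt % p;                         /* second squaring           */
    uint64_t inv = tt * a % p;                /* a^(p-2)                   */

    assert(inv * a % p == 1);                 /* indeed the inverse of a   */
    return 0;
}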
-void to_fp2mont(const f2elm_t *a, f2elm_t *mc) { - to_mont(a->e[0], mc->e[0]); - to_mont(a->e[1], mc->e[1]); -} - -// Conversion of a GF(p^2) element from Montgomery representation to standard representation, -// c_i = ma_i*R^(-1) = a_i in GF(p^2). -void from_fp2mont(const f2elm_t *ma, f2elm_t *c) { - from_mont(ma->e[0], c->e[0]); - from_mont(ma->e[1], c->e[1]); -} - -// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. -unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { - unsigned int i, carry = 0; - - for (i = 0; i < nwords; i++) { - /* cppcheck-suppress shiftTooManyBits */ - /* cppcheck-suppress unmatchedSuppression */ - ADDC(carry, a[i], b[i], carry, c[i]); - } - - return carry; -} - -// Multiprecision right shift by one. -void mp_shiftr1(digit_t *x, const unsigned int nwords) { - unsigned int i; - - for (i = 0; i < nwords - 1; i++) { - SHIFTR(x[i + 1], x[i], 1, x[i], RADIX); - } - x[nwords - 1] >>= 1; -} +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: core functions over GF(p) and GF(p^2) +*********************************************************************************************/ + +// Conversion of GF(p^2) element from Montgomery to standard representation, and encoding by removing leading 0 bytes +void fp2_encode(const f2elm_t *x, unsigned char *enc) { + unsigned int i; + f2elm_t t; + + from_fp2mont(x, &t); + for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { + enc[i] = ((unsigned char *) t.e)[i]; + enc[i + FP2_ENCODED_BYTES / 2] = ((unsigned char *) t.e)[i + MAXBITS_FIELD / 8]; + } +} + +// Parse byte sequence back into GF(p^2) element, and conversion to Montgomery representation +void fp2_decode(const unsigned char *enc, f2elm_t *x) { + unsigned int i; + + for (i = 0; i < 2 * (MAXBITS_FIELD / 8); i++) + ((unsigned char *) x->e)[i] = 0; + for (i = 0; i < FP2_ENCODED_BYTES / 2; i++) { + ((unsigned char *) x->e)[i] = enc[i]; + ((unsigned char *) x->e)[i + MAXBITS_FIELD / 8] = enc[i + FP2_ENCODED_BYTES / 2]; + } + to_fp2mont(x, x); +} + +// Copy a field element, c = a. +__inline void fpcopy(const felm_t a, felm_t c) { + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + c[i] = a[i]; +} + +// Zero a field element, a = 0. +__inline void fpzero(felm_t a) { + unsigned int i; + + for (i = 0; i < NWORDS_FIELD; i++) + a[i] = 0; +} + +// Conversion to Montgomery representation, +// mc = a*R^2*R^(-1) mod p = a*R mod p, where a in [0, p-1]. +// The Montgomery constant R^2 mod p is the global value "Montgomery_R2". +void to_mont(const felm_t a, felm_t mc) { + fpmul_mont(a, (const digit_t *) &Montgomery_R2, mc); +} + +// Conversion from Montgomery representation to standard representation, +// c = ma*R^(-1) mod p = a mod p, where ma in [0, p-1]. +void from_mont(const felm_t ma, felm_t c) { + digit_t one[NWORDS_FIELD] = {0}; + + one[0] = 1; + fpmul_mont(ma, one, c); + fpcorrection(c); +} + +// Copy wordsize digits, c = a, where lng(a) = nwords. +void copy_words(const digit_t *a, digit_t *c, const unsigned int nwords) { + unsigned int i; + + for (i = 0; i < nwords; i++) + c[i] = a[i]; +} + +// Multiprecision multiplication, c = a*b mod p. +void fpmul_mont(const felm_t ma, const felm_t mb, felm_t mc) { + dfelm_t temp = {0}; + + mp_mul(ma, mb, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + +// Multiprecision squaring, c = a^2 mod p. 
+void fpsqr_mont(const felm_t ma, felm_t mc) { + dfelm_t temp = {0}; + + mp_mul(ma, ma, temp, NWORDS_FIELD); + rdc_mont(temp, mc); +} + +// Field inversion using Montgomery arithmetic, a = a^(-1)*R mod p. +void fpinv_mont(felm_t a) { + felm_t tt; + + fpcopy(a, tt); + fpinv_chain_mont(tt); + fpsqr_mont(tt, tt); + fpsqr_mont(tt, tt); + fpmul_mont(a, tt, a); +} + +// Copy a GF(p^2) element, c = a. +void fp2copy(const f2elm_t *a, f2elm_t *c) { + fpcopy(a->e[0], c->e[0]); + fpcopy(a->e[1], c->e[1]); +} + +// Zero a GF(p^2) element, a = 0. +void fp2zero(f2elm_t *a) { + fpzero(a->e[0]); + fpzero(a->e[1]); +} + +// GF(p^2) negation, a = -a in GF(p^2). +void fp2neg(f2elm_t *a) { + fpneg(a->e[0]); + fpneg(a->e[1]); +} + +// GF(p^2) addition, c = a+b in GF(p^2). +__inline void fp2add(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { + fpadd(a->e[0], b->e[0], c->e[0]); + fpadd(a->e[1], b->e[1], c->e[1]); +} + +// GF(p^2) subtraction, c = a-b in GF(p^2). +__inline void fp2sub(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { + fpsub(a->e[0], b->e[0], c->e[0]); + fpsub(a->e[1], b->e[1], c->e[1]); +} + +// GF(p^2) division by two, c = a/2 in GF(p^2). +void fp2div2(const f2elm_t *a, f2elm_t *c) { + fpdiv2(a->e[0], c->e[0]); + fpdiv2(a->e[1], c->e[1]); +} + +// Modular correction, a = a in GF(p^2). +void fp2correction(f2elm_t *a) { + fpcorrection(a->e[0]); + fpcorrection(a->e[1]); +} + +// Multiprecision addition, c = a+b. +__inline static void mp_addfast(const digit_t *a, const digit_t *b, digit_t *c) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + mp_add_asm(a, b, c); + return; + } +#endif + + mp_add(a, b, c, NWORDS_FIELD); +} + +// GF(p^2) squaring using Montgomery arithmetic, c = a^2 in GF(p^2). +// Inputs: a = a0+a1*i, where a0, a1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void fp2sqr_mont(const f2elm_t *a, f2elm_t *c) { + felm_t t1, t2, t3; + + mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 + fpsub(a->e[0], a->e[1], t2); // t2 = a0-a1 + mp_addfast(a->e[0], a->e[0], t3); // t3 = 2a0 + fpmul_mont(t1, t2, c->e[0]); // c0 = (a0+a1)(a0-a1) + fpmul_mont(t3, a->e[1], c->e[1]); // c1 = 2a0*a1 +} + +// Multiprecision subtraction, c = a-b, where lng(a) = lng(b) = nwords. Returns the borrow bit. +unsigned int mp_sub(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { + unsigned int i, borrow = 0; + + for (i = 0; i < nwords; i++) + SUBC(borrow, a[i], b[i], borrow, c[i]); + + return borrow; +} + +// Multiprecision subtraction followed by addition with p*2^MAXBITS_FIELD, c = a-b+(p*2^MAXBITS_FIELD) if a-b < 0, otherwise c=a-b. +__inline static void mp_subaddfast(const digit_t *a, const digit_t *b, digit_t *c) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + mp_subaddx2_asm(a, b, c); + return; + } +#endif + + felm_t t1; + + digit_t mask = 0 - (digit_t) mp_sub(a, b, c, 2 * NWORDS_FIELD); + for (int i = 0; i < NWORDS_FIELD; i++) + t1[i] = ((const digit_t *) PRIME)[i] & mask; + mp_addfast((digit_t *) &c[NWORDS_FIELD], t1, (digit_t *) &c[NWORDS_FIELD]); +} + +// Multiprecision subtraction, c = c-a-b, where lng(a) = lng(b) = 2*NWORDS_FIELD. +__inline static void mp_dblsubfast(const digit_t *a, const digit_t *b, digit_t *c) { +#if defined(S2N_SIKEP434R2_ASM) + if (s2n_sikep434r2_asm_is_enabled()) { + mp_dblsubx2_asm(a, b, c); + return; + } +#endif + + mp_sub(c, a, c, 2 * NWORDS_FIELD); + mp_sub(c, b, c, 2 * NWORDS_FIELD); +} + +// GF(p^2) multiplication using Montgomery arithmetic, c = a*b in GF(p^2). 
+// Inputs: a = a0+a1*i and b = b0+b1*i, where a0, a1, b0, b1 are in [0, 2*p-1] +// Output: c = c0+c1*i, where c0, c1 are in [0, 2*p-1] +void fp2mul_mont(const f2elm_t *a, const f2elm_t *b, f2elm_t *c) { + felm_t t1, t2; + dfelm_t tt1, tt2, tt3; + + mp_addfast(a->e[0], a->e[1], t1); // t1 = a0+a1 + mp_addfast(b->e[0], b->e[1], t2); // t2 = b0+b1 + mp_mul(a->e[0], b->e[0], tt1, NWORDS_FIELD); // tt1 = a0*b0 + mp_mul(a->e[1], b->e[1], tt2, NWORDS_FIELD); // tt2 = a1*b1 + mp_mul(t1, t2, tt3, NWORDS_FIELD); // tt3 = (a0+a1)*(b0+b1) + mp_dblsubfast(tt1, tt2, tt3); // tt3 = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + mp_subaddfast(tt1, tt2, tt1); // tt1 = a0*b0 - a1*b1 + p*2^MAXBITS_FIELD if a0*b0 - a1*b1 < 0, else tt1 = a0*b0 - a1*b1 + rdc_mont(tt3, c->e[1]); // c[1] = (a0+a1)*(b0+b1) - a0*b0 - a1*b1 + rdc_mont(tt1, c->e[0]); // c[0] = a0*b0 - a1*b1 +} + +// Chain to compute a^(p-3)/4 using Montgomery arithmetic. +void fpinv_chain_mont(felm_t a) { + unsigned int i, j; + + felm_t t[31], tt; + + // Precomputed table + fpsqr_mont(a, tt); + fpmul_mont(a, tt, t[0]); + for (i = 0; i <= 29; i++) + fpmul_mont(t[i], tt, t[i + 1]); + + fpcopy(a, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[5], tt, tt); + for (i = 0; i < 10; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[14], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[3], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[23], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[13], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[7], tt, tt); + for (i = 0; i < 8; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[12], tt, tt); + for (i = 0; i < 8; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[21], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[2], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[19], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[1], tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[24], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[16], tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[0], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[20], tt, tt); + for (i = 0; i < 8; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[9], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[25], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[26], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(a, tt, tt); + for (i = 0; i < 7; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[28], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[6], tt, tt); + for (i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[10], tt, tt); + for (i = 0; i < 9; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[22], tt, tt); + for (j = 0; j < 35; j++) { + for 
(i = 0; i < 6; i++) + fpsqr_mont(tt, tt); + fpmul_mont(t[30], tt, tt); + } + fpcopy(tt, a); +} + +// GF(p^2) inversion using Montgomery arithmetic, a = (a0-i*a1)/(a0^2+a1^2). +void fp2inv_mont(f2elm_t *a) { + f2elm_t t1; + + fpsqr_mont(a->e[0], t1.e[0]); // t10 = a0^2 + fpsqr_mont(a->e[1], t1.e[1]); // t11 = a1^2 + fpadd(t1.e[0], t1.e[1], t1.e[0]); // t10 = a0^2+a1^2 + fpinv_mont(t1.e[0]); // t10 = (a0^2+a1^2)^-1 + fpneg(a->e[1]); // a = a0-i*a1 + fpmul_mont(a->e[0], t1.e[0], a->e[0]); + fpmul_mont(a->e[1], t1.e[0], a->e[1]); // a = (a0-i*a1)*(a0^2+a1^2)^-1 +} + +// Conversion of a GF(p^2) element to Montgomery representation, +// mc_i = a_i*R^2*R^(-1) = a_i*R in GF(p^2). +void to_fp2mont(const f2elm_t *a, f2elm_t *mc) { + to_mont(a->e[0], mc->e[0]); + to_mont(a->e[1], mc->e[1]); +} + +// Conversion of a GF(p^2) element from Montgomery representation to standard representation, +// c_i = ma_i*R^(-1) = a_i in GF(p^2). +void from_fp2mont(const f2elm_t *ma, f2elm_t *c) { + from_mont(ma->e[0], c->e[0]); + from_mont(ma->e[1], c->e[1]); +} + +// Multiprecision addition, c = a+b, where lng(a) = lng(b) = nwords. Returns the carry bit. +unsigned int mp_add(const digit_t *a, const digit_t *b, digit_t *c, const unsigned int nwords) { + unsigned int i, carry = 0; + + for (i = 0; i < nwords; i++) { + /* cppcheck-suppress shiftTooManyBits */ + /* cppcheck-suppress unmatchedSuppression */ + ADDC(carry, a[i], b[i], carry, c[i]); + } + + return carry; +} + +// Multiprecision right shift by one. +void mp_shiftr1(digit_t *x, const unsigned int nwords) { + unsigned int i; + + for (i = 0; i < nwords - 1; i++) { + SHIFTR(x[i + 1], x[i], 1, x[i], RADIX); + } + x[nwords - 1] >>= 1; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sidh.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sidh.c index d3fdbe722c..45faa45cff 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sidh.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sidh.c @@ -1,286 +1,286 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH) -*********************************************************************************************/ - -#include "../s2n_pq_random.h" -#include "utils/s2n_safety.h" - -static void init_basis(const digit_t *gen, f2elm_t *XP, f2elm_t *XQ, f2elm_t *XR) { // Initialization of basis points - - fpcopy(gen, XP->e[0]); - fpcopy(gen + NWORDS_FIELD, XP->e[1]); - fpcopy(gen + 2 * NWORDS_FIELD, XQ->e[0]); - fpcopy(gen + 3 * NWORDS_FIELD, XQ->e[1]); - fpcopy(gen + 4 * NWORDS_FIELD, XR->e[0]); - fpcopy(gen + 5 * NWORDS_FIELD, XR->e[1]); -} - -int random_mod_order_A(unsigned char *random_digits) { // Generation of Alice's secret key - // Outputs random value in [0, 2^eA - 1] - GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, SECRETKEY_A_BYTES)); - random_digits[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; // Masking last byte - return S2N_SUCCESS; -} - -int random_mod_order_B(unsigned char *random_digits) { // Generation of Bob's secret key - // Outputs random value in [0, 2^Floor(Log(2, oB)) - 1] - GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, SECRETKEY_B_BYTES)); - random_digits[SECRETKEY_B_BYTES - 1] &= MASK_BOB; // Masking last byte - return S2N_SUCCESS; -} - -int EphemeralKeyGeneration_A(const digit_t *PrivateKeyA, unsigned char *PublicKeyA) { // Alice's ephemeral public key generation - // Input: a private key PrivateKeyA 
in the range [0, 2^eA - 1]. - // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. - point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_ALICE]; - f2elm_t _XPA, _XQA, _XRA, coeff[3], _A24plus = {0}, _C24 = {0}, _A = {0}; - f2elm_t *XPA=&_XPA, *XQA=&_XQA, *XRA=&_XRA, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; - - // Initialize basis points - init_basis((const digit_t *) A_gen, XPA, XQA, XRA); - init_basis((const digit_t *) B_gen, &phiP->X, &phiQ->X, &phiR->X); - fpcopy((const digit_t *) &Montgomery_one, (phiP->Z.e)[0]); - fpcopy((const digit_t *) &Montgomery_one, (phiQ->Z.e)[0]); - fpcopy((const digit_t *) &Montgomery_one, (phiR->Z.e)[0]); - - // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1 - fpcopy((const digit_t *) &Montgomery_one, A24plus->e[0]); - fp2add(A24plus, A24plus, A24plus); - fp2add(A24plus, A24plus, C24); - fp2add(A24plus, C24, A); - fp2add(C24, C24, A24plus); - - // Retrieve kernel point - LADDER3PT(XPA, XQA, XRA, PrivateKeyA, ALICE, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Alice; row++) { - while (index < MAX_Alice - row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Alice[ii++]; - xDBLe(R, R, A24plus, C24, (int) (2 * m)); - index += m; - } - get_4_isog(R, A24plus, C24, coeff); - - for (i = 0; i < npts; i++) { - eval_4_isog(pts[i], coeff); - } - eval_4_isog(phiP, coeff); - eval_4_isog(phiQ, coeff); - eval_4_isog(phiR, coeff); - - fp2copy(&pts[npts - 1]->X, &R->X); - fp2copy(&pts[npts - 1]->Z, &R->Z); - index = pts_index[npts - 1]; - npts -= 1; - } - - get_4_isog(R, A24plus, C24, coeff); - eval_4_isog(phiP, coeff); - eval_4_isog(phiQ, coeff); - eval_4_isog(phiR, coeff); - - inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); - fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); - fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); - fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); - - // Format public key - fp2_encode(&phiP->X, PublicKeyA); - fp2_encode(&phiQ->X, PublicKeyA + FP2_ENCODED_BYTES); - fp2_encode(&phiR->X, PublicKeyA + 2 * FP2_ENCODED_BYTES); - - return 0; -} - -int EphemeralKeyGeneration_B(const digit_t *PrivateKeyB, unsigned char *PublicKeyB) { // Bob's ephemeral public key generation - // Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. - // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
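random_mod_order_A/B above draw SECRETKEY_A_BYTES or SECRETKEY_B_BYTES of randomness and then AND the final byte with MASK_ALICE or MASK_BOB so the little-endian value falls in the documented range. The sketch below shows the effect of that masking step on a 2-byte toy secret with a hypothetical 0x03 mask; the real mask values are parameter-specific and not reproduced here.

#include <assert.h>
#include <stdint.h>

int main(void) {
    uint8_t sk[2] = {0xff, 0xff};        /* stand-in for fresh random bytes */
    const uint8_t mask = 0x03;           /* hypothetical top-byte mask */

    sk[sizeof(sk) - 1] &= mask;          /* same masking step as the source */

    uint16_t val = (uint16_t) (sk[0] | (sk[1] << 8));
    assert(val < (1u << 10));            /* value is bounded by the mask */
    return 0;
}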
- point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB]; - f2elm_t _XPB, _XQB, _XRB, coeff[3], _A24plus = {0}, _A24minus = {0}, _A = {0}; - f2elm_t *XPB=&_XPB, *XQB=&_XQB, *XRB=&_XRB, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; - - // Initialize basis points - init_basis((const digit_t *) B_gen, XPB, XQB, XRB); - init_basis((const digit_t *) A_gen, &phiP->X, &phiQ->X, &phiR->X); - fpcopy((const digit_t *) &Montgomery_one, (phiP->Z.e)[0]); - fpcopy((const digit_t *) &Montgomery_one, (phiQ->Z.e)[0]); - fpcopy((const digit_t *) &Montgomery_one, (phiR->Z.e)[0]); - - // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 - fpcopy((const digit_t *) &Montgomery_one, A24plus->e[0]); - fp2add(A24plus, A24plus, A24plus); - fp2add(A24plus, A24plus, A24minus); - fp2add(A24plus, A24minus, A); - fp2add(A24minus, A24minus, A24plus); - - // Retrieve kernel point - LADDER3PT(XPB, XQB, XRB, PrivateKeyB, BOB, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Bob; row++) { - while (index < MAX_Bob - row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Bob[ii++]; - xTPLe(R, R, A24minus, A24plus, (int) m); - index += m; - } - get_3_isog(R, A24minus, A24plus, coeff); - - for (i = 0; i < npts; i++) { - eval_3_isog(pts[i], coeff); - } - eval_3_isog(phiP, coeff); - eval_3_isog(phiQ, coeff); - eval_3_isog(phiR, coeff); - - fp2copy(&pts[npts - 1]->X, &R->X); - fp2copy(&pts[npts - 1]->Z, &R->Z); - index = pts_index[npts - 1]; - npts -= 1; - } - - get_3_isog(R, A24minus, A24plus, coeff); - eval_3_isog(phiP, coeff); - eval_3_isog(phiQ, coeff); - eval_3_isog(phiR, coeff); - - inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); - fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); - fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); - fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); - - // Format public key - fp2_encode(&phiP->X, PublicKeyB); - fp2_encode(&phiQ->X, PublicKeyB + FP2_ENCODED_BYTES); - fp2_encode(&phiR->X, PublicKeyB + 2 * FP2_ENCODED_BYTES); - - return 0; -} - -int EphemeralSecretAgreement_A(const digit_t *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA) { // Alice's ephemeral shared secret computation - // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB - // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1]. - // Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. - // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes. 
- point_proj_t R, pts[MAX_INT_POINTS_ALICE]; - f2elm_t coeff[3], PKB[3], _jinv; - f2elm_t _A24plus = {0}, _C24 = {0}, _A = {0}; - f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; - - // Initialize images of Bob's basis - fp2_decode(PublicKeyB, &PKB[0]); - fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, &PKB[1]); - fp2_decode(PublicKeyB + 2 * FP2_ENCODED_BYTES, &PKB[2]); - - // Initialize constants: A24plus = A+2C, C24 = 4C, where C=1 - get_A(&PKB[0], &PKB[1], &PKB[2], A); - fpadd((const digit_t *) &Montgomery_one, (const digit_t *) &Montgomery_one, C24->e[0]); - fp2add(A, C24, A24plus); - fpadd(C24->e[0], C24->e[0], C24->e[0]); - - // Retrieve kernel point - LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyA, ALICE, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Alice; row++) { - while (index < MAX_Alice - row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Alice[ii++]; - xDBLe(R, R, A24plus, C24, (int) (2 * m)); - index += m; - } - get_4_isog(R, A24plus, C24, coeff); - - for (i = 0; i < npts; i++) { - eval_4_isog(pts[i], coeff); - } - - fp2copy(&pts[npts - 1]->X, &R->X); - fp2copy(&pts[npts - 1]->Z, &R->Z); - index = pts_index[npts - 1]; - npts -= 1; - } - - get_4_isog(R, A24plus, C24, coeff); - fp2add(A24plus, A24plus, A24plus); - fp2sub(A24plus, C24, A24plus); - fp2add(A24plus, A24plus, A24plus); - j_inv(A24plus, C24, jinv); - fp2_encode(jinv, SharedSecretA); // Format shared secret - - return 0; -} - -int EphemeralSecretAgreement_B(const digit_t *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB) { // Bob's ephemeral shared secret computation - // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA - // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1]. - // Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. - // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes. 
- point_proj_t R, pts[MAX_INT_POINTS_BOB]; - f2elm_t coeff[3], PKB[3], _jinv; - f2elm_t _A24plus = {0}, _A24minus = {0}, _A = {0}; - f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; - unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; - - // Initialize images of Alice's basis - fp2_decode(PublicKeyA, &PKB[0]); - fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, &PKB[1]); - fp2_decode(PublicKeyA + 2 * FP2_ENCODED_BYTES, &PKB[2]); - - // Initialize constants: A24plus = A+2C, A24minus = A-2C, where C=1 - get_A(&PKB[0], &PKB[1], &PKB[2], A); - fpadd((const digit_t *) &Montgomery_one, (const digit_t *) &Montgomery_one, A24minus->e[0]); - fp2add(A, A24minus, A24plus); - fp2sub(A, A24minus, A24minus); - - // Retrieve kernel point - LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyB, BOB, R, A); - - // Traverse tree - index = 0; - for (row = 1; row < MAX_Bob; row++) { - while (index < MAX_Bob - row) { - fp2copy(&R->X, &pts[npts]->X); - fp2copy(&R->Z, &pts[npts]->Z); - pts_index[npts++] = index; - m = strat_Bob[ii++]; - xTPLe(R, R, A24minus, A24plus, (int) m); - index += m; - } - get_3_isog(R, A24minus, A24plus, coeff); - - for (i = 0; i < npts; i++) { - eval_3_isog(pts[i], coeff); - } - - fp2copy(&pts[npts - 1]->X, &R->X); - fp2copy(&pts[npts - 1]->Z, &R->Z); - index = pts_index[npts - 1]; - npts -= 1; - } - - get_3_isog(R, A24minus, A24plus, coeff); - fp2add(A24plus, A24minus, A); - fp2add(A, A, A); - fp2sub(A24plus, A24minus, A24plus); - j_inv(A, A24plus, jinv); - fp2_encode(jinv, SharedSecretB); // Format shared secret - - return 0; -} +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: ephemeral supersingular isogeny Diffie-Hellman key exchange (SIDH) +*********************************************************************************************/ + +#include "../s2n_pq_random.h" +#include "utils/s2n_safety.h" + +static void init_basis(const digit_t *gen, f2elm_t *XP, f2elm_t *XQ, f2elm_t *XR) { // Initialization of basis points + + fpcopy(gen, XP->e[0]); + fpcopy(gen + NWORDS_FIELD, XP->e[1]); + fpcopy(gen + 2 * NWORDS_FIELD, XQ->e[0]); + fpcopy(gen + 3 * NWORDS_FIELD, XQ->e[1]); + fpcopy(gen + 4 * NWORDS_FIELD, XR->e[0]); + fpcopy(gen + 5 * NWORDS_FIELD, XR->e[1]); +} + +int random_mod_order_A(unsigned char *random_digits) { // Generation of Alice's secret key + // Outputs random value in [0, 2^eA - 1] + GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, SECRETKEY_A_BYTES)); + random_digits[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; // Masking last byte + return S2N_SUCCESS; +} + +int random_mod_order_B(unsigned char *random_digits) { // Generation of Bob's secret key + // Outputs random value in [0, 2^Floor(Log(2, oB)) - 1] + GUARD_AS_POSIX(s2n_get_random_bytes(random_digits, SECRETKEY_B_BYTES)); + random_digits[SECRETKEY_B_BYTES - 1] &= MASK_BOB; // Masking last byte + return S2N_SUCCESS; +} + +int EphemeralKeyGeneration_A(const digit_t *PrivateKeyA, unsigned char *PublicKeyA) { // Alice's ephemeral public key generation + // Input: a private key PrivateKeyA in the range [0, 2^eA - 1]. + // Output: the public key PublicKeyA consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
+ point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_ALICE]; + f2elm_t _XPA, _XQA, _XRA, coeff[3], _A24plus = {0}, _C24 = {0}, _A = {0}; + f2elm_t *XPA=&_XPA, *XQA=&_XQA, *XRA=&_XRA, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize basis points + init_basis((const digit_t *) A_gen, XPA, XQA, XRA); + init_basis((const digit_t *) B_gen, &phiP->X, &phiQ->X, &phiR->X); + fpcopy((const digit_t *) &Montgomery_one, (phiP->Z.e)[0]); + fpcopy((const digit_t *) &Montgomery_one, (phiQ->Z.e)[0]); + fpcopy((const digit_t *) &Montgomery_one, (phiR->Z.e)[0]); + + // Initialize constants: A24plus = A+2C, C24 = 4C, where A=6, C=1 + fpcopy((const digit_t *) &Montgomery_one, A24plus->e[0]); + fp2add(A24plus, A24plus, A24plus); + fp2add(A24plus, A24plus, C24); + fp2add(A24plus, C24, A); + fp2add(C24, C24, A24plus); + + // Retrieve kernel point + LADDER3PT(XPA, XQA, XRA, PrivateKeyA, ALICE, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Alice; row++) { + while (index < MAX_Alice - row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Alice[ii++]; + xDBLe(R, R, A24plus, C24, (int) (2 * m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + fp2copy(&pts[npts - 1]->X, &R->X); + fp2copy(&pts[npts - 1]->Z, &R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + eval_4_isog(phiP, coeff); + eval_4_isog(phiQ, coeff); + eval_4_isog(phiR, coeff); + + inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); + fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); + fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); + fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); + + // Format public key + fp2_encode(&phiP->X, PublicKeyA); + fp2_encode(&phiQ->X, PublicKeyA + FP2_ENCODED_BYTES); + fp2_encode(&phiR->X, PublicKeyA + 2 * FP2_ENCODED_BYTES); + + return 0; +} + +int EphemeralKeyGeneration_B(const digit_t *PrivateKeyB, unsigned char *PublicKeyB) { // Bob's ephemeral public key generation + // Input: a private key PrivateKeyB in the range [0, 2^Floor(Log(2,oB)) - 1]. + // Output: the public key PublicKeyB consisting of 3 elements in GF(p^2) which are encoded by removing leading 0 bytes. 
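/* The loop above walks the isogeny tree with a precomputed strategy: copies of R
 * are pushed onto the pts/pts_index stack, xDBLe moves the working point further
 * down, and after each 4-isogeny the most recently stacked point becomes the next
 * kernel candidate. The toy below tracks only that bookkeeping, with a made-up
 * MAX and strategy (not MAX_Alice / strat_Alice), to show the visit order.
 */
#include <stdio.h>

int main(void) {
    const unsigned int MAX = 4;
    const unsigned int strat[] = {2, 1, 1};      /* hypothetical strategy         */
    unsigned int pts_index[8], npts = 0, ii = 0, index = 0;

    for (unsigned int row = 1; row < MAX; row++) {
        while (index < MAX - row) {
            pts_index[npts++] = index;           /* push a copy of R              */
            unsigned int m = strat[ii++];
            index += m;                          /* xDBLe: m more doubling steps  */
        }
        printf("row %u: isogeny computed at index %u, stack depth %u\n",
               row, index, npts);
        index = pts_index[--npts];               /* pop the next kernel candidate */
    }
    return 0;
}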
+ point_proj_t R, phiP = {0}, phiQ = {0}, phiR = {0}, pts[MAX_INT_POINTS_BOB]; + f2elm_t _XPB, _XQB, _XRB, coeff[3], _A24plus = {0}, _A24minus = {0}, _A = {0}; + f2elm_t *XPB=&_XPB, *XQB=&_XQB, *XRB=&_XRB, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize basis points + init_basis((const digit_t *) B_gen, XPB, XQB, XRB); + init_basis((const digit_t *) A_gen, &phiP->X, &phiQ->X, &phiR->X); + fpcopy((const digit_t *) &Montgomery_one, (phiP->Z.e)[0]); + fpcopy((const digit_t *) &Montgomery_one, (phiQ->Z.e)[0]); + fpcopy((const digit_t *) &Montgomery_one, (phiR->Z.e)[0]); + + // Initialize constants: A24minus = A-2C, A24plus = A+2C, where A=6, C=1 + fpcopy((const digit_t *) &Montgomery_one, A24plus->e[0]); + fp2add(A24plus, A24plus, A24plus); + fp2add(A24plus, A24plus, A24minus); + fp2add(A24plus, A24minus, A); + fp2add(A24minus, A24minus, A24plus); + + // Retrieve kernel point + LADDER3PT(XPB, XQB, XRB, PrivateKeyB, BOB, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Bob; row++) { + while (index < MAX_Bob - row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Bob[ii++]; + xTPLe(R, R, A24minus, A24plus, (int) m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + fp2copy(&pts[npts - 1]->X, &R->X); + fp2copy(&pts[npts - 1]->Z, &R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + eval_3_isog(phiP, coeff); + eval_3_isog(phiQ, coeff); + eval_3_isog(phiR, coeff); + + inv_3_way(&phiP->Z, &phiQ->Z, &phiR->Z); + fp2mul_mont(&phiP->X, &phiP->Z, &phiP->X); + fp2mul_mont(&phiQ->X, &phiQ->Z, &phiQ->X); + fp2mul_mont(&phiR->X, &phiR->Z, &phiR->X); + + // Format public key + fp2_encode(&phiP->X, PublicKeyB); + fp2_encode(&phiQ->X, PublicKeyB + FP2_ENCODED_BYTES); + fp2_encode(&phiR->X, PublicKeyB + 2 * FP2_ENCODED_BYTES); + + return 0; +} + +int EphemeralSecretAgreement_A(const digit_t *PrivateKeyA, const unsigned char *PublicKeyB, unsigned char *SharedSecretA) { // Alice's ephemeral shared secret computation + // It produces a shared secret key SharedSecretA using her secret key PrivateKeyA and Bob's public key PublicKeyB + // Inputs: Alice's PrivateKeyA is an integer in the range [0, oA-1]. + // Bob's PublicKeyB consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. + // Output: a shared secret SharedSecretA that consists of one element in GF(p^2) encoded by removing leading 0 bytes. 
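/* Sanity check of the constant set-up above with plain integers: for the starting
 * curve the comments state A = 6 and C = 1, and the chains of fp2add calls reduce
 * to the doublings below, giving A24plus = A + 2C = 8 and C24 = 4C = 4 on Alice's
 * side, and A24minus = A - 2C = 4 on Bob's side.
 */
#include <stdio.h>

int main(void) {
    int one = 1;

    /* Alice's key generation: A24plus, C24 */
    int t = one + one;                    /* 2          */
    int C24 = t + t;                      /* 4  = 4C    */
    int A = t + C24;                      /* 6  = A     */
    int A24plus = C24 + C24;              /* 8  = A+2C  */
    printf("A=%d C24=%d A24plus=%d\n", A, C24, A24plus);

    /* Bob's key generation: A24minus, A24plus */
    int A24minus = t + t;                 /* 4  = A-2C  */
    int A_b = t + A24minus;               /* 6  = A     */
    int A24plus_b = A24minus + A24minus;  /* 8  = A+2C  */
    printf("A=%d A24minus=%d A24plus=%d\n", A_b, A24minus, A24plus_b);
    return 0;
}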
+ point_proj_t R, pts[MAX_INT_POINTS_ALICE]; + f2elm_t coeff[3], PKB[3], _jinv; + f2elm_t _A24plus = {0}, _C24 = {0}, _A = {0}; + f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *C24=&_C24, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_ALICE], npts = 0, ii = 0; + + // Initialize images of Bob's basis + fp2_decode(PublicKeyB, &PKB[0]); + fp2_decode(PublicKeyB + FP2_ENCODED_BYTES, &PKB[1]); + fp2_decode(PublicKeyB + 2 * FP2_ENCODED_BYTES, &PKB[2]); + + // Initialize constants: A24plus = A+2C, C24 = 4C, where C=1 + get_A(&PKB[0], &PKB[1], &PKB[2], A); + fpadd((const digit_t *) &Montgomery_one, (const digit_t *) &Montgomery_one, C24->e[0]); + fp2add(A, C24, A24plus); + fpadd(C24->e[0], C24->e[0], C24->e[0]); + + // Retrieve kernel point + LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyA, ALICE, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Alice; row++) { + while (index < MAX_Alice - row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Alice[ii++]; + xDBLe(R, R, A24plus, C24, (int) (2 * m)); + index += m; + } + get_4_isog(R, A24plus, C24, coeff); + + for (i = 0; i < npts; i++) { + eval_4_isog(pts[i], coeff); + } + + fp2copy(&pts[npts - 1]->X, &R->X); + fp2copy(&pts[npts - 1]->Z, &R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + get_4_isog(R, A24plus, C24, coeff); + fp2add(A24plus, A24plus, A24plus); + fp2sub(A24plus, C24, A24plus); + fp2add(A24plus, A24plus, A24plus); + j_inv(A24plus, C24, jinv); + fp2_encode(jinv, SharedSecretA); // Format shared secret + + return 0; +} + +int EphemeralSecretAgreement_B(const digit_t *PrivateKeyB, const unsigned char *PublicKeyA, unsigned char *SharedSecretB) { // Bob's ephemeral shared secret computation + // It produces a shared secret key SharedSecretB using his secret key PrivateKeyB and Alice's public key PublicKeyA + // Inputs: Bob's PrivateKeyB is an integer in the range [0, 2^Floor(Log(2,oB)) - 1]. + // Alice's PublicKeyA consists of 3 elements in GF(p^2) encoded by removing leading 0 bytes. + // Output: a shared secret SharedSecretB that consists of one element in GF(p^2) encoded by removing leading 0 bytes. 
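/* Integer model of the three fp2 operations that precede j_inv above: starting
 * from A24plus = A + 2C and C24 = 4C they recover 4A, so j_inv is evaluated at
 * the pair (4A, 4C). Assuming j_inv, like the other projective formulas here,
 * depends only on the ratio A : C, this equals the j-invariant of (A, C). The
 * 3-isogeny side below does the analogue from (A + 2C, A - 2C): their sum
 * doubled is 4A and their difference is 4C.
 */
#include <stdio.h>

int main(void) {
    int A = 10, C = 3;                 /* arbitrary sample values     */
    int A24plus = A + 2 * C;           /* state on entry to the tail  */
    int C24 = 4 * C;

    A24plus = A24plus + A24plus;       /* 2A + 4C                     */
    A24plus = A24plus - C24;           /* 2A                          */
    A24plus = A24plus + A24plus;       /* 4A                          */

    printf("recovered %d (4A = %d), alongside C24 = %d (4C)\n", A24plus, 4 * A, C24);
    return 0;
}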
+ point_proj_t R, pts[MAX_INT_POINTS_BOB]; + f2elm_t coeff[3], PKB[3], _jinv; + f2elm_t _A24plus = {0}, _A24minus = {0}, _A = {0}; + f2elm_t *jinv=&_jinv, *A24plus=&_A24plus, *A24minus=&_A24minus, *A=&_A; + unsigned int i, row, m, index = 0, pts_index[MAX_INT_POINTS_BOB], npts = 0, ii = 0; + + // Initialize images of Alice's basis + fp2_decode(PublicKeyA, &PKB[0]); + fp2_decode(PublicKeyA + FP2_ENCODED_BYTES, &PKB[1]); + fp2_decode(PublicKeyA + 2 * FP2_ENCODED_BYTES, &PKB[2]); + + // Initialize constants: A24plus = A+2C, A24minus = A-2C, where C=1 + get_A(&PKB[0], &PKB[1], &PKB[2], A); + fpadd((const digit_t *) &Montgomery_one, (const digit_t *) &Montgomery_one, A24minus->e[0]); + fp2add(A, A24minus, A24plus); + fp2sub(A, A24minus, A24minus); + + // Retrieve kernel point + LADDER3PT(&PKB[0], &PKB[1], &PKB[2], PrivateKeyB, BOB, R, A); + + // Traverse tree + index = 0; + for (row = 1; row < MAX_Bob; row++) { + while (index < MAX_Bob - row) { + fp2copy(&R->X, &pts[npts]->X); + fp2copy(&R->Z, &pts[npts]->Z); + pts_index[npts++] = index; + m = strat_Bob[ii++]; + xTPLe(R, R, A24minus, A24plus, (int) m); + index += m; + } + get_3_isog(R, A24minus, A24plus, coeff); + + for (i = 0; i < npts; i++) { + eval_3_isog(pts[i], coeff); + } + + fp2copy(&pts[npts - 1]->X, &R->X); + fp2copy(&pts[npts - 1]->Z, &R->Z); + index = pts_index[npts - 1]; + npts -= 1; + } + + get_3_isog(R, A24minus, A24plus, coeff); + fp2add(A24plus, A24minus, A); + fp2add(A, A, A); + fp2sub(A24plus, A24minus, A24plus); + j_inv(A, A24plus, jinv); + fp2_encode(jinv, SharedSecretB); // Format shared secret + + return 0; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sike_r2_kem.c b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sike_r2_kem.c index 453e3b4690..a47fd66c31 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sike_r2_kem.c +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sike_r2_kem.c @@ -1,116 +1,116 @@ -/******************************************************************************************** -* SIDH: an efficient supersingular isogeny cryptography library -* -* Abstract: supersingular isogeny key encapsulation (SIKE) protocol -*********************************************************************************************/ - -#include <string.h> -#include "../s2n_pq_random.h" -#include "fips202.h" -#include "utils/s2n_safety.h" -#include "tls/s2n_kem.h" - -int SIKE_P434_r2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { - // SIKE's key generation - // Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) - // public key pk (CRYPTO_PUBLICKEYBYTES bytes) - - digit_t _sk[(SECRETKEY_B_BYTES / sizeof(digit_t)) + 1]; - - // Generate lower portion of secret key sk <- s||SK - GUARD_AS_POSIX(s2n_get_random_bytes(sk, MSG_BYTES)); - GUARD(random_mod_order_B((unsigned char *)_sk)); - - // Generate public key pk - EphemeralKeyGeneration_B(_sk, pk); - - memcpy(sk + MSG_BYTES, _sk, SECRETKEY_B_BYTES); - - // Append public key pk to secret key sk - memcpy(&sk[MSG_BYTES + SECRETKEY_B_BYTES], pk, CRYPTO_PUBLICKEYBYTES); - - return 0; -} - -int SIKE_P434_r2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { - // SIKE's encapsulation - // Input: public key pk (CRYPTO_PUBLICKEYBYTES bytes) - // Outputs: shared secret ss (CRYPTO_BYTES bytes) - // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) - - union { - unsigned char b[SECRETKEY_A_BYTES]; - digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; 
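/* Usage sketch for the four ephemeral routines above. It assumes the internal
 * P434/SIDH header that declares these functions and the size macros is on the
 * include path, and mirrors the buffer sizes used by the KEM code later in this
 * directory; sidh_exchange_demo is an illustrative name, not a library function.
 */
#include <string.h>

int sidh_exchange_demo(void) {
    digit_t skA[(SECRETKEY_A_BYTES / sizeof(digit_t)) + 1] = {0};
    digit_t skB[(SECRETKEY_B_BYTES / sizeof(digit_t)) + 1] = {0};
    unsigned char pkA[CRYPTO_PUBLICKEYBYTES], pkB[CRYPTO_PUBLICKEYBYTES];
    unsigned char ssA[FP2_ENCODED_BYTES], ssB[FP2_ENCODED_BYTES];

    if (random_mod_order_A((unsigned char *) skA) != S2N_SUCCESS) { return -1; }
    if (random_mod_order_B((unsigned char *) skB) != S2N_SUCCESS) { return -1; }

    EphemeralKeyGeneration_A(skA, pkA);            /* Alice publishes pkA        */
    EphemeralKeyGeneration_B(skB, pkB);            /* Bob publishes pkB          */

    EphemeralSecretAgreement_A(skA, pkB, ssA);     /* Alice's shared j-invariant */
    EphemeralSecretAgreement_B(skB, pkA, ssB);     /* Bob's shared j-invariant   */

    return (memcmp(ssA, ssB, sizeof(ssA)) == 0) ? 0 : -1;   /* should agree      */
}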
- } ephemeralsk; - unsigned char jinvariant[FP2_ENCODED_BYTES]; - unsigned char h[MSG_BYTES]; - unsigned char temp[CRYPTO_CIPHERTEXTBYTES + MSG_BYTES]; - - // Generate ephemeralsk <- G(m||pk) mod oA - GUARD_AS_POSIX(s2n_get_random_bytes(temp, MSG_BYTES)); - memcpy(&temp[MSG_BYTES], pk, CRYPTO_PUBLICKEYBYTES); - shake256(ephemeralsk.b, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES + MSG_BYTES); - - /* ephemeralsk is a union; the memory set here through .b will get accessed through the .d member later */ - /* cppcheck-suppress unreadVariable */ - /* cppcheck-suppress unmatchedSuppression */ - ephemeralsk.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; - - // Encrypt - EphemeralKeyGeneration_A(ephemeralsk.d, ct); - EphemeralSecretAgreement_A(ephemeralsk.d, pk, jinvariant); - shake256(h, MSG_BYTES, jinvariant, FP2_ENCODED_BYTES); - for (int i = 0; i < MSG_BYTES; i++) { - ct[i + CRYPTO_PUBLICKEYBYTES] = temp[i] ^ h[i]; - } - // Generate shared secret ss <- H(m||ct) - memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); - shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES + MSG_BYTES); - - return 0; -} - -int SIKE_P434_r2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { - // SIKE's decapsulation - // Input: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) - // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) - // Outputs: shared secret ss (CRYPTO_BYTES bytes) - - union { - unsigned char b[SECRETKEY_A_BYTES]; - digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; - } ephemeralsk_; - unsigned char jinvariant_[FP2_ENCODED_BYTES]; - unsigned char h_[MSG_BYTES]; - unsigned char c0_[CRYPTO_PUBLICKEYBYTES]; - unsigned char temp[CRYPTO_CIPHERTEXTBYTES + MSG_BYTES]; - - digit_t _sk[(SECRETKEY_B_BYTES / sizeof(digit_t)) + 1]; - memcpy(_sk, sk + MSG_BYTES, SECRETKEY_B_BYTES); - - // Decrypt - EphemeralSecretAgreement_B(_sk, ct, jinvariant_); - shake256(h_, MSG_BYTES, jinvariant_, FP2_ENCODED_BYTES); - for (int i = 0; i < MSG_BYTES; i++) { - temp[i] = ct[i + CRYPTO_PUBLICKEYBYTES] ^ h_[i]; - } - // Generate ephemeralsk_ <- G(m||pk) mod oA - memcpy(&temp[MSG_BYTES], &sk[MSG_BYTES + SECRETKEY_B_BYTES], CRYPTO_PUBLICKEYBYTES); - shake256(ephemeralsk_.b, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES + MSG_BYTES); - - /* ephemeralsk_ is a union; the memory set here through .b will get accessed through the .d member later */ - /* cppcheck-suppress unreadVariable */ - /* cppcheck-suppress uninitvar */ - /* cppcheck-suppress unmatchedSuppression */ - ephemeralsk_.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; - - // Generate shared secret ss <- H(m||ct) or output ss <- H(s||ct) - EphemeralKeyGeneration_A(ephemeralsk_.d, c0_); - if (memcmp(c0_, ct, CRYPTO_PUBLICKEYBYTES) != 0) { - memcpy(temp, sk, MSG_BYTES); - } - memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); - shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES + MSG_BYTES); - - return 0; -} +/******************************************************************************************** +* SIDH: an efficient supersingular isogeny cryptography library +* +* Abstract: supersingular isogeny key encapsulation (SIKE) protocol +*********************************************************************************************/ + +#include <string.h> +#include "../s2n_pq_random.h" +#include "fips202.h" +#include "utils/s2n_safety.h" +#include "tls/s2n_kem.h" + +int SIKE_P434_r2_crypto_kem_keypair(unsigned char *pk, unsigned char *sk) { + // SIKE's key 
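/* For reference while reading the keypair and decapsulation code: the secret key
 * is laid out as sk = s (MSG_BYTES) || skB (SECRETKEY_B_BYTES) || pk
 * (CRYPTO_PUBLICKEYBYTES). The offset macros and helper below use illustrative
 * names only; the library itself works with the explicit offsets visible in the
 * memcpy calls.
 */
#define SIKE_SK_S_OFFSET    0
#define SIKE_SK_SKB_OFFSET  (MSG_BYTES)
#define SIKE_SK_PK_OFFSET   (MSG_BYTES + SECRETKEY_B_BYTES)

static inline const unsigned char *sike_sk_embedded_pk(const unsigned char *sk) {
    return sk + SIKE_SK_PK_OFFSET;   /* public key appended at key generation */
}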
generation + // Outputs: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) + // public key pk (CRYPTO_PUBLICKEYBYTES bytes) + + digit_t _sk[(SECRETKEY_B_BYTES / sizeof(digit_t)) + 1]; + + // Generate lower portion of secret key sk <- s||SK + GUARD_AS_POSIX(s2n_get_random_bytes(sk, MSG_BYTES)); + GUARD(random_mod_order_B((unsigned char *)_sk)); + + // Generate public key pk + EphemeralKeyGeneration_B(_sk, pk); + + memcpy(sk + MSG_BYTES, _sk, SECRETKEY_B_BYTES); + + // Append public key pk to secret key sk + memcpy(&sk[MSG_BYTES + SECRETKEY_B_BYTES], pk, CRYPTO_PUBLICKEYBYTES); + + return 0; +} + +int SIKE_P434_r2_crypto_kem_enc(unsigned char *ct, unsigned char *ss, const unsigned char *pk) { + // SIKE's encapsulation + // Input: public key pk (CRYPTO_PUBLICKEYBYTES bytes) + // Outputs: shared secret ss (CRYPTO_BYTES bytes) + // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) + + union { + unsigned char b[SECRETKEY_A_BYTES]; + digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; + } ephemeralsk; + unsigned char jinvariant[FP2_ENCODED_BYTES]; + unsigned char h[MSG_BYTES]; + unsigned char temp[CRYPTO_CIPHERTEXTBYTES + MSG_BYTES]; + + // Generate ephemeralsk <- G(m||pk) mod oA + GUARD_AS_POSIX(s2n_get_random_bytes(temp, MSG_BYTES)); + memcpy(&temp[MSG_BYTES], pk, CRYPTO_PUBLICKEYBYTES); + shake256(ephemeralsk.b, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES + MSG_BYTES); + + /* ephemeralsk is a union; the memory set here through .b will get accessed through the .d member later */ + /* cppcheck-suppress unreadVariable */ + /* cppcheck-suppress unmatchedSuppression */ + ephemeralsk.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + // Encrypt + EphemeralKeyGeneration_A(ephemeralsk.d, ct); + EphemeralSecretAgreement_A(ephemeralsk.d, pk, jinvariant); + shake256(h, MSG_BYTES, jinvariant, FP2_ENCODED_BYTES); + for (int i = 0; i < MSG_BYTES; i++) { + ct[i + CRYPTO_PUBLICKEYBYTES] = temp[i] ^ h[i]; + } + // Generate shared secret ss <- H(m||ct) + memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); + shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES + MSG_BYTES); + + return 0; +} + +int SIKE_P434_r2_crypto_kem_dec(unsigned char *ss, const unsigned char *ct, const unsigned char *sk) { + // SIKE's decapsulation + // Input: secret key sk (CRYPTO_SECRETKEYBYTES = MSG_BYTES + SECRETKEY_B_BYTES + CRYPTO_PUBLICKEYBYTES bytes) + // ciphertext message ct (CRYPTO_CIPHERTEXTBYTES = CRYPTO_PUBLICKEYBYTES + MSG_BYTES bytes) + // Outputs: shared secret ss (CRYPTO_BYTES bytes) + + union { + unsigned char b[SECRETKEY_A_BYTES]; + digit_t d[SECRETKEY_A_BYTES/sizeof(digit_t)]; + } ephemeralsk_; + unsigned char jinvariant_[FP2_ENCODED_BYTES]; + unsigned char h_[MSG_BYTES]; + unsigned char c0_[CRYPTO_PUBLICKEYBYTES]; + unsigned char temp[CRYPTO_CIPHERTEXTBYTES + MSG_BYTES]; + + digit_t _sk[(SECRETKEY_B_BYTES / sizeof(digit_t)) + 1]; + memcpy(_sk, sk + MSG_BYTES, SECRETKEY_B_BYTES); + + // Decrypt + EphemeralSecretAgreement_B(_sk, ct, jinvariant_); + shake256(h_, MSG_BYTES, jinvariant_, FP2_ENCODED_BYTES); + for (int i = 0; i < MSG_BYTES; i++) { + temp[i] = ct[i + CRYPTO_PUBLICKEYBYTES] ^ h_[i]; + } + // Generate ephemeralsk_ <- G(m||pk) mod oA + memcpy(&temp[MSG_BYTES], &sk[MSG_BYTES + SECRETKEY_B_BYTES], CRYPTO_PUBLICKEYBYTES); + shake256(ephemeralsk_.b, SECRETKEY_A_BYTES, temp, CRYPTO_PUBLICKEYBYTES + MSG_BYTES); + + /* ephemeralsk_ is a union; the memory set here through .b will get accessed through the .d member later */ + /* 
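/* Round-trip sketch for the three KEM entry points in this file, assuming their
 * declarations and the CRYPTO_* size macros named in the comments above are in
 * scope. Encapsulation and decapsulation should produce the same CRYPTO_BYTES
 * shared secret; sike_kem_roundtrip_demo is an illustrative name only.
 */
#include <string.h>

int sike_kem_roundtrip_demo(void) {
    unsigned char pk[CRYPTO_PUBLICKEYBYTES];
    unsigned char sk[CRYPTO_SECRETKEYBYTES];
    unsigned char ct[CRYPTO_CIPHERTEXTBYTES];
    unsigned char ss_enc[CRYPTO_BYTES], ss_dec[CRYPTO_BYTES];

    if (SIKE_P434_r2_crypto_kem_keypair(pk, sk) != 0) { return -1; }
    if (SIKE_P434_r2_crypto_kem_enc(ct, ss_enc, pk) != 0) { return -1; }
    if (SIKE_P434_r2_crypto_kem_dec(ss_dec, ct, sk) != 0) { return -1; }

    return (memcmp(ss_enc, ss_dec, CRYPTO_BYTES) == 0) ? 0 : -1;
}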
cppcheck-suppress unreadVariable */ + /* cppcheck-suppress uninitvar */ + /* cppcheck-suppress unmatchedSuppression */ + ephemeralsk_.b[SECRETKEY_A_BYTES - 1] &= MASK_ALICE; + + // Generate shared secret ss <- H(m||ct) or output ss <- H(s||ct) + EphemeralKeyGeneration_A(ephemeralsk_.d, c0_); + if (memcmp(c0_, ct, CRYPTO_PUBLICKEYBYTES) != 0) { + memcpy(temp, sk, MSG_BYTES); + } + memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES); + shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES + MSG_BYTES); + + return 0; +} diff --git a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sikep434r2_fp_x64_asm.S b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sikep434r2_fp_x64_asm.S index 831fc1b7fb..70973e0cde 100644 --- a/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sikep434r2_fp_x64_asm.S +++ b/contrib/restricted/aws/s2n/pq-crypto/sike_r2/sikep434r2_fp_x64_asm.S @@ -1,962 +1,962 @@ -//******************************************************************************************* -// SIDH: an efficient supersingular isogeny cryptography library -// -// Abstract: field arithmetic in x64 assembly for P434 on Linux -//******************************************************************************************* - -.intel_syntax noprefix - -/* Requires bmi2 instruction set for mulx. adx instructions are optional, but preferred. */ - -// Registers that are used for parameter passing: -#define reg_p1 rdi -#define reg_p2 rsi -#define reg_p3 rdx - -// Define addition instructions -#ifdef S2N_ADX - -#define ADD1 adox -#define ADC1 adox -#define ADD2 adcx -#define ADC2 adcx - -#else // S2N_ADX - -#define ADD1 add -#define ADC1 adc -#define ADD2 add -#define ADC2 adc - -#endif // S2N_ADX - -// The constants below (asm_p434, asm_p434p1, and asm_p434x2) are duplicated from -// P434.c, and correspond to the arrays p434, p434p1, and p434x2. The values are -// idenctical; they are just represented here as standard (base 10) ints, instead -// of hex. If, for any reason, the constants are changed in one file, they should be -// updated in the other file as well. - -.text -.align 32 -.type asm_p434, @object -.size asm_p434, 56 -asm_p434: - .quad -1 - .quad -1 - .quad -1 - .quad -161717841442111489 - .quad 8918917783347572387 - .quad 7853257225132122198 - .quad 620258357900100 -.align 32 -.type asm_p434p1, @object -.size asm_p434p1, 56 -asm_p434p1: - .quad 0 - .quad 0 - .quad 0 - .quad -161717841442111488 - .quad 8918917783347572387 - .quad 7853257225132122198 - .quad 620258357900100 -.align 32 -.type asm_p434x2, @object -.size asm_p434x2, 56 -asm_p434x2: - .quad -2 - .quad -1 - .quad -1 - .quad -323435682884222977 - .quad -608908507014406841 - .quad -2740229623445307220 - .quad 1240516715800200 - -//*********************************************************************** -// Field addition -// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//******************************************************************************************* +// SIDH: an efficient supersingular isogeny cryptography library +// +// Abstract: field arithmetic in x64 assembly for P434 on Linux +//******************************************************************************************* + +.intel_syntax noprefix + +/* Requires bmi2 instruction set for mulx. adx instructions are optional, but preferred. 
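/* The tail of the decapsulation routine above, restated in isolation: the first
 * ciphertext component is recomputed from the ephemeral key derived from the
 * recovered message m', and on a mismatch the stored random value s replaces m'
 * before the final shake256, so an invalid ciphertext still yields a pseudorandom
 * shared secret (implicit rejection). This helper is only a sketch reusing the
 * macros and functions seen above, not an additional library routine.
 */
static void derive_shared_secret(unsigned char *ss, const unsigned char *ct,
                                 const unsigned char *sk, const digit_t *ephemeralsk,
                                 unsigned char *temp /* MSG_BYTES + CRYPTO_CIPHERTEXTBYTES bytes; temp[0..MSG_BYTES) holds m' */) {
    unsigned char c0[CRYPTO_PUBLICKEYBYTES];

    EphemeralKeyGeneration_A(ephemeralsk, c0);          /* re-derive ct's first part */
    if (memcmp(c0, ct, CRYPTO_PUBLICKEYBYTES) != 0) {
        memcpy(temp, sk, MSG_BYTES);                    /* swap in the stored s      */
    }
    memcpy(&temp[MSG_BYTES], ct, CRYPTO_CIPHERTEXTBYTES);
    shake256(ss, CRYPTO_BYTES, temp, CRYPTO_CIPHERTEXTBYTES + MSG_BYTES);
}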
*/ + +// Registers that are used for parameter passing: +#define reg_p1 rdi +#define reg_p2 rsi +#define reg_p3 rdx + +// Define addition instructions +#ifdef S2N_ADX + +#define ADD1 adox +#define ADC1 adox +#define ADD2 adcx +#define ADC2 adcx + +#else // S2N_ADX + +#define ADD1 add +#define ADC1 adc +#define ADD2 add +#define ADC2 adc + +#endif // S2N_ADX + +// The constants below (asm_p434, asm_p434p1, and asm_p434x2) are duplicated from +// P434.c, and correspond to the arrays p434, p434p1, and p434x2. The values are +// idenctical; they are just represented here as standard (base 10) ints, instead +// of hex. If, for any reason, the constants are changed in one file, they should be +// updated in the other file as well. + +.text +.align 32 +.type asm_p434, @object +.size asm_p434, 56 +asm_p434: + .quad -1 + .quad -1 + .quad -1 + .quad -161717841442111489 + .quad 8918917783347572387 + .quad 7853257225132122198 + .quad 620258357900100 +.align 32 +.type asm_p434p1, @object +.size asm_p434p1, 56 +asm_p434p1: + .quad 0 + .quad 0 + .quad 0 + .quad -161717841442111488 + .quad 8918917783347572387 + .quad 7853257225132122198 + .quad 620258357900100 +.align 32 +.type asm_p434x2, @object +.size asm_p434x2, 56 +asm_p434x2: + .quad -2 + .quad -1 + .quad -1 + .quad -323435682884222977 + .quad -608908507014406841 + .quad -2740229623445307220 + .quad 1240516715800200 + //*********************************************************************** -.global fpadd434_asm -fpadd434_asm: - push r12 - push r13 - push r14 - push r15 - push rbx - push rbp - - xor rax, rax - mov r8, [reg_p1] - mov r9, [reg_p1+8] - mov r10, [reg_p1+16] - mov r11, [reg_p1+24] - mov r12, [reg_p1+32] - mov r13, [reg_p1+40] - mov r14, [reg_p1+48] - add r8, [reg_p2] - adc r9, [reg_p2+8] - adc r10, [reg_p2+16] - adc r11, [reg_p2+24] - adc r12, [reg_p2+32] - adc r13, [reg_p2+40] - adc r14, [reg_p2+48] - - mov rbx, [rip+asm_p434x2] - sub r8, rbx - mov rcx, [rip+asm_p434x2+8] - sbb r9, rcx - sbb r10, rcx - mov rdi, [rip+asm_p434x2+24] - sbb r11, rdi - mov rsi, [rip+asm_p434x2+32] - sbb r12, rsi - mov rbp, [rip+asm_p434x2+40] - sbb r13, rbp - mov r15, [rip+asm_p434x2+48] - sbb r14, r15 - sbb rax, 0 - - and rbx, rax - and rcx, rax - and rdi, rax - and rsi, rax - and rbp, rax - and r15, rax - - add r8, rbx - adc r9, rcx - adc r10, rcx - adc r11, rdi - adc r12, rsi - adc r13, rbp - adc r14, r15 - mov [reg_p3], r8 - mov [reg_p3+8], r9 - mov [reg_p3+16], r10 - mov [reg_p3+24], r11 - mov [reg_p3+32], r12 - mov [reg_p3+40], r13 - mov [reg_p3+48], r14 - - pop rbp - pop rbx - pop r15 - pop r14 - pop r13 - pop r12 - ret - -//*********************************************************************** -// Field subtraction -// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] -//*********************************************************************** -.global fpsub434_asm -fpsub434_asm: - push r12 - push r13 - push r14 - - xor rax, rax - mov r8, [reg_p1] - mov r9, [reg_p1+8] - mov r10, [reg_p1+16] - mov r11, [reg_p1+24] - mov r12, [reg_p1+32] - mov r13, [reg_p1+40] - mov r14, [reg_p1+48] - sub r8, [reg_p2] - sbb r9, [reg_p2+8] - sbb r10, [reg_p2+16] - sbb r11, [reg_p2+24] - sbb r12, [reg_p2+32] - sbb r13, [reg_p2+40] - sbb r14, [reg_p2+48] - sbb rax, 0 - - mov rcx, [rip+asm_p434x2] - mov rdi, [rip+asm_p434x2+8] - mov rsi, [rip+asm_p434x2+24] - and rcx, rax - and rdi, rax - and rsi, rax - add r8, rcx - adc r9, rdi - adc r10, rdi - adc r11, rsi - mov [reg_p3], r8 - mov [reg_p3+8], r9 - mov [reg_p3+16], r10 +// Field addition +// Operation: c [reg_p3] = a 
[reg_p1] + b [reg_p2] +//*********************************************************************** +.global fpadd434_asm +fpadd434_asm: + push r12 + push r13 + push r14 + push r15 + push rbx + push rbp + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + adc r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + + mov rbx, [rip+asm_p434x2] + sub r8, rbx + mov rcx, [rip+asm_p434x2+8] + sbb r9, rcx + sbb r10, rcx + mov rdi, [rip+asm_p434x2+24] + sbb r11, rdi + mov rsi, [rip+asm_p434x2+32] + sbb r12, rsi + mov rbp, [rip+asm_p434x2+40] + sbb r13, rbp + mov r15, [rip+asm_p434x2+48] + sbb r14, r15 + sbb rax, 0 + + and rbx, rax + and rcx, rax + and rdi, rax + and rsi, rax + and rbp, rax + and r15, rax + + add r8, rbx + adc r9, rcx + adc r10, rcx + adc r11, rdi + adc r12, rsi + adc r13, rbp + adc r14, r15 + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 mov [reg_p3+24], r11 - setc cl - - mov r8, [rip+asm_p434x2+32] - mov rdi, [rip+asm_p434x2+40] - mov rsi, [rip+asm_p434x2+48] - and r8, rax - and rdi, rax - and rsi, rax - bt rcx, 0 - adc r12, r8 - adc r13, rdi - adc r14, rsi - mov [reg_p3+32], r12 - mov [reg_p3+40], r13 - mov [reg_p3+48], r14 - - pop r14 - pop r13 - pop r12 - ret - -///////////////////////////////////////////////////////////////// MACRO -// Schoolbook integer multiplication, a full row at a time -// Inputs: memory pointers M0 and M1 -// Outputs: memory pointer C -// Temps: regs T0:T9 -///////////////////////////////////////////////////////////////// - -#ifdef S2N_ADX -.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 - mov rdx, \M0 - mulx \T0, \T1, \M1 // T0:T1 = A0*B0 - mov \C, \T1 // C0_final - mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 - xor rax, rax - adox \T0, \T2 - mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 - adox \T1, \T3 - - mov rdx, 8\M0 - mulx \T3, \T4, \M1 // T3:T4 = A1*B0 - adox \T2, rax - xor rax, rax - mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 - adox \T4, \T0 - mov 8\C, \T4 // C1_final - adcx \T3, \T6 - mulx \T6, \T0, 16\M1 // T6:T0 = A1*B2 - adox \T3, \T1 - adcx \T5, \T0 - adcx \T6, rax - adox \T5, \T2 - - mov rdx, 16\M0 - mulx \T1, \T0, \M1 // T1:T0 = A2*B0 - adox \T6, rax - xor rax, rax - mulx \T4, \T2, 8\M1 // T4:T2 = A2*B1 - adox \T0, \T3 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +//*********************************************************************** +// Field subtraction +// Operation: c [reg_p3] = a [reg_p1] - b [reg_p2] +//*********************************************************************** +.global fpsub434_asm +fpsub434_asm: + push r12 + push r13 + push r14 + + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + mov r13, [reg_p1+40] + mov r14, [reg_p1+48] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb r14, [reg_p2+48] + sbb rax, 0 + + mov rcx, [rip+asm_p434x2] + mov rdi, [rip+asm_p434x2+8] + mov rsi, [rip+asm_p434x2+24] + and rcx, rax + and rdi, rax + and rsi, rax + add r8, rcx + adc r9, rdi + adc r10, rdi + adc r11, rsi + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + setc cl + + mov r8, [rip+asm_p434x2+32] + mov rdi, [rip+asm_p434x2+40] + 
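/* C model of the branch-free correction used in fpadd434_asm above: after adding
 * the operands, 2*p434 is subtracted; the borrow is expanded into an all-ones or
 * all-zero mask and 2*p434 is added back under that mask, so no data-dependent
 * branch is taken. The single-limb modulus below is a toy, not p434.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint64_t two_p = 2 * 1000003ULL;     /* toy 2*p                        */
    uint64_t a = 1000000, b = 500000;          /* inputs already reduced mod 2*p */

    uint64_t sum = a + b;                      /* add                            */
    uint64_t t = sum - two_p;                  /* subtract 2*p                   */
    uint64_t borrow = (t > sum);               /* 1 if the subtraction wrapped   */
    uint64_t mask = 0 - borrow;                /* 0x00...0 or 0xff...f           */
    uint64_t r = t + (two_p & mask);           /* add 2*p back only on borrow    */

    printf("r = %llu, reference = %llu\n",
           (unsigned long long) r, (unsigned long long) ((a + b) % two_p));
    return 0;
}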
mov rsi, [rip+asm_p434x2+48] + and r8, rax + and rdi, rax + and rsi, rax + bt rcx, 0 + adc r12, r8 + adc r13, rdi + adc r14, rsi + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], r14 + + pop r14 + pop r13 + pop r12 + ret + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication, a full row at a time +// Inputs: memory pointers M0 and M1 +// Outputs: memory pointer C +// Temps: regs T0:T9 +///////////////////////////////////////////////////////////////// + +#ifdef S2N_ADX +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adox \T2, rax + xor rax, rax + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T3, \T6 + mulx \T6, \T0, 16\M1 // T6:T0 = A1*B2 + adox \T3, \T1 + adcx \T5, \T0 + adcx \T6, rax + adox \T5, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T6, rax + xor rax, rax + mulx \T4, \T2, 8\M1 // T4:T2 = A2*B1 + adox \T0, \T3 + mov 16\C, \T0 // C2_final + adcx \T1, \T5 + mulx \T0, \T3, 16\M1 // T0:T3 = A2*B2 + adcx \T4, \T6 + adcx \T0, rax + adox \T1, \T2 + adox \T3, \T4 + adox \T0, rax + mov 24\C, \T1 // C3_final + mov 32\C, \T3 // C4_final + mov 40\C, \T0 // C5_final +.endm + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + xor rax, rax + adox \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adox \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adox \T2, \T4 + + mov rdx, 8\M0 + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + adox \T3, rax + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + adox \T4, \T0 + mov 8\C, \T4 // C1_final + adcx \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adcx \T6, \T8 + adox \T5, \T1 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adcx \T7, \T9 + adcx \T8, rax + adox \T6, \T2 + + mov rdx, 16\M0 + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + adox \T7, \T3 + adox \T8, rax + xor rax, rax + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + adox \T0, \T5 + mov 16\C, \T0 // C2_final + adcx \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adcx \T2, \T4 + adox \T1, \T6 + mulx \T4,\T9, 24\M1 // T3:T4 = A2*B3 + adcx \T3, \T9 + mov rdx, 24\M0 + adcx \T4, rax + + adox \T2, \T7 + adox \T3, \T8 + adox \T4, rax + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + xor rax, rax + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + adcx \T5, \T7 + adox \T1, \T0 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adcx \T6, \T8 + adox \T2, \T5 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adcx \T7, \T9 + adcx \T8, rax + + adox \T3, \T6 + adox \T4, \T7 + adox \T8, rax + mov 24\C, \T1 // C3_final + mov 32\C, \T2 // C4_final + mov 40\C, \T3 // C5_final + mov 48\C, \T4 // C6_final + mov 56\C, \T8 // C7_final +.endm + +#else // S2N_ADX + +.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + + mov rdx, 8\M0 + mulx \T3, \T4, \M1 // T3:T4 = A1*B0 + adc \T2, 0 + mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T3, \T1 + adc \T5, \T2 + mulx \T0, \T1, 16\M1 // T0:T1 = A1*B2 + adc 
\T0, 0 + + add \T3, \T6 + adc \T5, \T1 + adc \T0, 0 + + mov rdx, 16\M0 + mulx \T1, \T2, \M1 // T1:T2 = A2*B0 + add \T2, \T3 + mov 16\C, \T2 // C2_final + mulx \T4, \T6, 8\M1 // T4:T6 = A2*B1 + adc \T1, \T5 + adc \T0, \T4 + mulx \T2, \T3, 16\M1 // T0:T3 = A2*B2 + adc \T2, 0 + add \T1, \T6 + adc \T0, \T3 + adc \T2, 0 + mov 24\C, \T1 // C3_final + mov 32\C, \T0 // C4_final + mov 40\C, \T2 // C5_final +.endm + +.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 + mov rdx, \M0 + mulx \T0, \T1, \M1 // T0:T1 = A0*B0 + mov \C, \T1 // C0_final + mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 + add \T0, \T2 + mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 + adc \T1, \T3 + mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 + adc \T2, \T4 + mov rdx, 8\M0 + adc \T3, 0 + + mulx \T5, \T4, \M1 // T5:T4 = A1*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T4, \T0 + mov 8\C, \T4 // C1_final + adc \T5, \T1 + adc \T6, \T2 + adc \T7, \T3 + mov rdx, 16\M0 + adc \T8, 0 + + mulx \T1, \T0, \M1 // T1:T0 = A2*B0 + mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 + add \T1, \T3 + mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 + adc \T2, \T4 + mulx \T4,\T9, 24\M1 // T3:T4 = A2*B3 + adc \T3, \T9 + mov rdx, 24\M0 + adc \T4, 0 + + add \T0, \T5 mov 16\C, \T0 // C2_final - adcx \T1, \T5 - mulx \T0, \T3, 16\M1 // T0:T3 = A2*B2 - adcx \T4, \T6 - adcx \T0, rax - adox \T1, \T2 - adox \T3, \T4 - adox \T0, rax - mov 24\C, \T1 // C3_final - mov 32\C, \T3 // C4_final - mov 40\C, \T0 // C5_final + adc \T1, \T6 + adc \T2, \T7 + adc \T3, \T8 + adc \T4, 0 + + mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 + add \T5, \T7 + mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 + adc \T6, \T8 + mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 + adc \T7, \T9 + adc \T8, 0 + + add \T1, \T0 + mov 24\C, \T1 // C3_final + adc \T2, \T5 + mov 32\C, \T2 // C4_final + adc \T3, \T6 + mov 40\C, \T3 // C5_final + adc \T4, \T7 + mov 48\C, \T4 // C6_final + adc \T8, 0 + mov 56\C, \T8 // C7_final .endm - -.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 - mov rdx, \M0 - mulx \T0, \T1, \M1 // T0:T1 = A0*B0 - mov \C, \T1 // C0_final - mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 - xor rax, rax - adox \T0, \T2 - mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 - adox \T1, \T3 - mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 - adox \T2, \T4 - - mov rdx, 8\M0 - mulx \T5, \T4, \M1 // T5:T4 = A1*B0 - adox \T3, rax - xor rax, rax - mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 - adox \T4, \T0 - mov 8\C, \T4 // C1_final - adcx \T5, \T7 - mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 - adcx \T6, \T8 - adox \T5, \T1 - mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 - adcx \T7, \T9 - adcx \T8, rax - adox \T6, \T2 - - mov rdx, 16\M0 - mulx \T1, \T0, \M1 // T1:T0 = A2*B0 - adox \T7, \T3 - adox \T8, rax +#endif // S2N_ADX + +//***************************************************************************** +// 434-bit multiplication using Karatsuba (one level), schoolbook (one level) +//***************************************************************************** +.global mul434_asm +mul434_asm: + push r12 + push r13 + push r14 + push r15 + mov rcx, reg_p3 + + // r8-r11 <- AH + AL, rax <- mask xor rax, rax - mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 - adox \T0, \T5 - mov 16\C, \T0 // C2_final - adcx \T1, \T3 - mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 - adcx \T2, \T4 - adox \T1, \T6 - mulx \T4,\T9, 24\M1 // T3:T4 = A2*B3 - adcx \T3, \T9 - mov rdx, 24\M0 - adcx \T4, rax - - adox \T2, \T7 - adox \T3, \T8 - 
adox \T4, rax - - mulx \T5, \T0, \M1 // T5:T0 = A3*B0 + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + push rbx + push rbp + sub rsp, 96 + add r8, [reg_p1+32] + adc r9, [reg_p1+40] + adc r10, [reg_p1+48] + adc r11, 0 + sbb rax, 0 + mov [rsp], r8 + mov [rsp+8], r9 + mov [rsp+16], r10 + mov [rsp+24], r11 + + // r12-r15 <- BH + BL, rbx <- mask + xor rbx, rbx + mov r12, [reg_p2] + mov r13, [reg_p2+8] + mov r14, [reg_p2+16] + mov r15, [reg_p2+24] + add r12, [reg_p2+32] + adc r13, [reg_p2+40] + adc r14, [reg_p2+48] + adc r15, 0 + sbb rbx, 0 + mov [rsp+32], r12 + mov [rsp+40], r13 + mov [rsp+48], r14 + mov [rsp+56], r15 + + // r12-r15 <- masked (BH + BL) + and r12, rax + and r13, rax + and r14, rax + and r15, rax + + // r8-r11 <- masked (AH + AL) + and r8, rbx + and r9, rbx + and r10, rbx + and r11, rbx + + // r8-r11 <- masked (AH + AL) + masked (BH + BL) + add r8, r12 + adc r9, r13 + adc r10, r14 + adc r11, r15 + mov [rsp+64], r8 + mov [rsp+72], r9 + mov [rsp+80], r10 + mov [rsp+88], r11 + + // [rsp] <- (AH+AL) x (BH+BL), low part + MUL256_SCHOOL [rsp], [rsp+32], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp + + // [rcx] <- AL x BL + MUL256_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp // Result C0-C3 + + // [rcx+64] <- AH x BH + MUL192_SCHOOL [reg_p1+32], [reg_p2+32], [rcx+64], r8, r9, r10, r11, r12, r13, r14 + + // r8-r11 <- (AH+AL) x (BH+BL), final step + mov r8, [rsp+64] + mov r9, [rsp+72] + mov r10, [rsp+80] + mov r11, [rsp+88] + mov rax, [rsp+32] + add r8, rax + mov rax, [rsp+40] + adc r9, rax + mov rax, [rsp+48] + adc r10, rax + mov rax, [rsp+56] + adc r11, rax + + // [rsp], x3-x5 <- (AH+AL) x (BH+BL) - ALxBL + mov r12, [rsp] + mov r13, [rsp+8] + mov r14, [rsp+16] + mov r15, [rsp+24] + sub r12, [rcx] + sbb r13, [rcx+8] + sbb r14, [rcx+16] + sbb r15, [rcx+24] + sbb r8, [rcx+32] + sbb r9, [rcx+40] + sbb r10, [rcx+48] + sbb r11, [rcx+56] + + // r8-r15 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH + sub r12, [rcx+64] + sbb r13, [rcx+72] + sbb r14, [rcx+80] + sbb r15, [rcx+88] + sbb r8, [rcx+96] + sbb r9, [rcx+104] + sbb r10, 0 + sbb r11, 0 + + add r12, [rcx+32] + mov [rcx+32], r12 // Result C4-C7 + adc r13, [rcx+40] + mov [rcx+40], r13 + adc r14, [rcx+48] + mov [rcx+48], r14 + adc r15, [rcx+56] + mov [rcx+56], r15 + adc r8, [rcx+64] + mov [rcx+64], r8 // Result C8-C15 + adc r9, [rcx+72] + mov [rcx+72], r9 + adc r10, [rcx+80] + mov [rcx+80], r10 + adc r11, [rcx+88] + mov [rcx+88], r11 + mov r12, [rcx+96] + adc r12, 0 + mov [rcx+96], r12 + mov r13, [rcx+104] + adc r13, 0 + mov [rcx+104], r13 + + add rsp, 96 + pop rbp + pop rbx + pop r15 + pop r14 + pop r13 + pop r12 + ret + +///////////////////////////////////////////////////////////////// MACRO +// Schoolbook integer multiplication +// Inputs: memory pointers M0 and M1 +// Outputs: regs T0:T5 +// Temps: regs T7:T6 +///////////////////////////////////////////////////////////////// +.macro MUL64x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 xor rax, rax - mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 - adcx \T5, \T7 - adox \T1, \T0 - mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 - adcx \T6, \T8 - adox \T2, \T5 - mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 - adcx \T7, \T9 - adcx \T8, rax - - adox \T3, \T6 - adox \T4, \T7 - adox \T8, rax - mov 24\C, \T1 // C3_final - mov 32\C, \T2 // C4_final - mov 40\C, \T3 // C5_final - mov 48\C, \T4 // C6_final - mov 56\C, \T8 // C7_final + mulx \T3, \T5, 16\M1 + ADD1 \T1, \T4 // T1 <- 
C1_final + ADC1 \T2, \T5 // T2 <- C2_final + mulx \T4, \T5, 24\M1 + ADC1 \T3, \T5 // T3 <- C3_final + ADC1 \T4, rax // T4 <- C4_final .endm - -#else // S2N_ADX - -.macro MUL192_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6 - mov rdx, \M0 - mulx \T0, \T1, \M1 // T0:T1 = A0*B0 - mov \C, \T1 // C0_final - mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 - add \T0, \T2 - mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 - adc \T1, \T3 - - mov rdx, 8\M0 - mulx \T3, \T4, \M1 // T3:T4 = A1*B0 - adc \T2, 0 - mulx \T5, \T6, 8\M1 // T5:T6 = A1*B1 - add \T4, \T0 - mov 8\C, \T4 // C1_final - adc \T3, \T1 - adc \T5, \T2 - mulx \T0, \T1, 16\M1 // T0:T1 = A1*B2 - adc \T0, 0 - - add \T3, \T6 - adc \T5, \T1 - adc \T0, 0 - - mov rdx, 16\M0 - mulx \T1, \T2, \M1 // T1:T2 = A2*B0 - add \T2, \T3 - mov 16\C, \T2 // C2_final - mulx \T4, \T6, 8\M1 // T4:T6 = A2*B1 - adc \T1, \T5 - adc \T0, \T4 - mulx \T2, \T3, 16\M1 // T0:T3 = A2*B2 - adc \T2, 0 - add \T1, \T6 - adc \T0, \T3 - adc \T2, 0 - mov 24\C, \T1 // C3_final - mov 32\C, \T0 // C4_final - mov 40\C, \T2 // C5_final + +#ifdef S2N_ADX +.macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 + xor rax, rax + mulx \T3, \T5, 16\M1 + ADD1 \T1, \T4 + ADC1 \T2, \T5 + mulx \T4, \T5, 24\M1 + ADC1 \T3, \T5 + ADC1 \T4, rax + + xor rax, rax + mov rdx, 8\M0 + mulx \T6, \T5, \M1 + ADD2 \T1, \T5 // T1 <- C1_final + ADC2 \T2, \T6 + mulx \T5, \T6, 8\M1 + ADC2 \T3, \T5 + ADD1 \T2, \T6 + mulx \T5, \T6, 16\M1 + ADC2 \T4, \T5 + ADC1 \T3, \T6 + mulx \T5, \T6, 24\M1 + ADC2 \T5, rax + ADC1 \T4, \T6 + ADC1 \T5, rax .endm - -.macro MUL256_SCHOOL M0, M1, C, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9 - mov rdx, \M0 - mulx \T0, \T1, \M1 // T0:T1 = A0*B0 - mov \C, \T1 // C0_final - mulx \T1, \T2, 8\M1 // T1:T2 = A0*B1 - add \T0, \T2 - mulx \T2, \T3, 16\M1 // T2:T3 = A0*B2 - adc \T1, \T3 - mulx \T3, \T4, 24\M1 // T3:T4 = A0*B3 - adc \T2, \T4 - mov rdx, 8\M0 - adc \T3, 0 - - mulx \T5, \T4, \M1 // T5:T4 = A1*B0 - mulx \T6, \T7, 8\M1 // T6:T7 = A1*B1 - add \T5, \T7 - mulx \T7, \T8, 16\M1 // T7:T8 = A1*B2 - adc \T6, \T8 - mulx \T8, \T9, 24\M1 // T8:T9 = A1*B3 - adc \T7, \T9 - adc \T8, 0 - - add \T4, \T0 - mov 8\C, \T4 // C1_final - adc \T5, \T1 - adc \T6, \T2 - adc \T7, \T3 - mov rdx, 16\M0 - adc \T8, 0 - - mulx \T1, \T0, \M1 // T1:T0 = A2*B0 - mulx \T2, \T3, 8\M1 // T2:T3 = A2*B1 - add \T1, \T3 - mulx \T3, \T4, 16\M1 // T3:T4 = A2*B2 - adc \T2, \T4 - mulx \T4,\T9, 24\M1 // T3:T4 = A2*B3 - adc \T3, \T9 - mov rdx, 24\M0 - adc \T4, 0 - - add \T0, \T5 - mov 16\C, \T0 // C2_final - adc \T1, \T6 - adc \T2, \T7 - adc \T3, \T8 - adc \T4, 0 - - mulx \T5, \T0, \M1 // T5:T0 = A3*B0 - mulx \T6, \T7, 8\M1 // T6:T7 = A3*B1 - add \T5, \T7 - mulx \T7, \T8, 16\M1 // T7:T8 = A3*B2 - adc \T6, \T8 - mulx \T8, \T9, 24\M1 // T8:T9 = A3*B3 - adc \T7, \T9 - adc \T8, 0 - - add \T1, \T0 - mov 24\C, \T1 // C3_final - adc \T2, \T5 - mov 32\C, \T2 // C4_final - adc \T3, \T6 - mov 40\C, \T3 // C5_final - adc \T4, \T7 - mov 48\C, \T4 // C6_final - adc \T8, 0 - mov 56\C, \T8 // C7_final -.endm -#endif // S2N_ADX - -//***************************************************************************** -// 434-bit multiplication using Karatsuba (one level), schoolbook (one level) -//***************************************************************************** -.global mul434_asm -mul434_asm: - push r12 - push r13 - push r14 - push r15 - mov rcx, reg_p3 - - // r8-r11 <- AH + AL, rax <- mask - xor rax, rax - mov r8, [reg_p1] - mov r9, [reg_p1+8] - mov r10, [reg_p1+16] - mov r11, 
[reg_p1+24] - push rbx - push rbp - sub rsp, 96 - add r8, [reg_p1+32] - adc r9, [reg_p1+40] - adc r10, [reg_p1+48] - adc r11, 0 - sbb rax, 0 - mov [rsp], r8 - mov [rsp+8], r9 - mov [rsp+16], r10 - mov [rsp+24], r11 - - // r12-r15 <- BH + BL, rbx <- mask - xor rbx, rbx - mov r12, [reg_p2] - mov r13, [reg_p2+8] - mov r14, [reg_p2+16] - mov r15, [reg_p2+24] - add r12, [reg_p2+32] - adc r13, [reg_p2+40] - adc r14, [reg_p2+48] - adc r15, 0 - sbb rbx, 0 - mov [rsp+32], r12 - mov [rsp+40], r13 - mov [rsp+48], r14 - mov [rsp+56], r15 - - // r12-r15 <- masked (BH + BL) - and r12, rax - and r13, rax - and r14, rax - and r15, rax - - // r8-r11 <- masked (AH + AL) - and r8, rbx - and r9, rbx - and r10, rbx - and r11, rbx - - // r8-r11 <- masked (AH + AL) + masked (BH + BL) - add r8, r12 - adc r9, r13 - adc r10, r14 - adc r11, r15 - mov [rsp+64], r8 - mov [rsp+72], r9 - mov [rsp+80], r10 - mov [rsp+88], r11 - - // [rsp] <- (AH+AL) x (BH+BL), low part - MUL256_SCHOOL [rsp], [rsp+32], [rsp], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp - - // [rcx] <- AL x BL - MUL256_SCHOOL [reg_p1], [reg_p2], [rcx], r8, r9, r10, r11, r12, r13, r14, r15, rbx, rbp // Result C0-C3 - - // [rcx+64] <- AH x BH - MUL192_SCHOOL [reg_p1+32], [reg_p2+32], [rcx+64], r8, r9, r10, r11, r12, r13, r14 - - // r8-r11 <- (AH+AL) x (BH+BL), final step - mov r8, [rsp+64] - mov r9, [rsp+72] - mov r10, [rsp+80] - mov r11, [rsp+88] - mov rax, [rsp+32] - add r8, rax - mov rax, [rsp+40] - adc r9, rax - mov rax, [rsp+48] - adc r10, rax - mov rax, [rsp+56] - adc r11, rax - - // [rsp], x3-x5 <- (AH+AL) x (BH+BL) - ALxBL - mov r12, [rsp] - mov r13, [rsp+8] - mov r14, [rsp+16] - mov r15, [rsp+24] - sub r12, [rcx] - sbb r13, [rcx+8] - sbb r14, [rcx+16] - sbb r15, [rcx+24] - sbb r8, [rcx+32] - sbb r9, [rcx+40] - sbb r10, [rcx+48] - sbb r11, [rcx+56] - - // r8-r15 <- (AH+AL) x (BH+BL) - ALxBL - AHxBH - sub r12, [rcx+64] - sbb r13, [rcx+72] - sbb r14, [rcx+80] - sbb r15, [rcx+88] - sbb r8, [rcx+96] - sbb r9, [rcx+104] - sbb r10, 0 - sbb r11, 0 - - add r12, [rcx+32] - mov [rcx+32], r12 // Result C4-C7 - adc r13, [rcx+40] - mov [rcx+40], r13 - adc r14, [rcx+48] - mov [rcx+48], r14 - adc r15, [rcx+56] - mov [rcx+56], r15 - adc r8, [rcx+64] - mov [rcx+64], r8 // Result C8-C15 - adc r9, [rcx+72] - mov [rcx+72], r9 - adc r10, [rcx+80] - mov [rcx+80], r10 - adc r11, [rcx+88] - mov [rcx+88], r11 - mov r12, [rcx+96] - adc r12, 0 - mov [rcx+96], r12 - mov r13, [rcx+104] - adc r13, 0 - mov [rcx+104], r13 - - add rsp, 96 - pop rbp - pop rbx - pop r15 - pop r14 - pop r13 - pop r12 - ret - -///////////////////////////////////////////////////////////////// MACRO -// Schoolbook integer multiplication -// Inputs: memory pointers M0 and M1 -// Outputs: regs T0:T5 -// Temps: regs T7:T6 -///////////////////////////////////////////////////////////////// -.macro MUL64x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5 - mov rdx, \M0 - mulx \T1, \T0, \M1 // T0 <- C0_final - mulx \T2, \T4, 8\M1 - xor rax, rax - mulx \T3, \T5, 16\M1 - ADD1 \T1, \T4 // T1 <- C1_final - ADC1 \T2, \T5 // T2 <- C2_final - mulx \T4, \T5, 24\M1 - ADC1 \T3, \T5 // T3 <- C3_final - ADC1 \T4, rax // T4 <- C4_final -.endm - -#ifdef S2N_ADX -.macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6 - mov rdx, \M0 - mulx \T1, \T0, \M1 // T0 <- C0_final - mulx \T2, \T4, 8\M1 - xor rax, rax - mulx \T3, \T5, 16\M1 - ADD1 \T1, \T4 - ADC1 \T2, \T5 - mulx \T4, \T5, 24\M1 - ADC1 \T3, \T5 - ADC1 \T4, rax - - xor rax, rax - mov rdx, 8\M0 - mulx \T6, \T5, \M1 - ADD2 \T1, \T5 // T1 <- C1_final - ADC2 \T2, \T6 - mulx \T5, 
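/* C model of the one-level Karatsuba split performed by mul434_asm above: the
 * product of the half-sums, minus AL*BL and AH*BH, yields the cross term. The
 * 16-bit halves of a 32-bit operand below are a toy; the assembly works on the
 * 7-limb operands and additionally handles the carries out of AH+AL and BH+BL
 * with the masked additions visible in the code.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint32_t a = 0x9abcdef0u, b = 0x87654321u;
    uint32_t al = a & 0xffffu, ah = a >> 16;
    uint32_t bl = b & 0xffffu, bh = b >> 16;

    uint64_t low  = (uint64_t) al * bl;                            /* AL*BL      */
    uint64_t high = (uint64_t) ah * bh;                            /* AH*BH      */
    uint64_t mid  = (uint64_t) (al + ah) * (bl + bh) - low - high; /* cross term */

    uint64_t product = (high << 32) + (mid << 16) + low;
    printf("karatsuba: 0x%016llx\n", (unsigned long long) product);
    printf("native:    0x%016llx\n", (unsigned long long) ((uint64_t) a * b));
    return 0;
}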
\T6, 8\M1 - ADC2 \T3, \T5 - ADD1 \T2, \T6 - mulx \T5, \T6, 16\M1 - ADC2 \T4, \T5 - ADC1 \T3, \T6 - mulx \T5, \T6, 24\M1 - ADC2 \T5, rax - ADC1 \T4, \T6 - ADC1 \T5, rax -.endm - -#else // S2N_ADX - -.macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6 - mov rdx, \M0 - mulx \T1, \T0, \M1 // T0 <- C0_final - mulx \T2, \T4, 8\M1 - mulx \T3, \T5, 16\M1 - add \T1, \T4 - adc \T2, \T5 - mulx \T4, \T5, 24\M1 - adc \T3, \T5 - adc \T4, 0 - - mov rdx, 8\M0 - mulx \T6, \T5, \M1 - add \T1, \T5 // T1 <- C1_final - adc \T2, \T6 - mulx \T5, \T6, 8\M1 - adc \T3, \T5 - mulx \T5, rax, 16\M1 - adc \T4, \T5 - mulx \T5, rdx, 24\M1 - adc \T5, 0 - add \T2, \T6 - adc \T3, rax - adc \T4, rdx + +#else // S2N_ADX + +.macro MUL128x256_SCHOOL M0, M1, T0, T1, T2, T3, T4, T5, T6 + mov rdx, \M0 + mulx \T1, \T0, \M1 // T0 <- C0_final + mulx \T2, \T4, 8\M1 + mulx \T3, \T5, 16\M1 + add \T1, \T4 + adc \T2, \T5 + mulx \T4, \T5, 24\M1 + adc \T3, \T5 + adc \T4, 0 + + mov rdx, 8\M0 + mulx \T6, \T5, \M1 + add \T1, \T5 // T1 <- C1_final + adc \T2, \T6 + mulx \T5, \T6, 8\M1 + adc \T3, \T5 + mulx \T5, rax, 16\M1 + adc \T4, \T5 + mulx \T5, rdx, 24\M1 adc \T5, 0 -.endm -#endif // S2N_ADX - -//************************************************************************************** -// Montgomery reduction -// Based on method described in Faz-Hernandez et al. https://eprint.iacr.org/2017/1015 -// Operation: c [reg_p2] = a [reg_p1] -// NOTE: a=c is not allowed + add \T2, \T6 + adc \T3, rax + adc \T4, rdx + adc \T5, 0 +.endm +#endif // S2N_ADX + //************************************************************************************** -.global rdc434_asm -rdc434_asm: - push r12 - push r13 - - // a[0-1] x p434p1_nz --> result: r8:r13 - MUL128x256_SCHOOL [reg_p1], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx - - xor rcx, rcx - add r8, [reg_p1+24] - adc r9, [reg_p1+32] - adc r10, [reg_p1+40] - adc r11, [reg_p1+48] - adc r12, [reg_p1+56] - adc r13, [reg_p1+64] - adc rcx, [reg_p1+72] - mov [reg_p1+24], r8 - mov [reg_p1+32], r9 - mov [reg_p1+40], r10 - mov [reg_p1+48], r11 - mov [reg_p1+56], r12 - mov [reg_p1+64], r13 - mov [reg_p1+72], rcx - mov r8, [reg_p1+80] - mov r9, [reg_p1+88] - mov r10, [reg_p1+96] - mov r11, [reg_p1+104] - adc r8, 0 - adc r9, 0 - adc r10, 0 - adc r11, 0 - mov [reg_p1+80], r8 - mov [reg_p1+88], r9 - mov [reg_p1+96], r10 - mov [reg_p1+104], r11 - - // a[2-3] x p434p1_nz --> result: r8:r13 - MUL128x256_SCHOOL [reg_p1+16], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx - - xor rcx, rcx - add r8, [reg_p1+40] - adc r9, [reg_p1+48] - adc r10, [reg_p1+56] - adc r11, [reg_p1+64] - adc r12, [reg_p1+72] - adc r13, [reg_p1+80] - adc rcx, [reg_p1+88] - mov [reg_p1+40], r8 - mov [reg_p1+48], r9 - mov [reg_p1+56], r10 - mov [reg_p1+64], r11 - mov [reg_p1+72], r12 - mov [reg_p1+80], r13 - mov [reg_p1+88], rcx - mov r8, [reg_p1+96] - mov r9, [reg_p1+104] - adc r8, 0 +// Montgomery reduction +// Based on method described in Faz-Hernandez et al. 
https://eprint.iacr.org/2017/1015 +// Operation: c [reg_p2] = a [reg_p1] +// NOTE: a=c is not allowed +//************************************************************************************** +.global rdc434_asm +rdc434_asm: + push r12 + push r13 + + // a[0-1] x p434p1_nz --> result: r8:r13 + MUL128x256_SCHOOL [reg_p1], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx + + xor rcx, rcx + add r8, [reg_p1+24] + adc r9, [reg_p1+32] + adc r10, [reg_p1+40] + adc r11, [reg_p1+48] + adc r12, [reg_p1+56] + adc r13, [reg_p1+64] + adc rcx, [reg_p1+72] + mov [reg_p1+24], r8 + mov [reg_p1+32], r9 + mov [reg_p1+40], r10 + mov [reg_p1+48], r11 + mov [reg_p1+56], r12 + mov [reg_p1+64], r13 + mov [reg_p1+72], rcx + mov r8, [reg_p1+80] + mov r9, [reg_p1+88] + mov r10, [reg_p1+96] + mov r11, [reg_p1+104] + adc r8, 0 adc r9, 0 - mov [reg_p1+96], r8 - mov [reg_p1+104], r9 - - // a[4-5] x p434p1_nz --> result: r8:r13 - MUL128x256_SCHOOL [reg_p1+32], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx - - xor rcx, rcx - add r8, [reg_p1+56] - adc r9, [reg_p1+64] - adc r10, [reg_p1+72] - adc r11, [reg_p1+80] - adc r12, [reg_p1+88] - adc r13, [reg_p1+96] - adc rcx, [reg_p1+104] - mov [reg_p2], r8 // Final result c0-c1 - mov [reg_p2+8], r9 - mov [reg_p1+72], r10 - mov [reg_p1+80], r11 - mov [reg_p1+88], r12 - mov [reg_p1+96], r13 - mov [reg_p1+104], rcx - - // a[6-7] x p434p1_nz --> result: r8:r12 - MUL64x256_SCHOOL [reg_p1+48], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13 - - // Final result c2:c6 - add r8, [reg_p1+72] - adc r9, [reg_p1+80] - adc r10, [reg_p1+88] - adc r11, [reg_p1+96] - adc r12, [reg_p1+104] - mov [reg_p2+16], r8 - mov [reg_p2+24], r9 - mov [reg_p2+32], r10 - mov [reg_p2+40], r11 - mov [reg_p2+48], r12 - - pop r13 - pop r12 - ret - -//*********************************************************************** -// 434-bit multiprecision addition -// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] + adc r10, 0 + adc r11, 0 + mov [reg_p1+80], r8 + mov [reg_p1+88], r9 + mov [reg_p1+96], r10 + mov [reg_p1+104], r11 + + // a[2-3] x p434p1_nz --> result: r8:r13 + MUL128x256_SCHOOL [reg_p1+16], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx + + xor rcx, rcx + add r8, [reg_p1+40] + adc r9, [reg_p1+48] + adc r10, [reg_p1+56] + adc r11, [reg_p1+64] + adc r12, [reg_p1+72] + adc r13, [reg_p1+80] + adc rcx, [reg_p1+88] + mov [reg_p1+40], r8 + mov [reg_p1+48], r9 + mov [reg_p1+56], r10 + mov [reg_p1+64], r11 + mov [reg_p1+72], r12 + mov [reg_p1+80], r13 + mov [reg_p1+88], rcx + mov r8, [reg_p1+96] + mov r9, [reg_p1+104] + adc r8, 0 + adc r9, 0 + mov [reg_p1+96], r8 + mov [reg_p1+104], r9 + + // a[4-5] x p434p1_nz --> result: r8:r13 + MUL128x256_SCHOOL [reg_p1+32], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13, rcx + + xor rcx, rcx + add r8, [reg_p1+56] + adc r9, [reg_p1+64] + adc r10, [reg_p1+72] + adc r11, [reg_p1+80] + adc r12, [reg_p1+88] + adc r13, [reg_p1+96] + adc rcx, [reg_p1+104] + mov [reg_p2], r8 // Final result c0-c1 + mov [reg_p2+8], r9 + mov [reg_p1+72], r10 + mov [reg_p1+80], r11 + mov [reg_p1+88], r12 + mov [reg_p1+96], r13 + mov [reg_p1+104], rcx + + // a[6-7] x p434p1_nz --> result: r8:r12 + MUL64x256_SCHOOL [reg_p1+48], [rip+asm_p434p1+24], r8, r9, r10, r11, r12, r13 + + // Final result c2:c6 + add r8, [reg_p1+72] + adc r9, [reg_p1+80] + adc r10, [reg_p1+88] + adc r11, [reg_p1+96] + adc r12, [reg_p1+104] + mov [reg_p2+16], r8 + mov [reg_p2+24], r9 + mov [reg_p2+32], r10 + mov [reg_p2+40], r11 + mov [reg_p2+48], r12 + + pop r13 + pop r12 + ret + 
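/* Single-digit model of the Montgomery reduction performed by rdc434_asm above:
 * add a multiple of p chosen so that the lowest digit of the accumulator becomes
 * zero, then drop that digit; the result is T * R^{-1} mod p. The 16-bit digit
 * and small odd modulus below are toys; the routine above does this digit by
 * digit with 64-bit words and starts its multiplications at offset 24 because
 * the low words of p434+1 (asm_p434p1) are zero.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    const uint32_t p = 65063;                      /* toy odd modulus            */
    const uint32_t R = 1u << 16;                   /* toy digit size             */

    uint32_t pinv = 0;                             /* -p^{-1} mod R, brute force */
    for (uint32_t i = 1; i < R; i++) {
        if ((((uint64_t) i * p) & (R - 1)) == R - 1) { pinv = i; break; }
    }

    uint64_t T = 123456789ULL;                                /* any T < p*R     */
    uint32_t q = (uint32_t) (((T & (R - 1)) * pinv) & (R - 1));
    uint64_t t = (T + (uint64_t) q * p) >> 16;                /* low digit now 0 */
    if (t >= p) { t -= p; }

    /* Check: t == T * R^{-1} mod p, i.e. t*R mod p equals T mod p. */
    printf("t*R mod p = %llu, T mod p = %llu\n",
           (unsigned long long) ((t << 16) % p), (unsigned long long) (T % p));
    return 0;
}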
+//*********************************************************************** +// 434-bit multiprecision addition +// Operation: c [reg_p3] = a [reg_p1] + b [reg_p2] +//*********************************************************************** +.global mp_add434_asm +mp_add434_asm: + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + add r8, [reg_p2] + adc r9, [reg_p2+8] + adc r10, [reg_p2+16] + adc r11, [reg_p2+24] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + + mov r8, [reg_p1+32] + mov r9, [reg_p1+40] + mov r10, [reg_p1+48] + adc r8, [reg_p2+32] + adc r9, [reg_p2+40] + adc r10, [reg_p2+48] + mov [reg_p3+32], r8 + mov [reg_p3+40], r9 + mov [reg_p3+48], r10 + ret + //*********************************************************************** -.global mp_add434_asm -mp_add434_asm: - mov r8, [reg_p1] - mov r9, [reg_p1+8] - mov r10, [reg_p1+16] - mov r11, [reg_p1+24] - add r8, [reg_p2] - adc r9, [reg_p2+8] - adc r10, [reg_p2+16] - adc r11, [reg_p2+24] - mov [reg_p3], r8 - mov [reg_p3+8], r9 - mov [reg_p3+16], r10 - mov [reg_p3+24], r11 - - mov r8, [reg_p1+32] - mov r9, [reg_p1+40] - mov r10, [reg_p1+48] - adc r8, [reg_p2+32] - adc r9, [reg_p2+40] - adc r10, [reg_p2+48] - mov [reg_p3+32], r8 - mov [reg_p3+40], r9 - mov [reg_p3+48], r10 - ret - -//*********************************************************************** -// 2x434-bit multiprecision subtraction/addition -// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448 +// 2x434-bit multiprecision subtraction/addition +// Operation: c [x2] = a [x0] - b [x1]. If c < 0, add p434*2^448 +//*********************************************************************** +.global mp_subadd434x2_asm +mp_subadd434x2_asm: + push r12 + push r13 + push r14 + push r15 + xor rax, rax + mov r8, [reg_p1] + mov r9, [reg_p1+8] + mov r10, [reg_p1+16] + mov r11, [reg_p1+24] + mov r12, [reg_p1+32] + sub r8, [reg_p2] + sbb r9, [reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + + mov r8, [reg_p1+40] + mov r9, [reg_p1+48] + mov r10, [reg_p1+56] + mov r11, [reg_p1+64] + mov r12, [reg_p1+72] + sbb r8, [reg_p2+40] + sbb r9, [reg_p2+48] + sbb r10, [reg_p2+56] + sbb r11, [reg_p2+64] + sbb r12, [reg_p2+72] + mov [reg_p3+40], r8 + mov [reg_p3+48], r9 + mov [reg_p3+56], r10 + + mov r13, [reg_p1+80] + mov r14, [reg_p1+88] + mov r15, [reg_p1+96] + mov rcx, [reg_p1+104] + sbb r13, [reg_p2+80] + sbb r14, [reg_p2+88] + sbb r15, [reg_p2+96] + sbb rcx, [reg_p2+104] + sbb rax, 0 + + // Add p434 anded with the mask in rax + mov r8, [rip+asm_p434] + mov r9, [rip+asm_p434+24] + mov r10, [rip+asm_p434+32] + mov rdi, [rip+asm_p434+40] + mov rsi, [rip+asm_p434+48] + and r8, rax + and r9, rax + and r10, rax + and rdi, rax + and rsi, rax + mov rax, [reg_p3+56] + add rax, r8 + adc r11, r8 + adc r12, r8 + adc r13, r9 + adc r14, r10 + adc r15, rdi + adc rcx, rsi + + mov [reg_p3+56], rax + mov [reg_p3+64], r11 + mov [reg_p3+72], r12 + mov [reg_p3+80], r13 + mov [reg_p3+88], r14 + mov [reg_p3+96], r15 + mov [reg_p3+104], rcx + pop r15 + pop r14 + pop r13 + pop r12 + ret + //*********************************************************************** -.global mp_subadd434x2_asm -mp_subadd434x2_asm: - push r12 +// Double 2x434-bit multiprecision subtraction +// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] +//*********************************************************************** 
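/* C model of the carry chain in mp_add434_asm above (plain multiprecision add,
 * no modular reduction): each limb addition consumes the incoming carry and
 * produces the outgoing one. Two 64-bit limbs here stand in for the seven used
 * by the assembly.
 */
#include <stdint.h>
#include <stdio.h>

int main(void) {
    uint64_t a[2] = {0xffffffffffffffffULL, 0x1ULL};   /* a = 2*2^64 - 1 */
    uint64_t b[2] = {0x0000000000000001ULL, 0x2ULL};   /* b = 2*2^64 + 1 */
    uint64_t c[2];

    uint64_t carry = 0;
    for (int i = 0; i < 2; i++) {
        uint64_t s = a[i] + carry;
        uint64_t c1 = (s < carry);              /* carry from adding the old carry */
        c[i] = s + b[i];
        carry = c1 + (c[i] < b[i]);             /* carry from adding b[i]          */
    }

    /* Expect c = {0x0, 0x4} with carry out 0, since a + b = 4*2^64. */
    printf("c = {0x%016llx, 0x%016llx}, carry out = %llu\n",
           (unsigned long long) c[0], (unsigned long long) c[1],
           (unsigned long long) carry);
    return 0;
}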
+.global mp_dblsub434x2_asm +mp_dblsub434x2_asm: + push r12 push r13 - push r14 - push r15 - xor rax, rax - mov r8, [reg_p1] - mov r9, [reg_p1+8] - mov r10, [reg_p1+16] - mov r11, [reg_p1+24] - mov r12, [reg_p1+32] + + xor rax, rax + mov r8, [reg_p3] + mov r9, [reg_p3+8] + mov r10, [reg_p3+16] + mov r11, [reg_p3+24] + mov r12, [reg_p3+32] + mov r13, [reg_p3+40] + mov rcx, [reg_p3+48] + sub r8, [reg_p1] + sbb r9, [reg_p1+8] + sbb r10, [reg_p1+16] + sbb r11, [reg_p1+24] + sbb r12, [reg_p1+32] + sbb r13, [reg_p1+40] + sbb rcx, [reg_p1+48] + adc rax, 0 sub r8, [reg_p2] - sbb r9, [reg_p2+8] - sbb r10, [reg_p2+16] - sbb r11, [reg_p2+24] - sbb r12, [reg_p2+32] - mov [reg_p3], r8 - mov [reg_p3+8], r9 - mov [reg_p3+16], r10 - mov [reg_p3+24], r11 - mov [reg_p3+32], r12 - - mov r8, [reg_p1+40] - mov r9, [reg_p1+48] - mov r10, [reg_p1+56] - mov r11, [reg_p1+64] - mov r12, [reg_p1+72] - sbb r8, [reg_p2+40] - sbb r9, [reg_p2+48] - sbb r10, [reg_p2+56] - sbb r11, [reg_p2+64] - sbb r12, [reg_p2+72] - mov [reg_p3+40], r8 - mov [reg_p3+48], r9 - mov [reg_p3+56], r10 - - mov r13, [reg_p1+80] - mov r14, [reg_p1+88] - mov r15, [reg_p1+96] - mov rcx, [reg_p1+104] - sbb r13, [reg_p2+80] - sbb r14, [reg_p2+88] - sbb r15, [reg_p2+96] - sbb rcx, [reg_p2+104] - sbb rax, 0 - - // Add p434 anded with the mask in rax - mov r8, [rip+asm_p434] - mov r9, [rip+asm_p434+24] - mov r10, [rip+asm_p434+32] - mov rdi, [rip+asm_p434+40] - mov rsi, [rip+asm_p434+48] - and r8, rax - and r9, rax - and r10, rax - and rdi, rax - and rsi, rax - mov rax, [reg_p3+56] - add rax, r8 - adc r11, r8 - adc r12, r8 - adc r13, r9 - adc r14, r10 - adc r15, rdi - adc rcx, rsi - - mov [reg_p3+56], rax - mov [reg_p3+64], r11 - mov [reg_p3+72], r12 - mov [reg_p3+80], r13 - mov [reg_p3+88], r14 - mov [reg_p3+96], r15 - mov [reg_p3+104], rcx - pop r15 - pop r14 - pop r13 - pop r12 - ret - -//*********************************************************************** -// Double 2x434-bit multiprecision subtraction -// Operation: c [reg_p3] = c [reg_p3] - a [reg_p1] - b [reg_p2] -//*********************************************************************** -.global mp_dblsub434x2_asm -mp_dblsub434x2_asm: - push r12 - push r13 - - xor rax, rax - mov r8, [reg_p3] - mov r9, [reg_p3+8] - mov r10, [reg_p3+16] - mov r11, [reg_p3+24] - mov r12, [reg_p3+32] - mov r13, [reg_p3+40] - mov rcx, [reg_p3+48] - sub r8, [reg_p1] - sbb r9, [reg_p1+8] - sbb r10, [reg_p1+16] - sbb r11, [reg_p1+24] - sbb r12, [reg_p1+32] - sbb r13, [reg_p1+40] - sbb rcx, [reg_p1+48] - adc rax, 0 - sub r8, [reg_p2] - sbb r9, [reg_p2+8] - sbb r10, [reg_p2+16] - sbb r11, [reg_p2+24] - sbb r12, [reg_p2+32] - sbb r13, [reg_p2+40] - sbb rcx, [reg_p2+48] - adc rax, 0 - mov [reg_p3], r8 - mov [reg_p3+8], r9 - mov [reg_p3+16], r10 - mov [reg_p3+24], r11 - mov [reg_p3+32], r12 - mov [reg_p3+40], r13 - mov [reg_p3+48], rcx - - mov r8, [reg_p3+56] - mov r9, [reg_p3+64] - mov r10, [reg_p3+72] - mov r11, [reg_p3+80] - mov r12, [reg_p3+88] - mov r13, [reg_p3+96] - mov rcx, [reg_p3+104] - sub r8, rax - sbb r8, [reg_p1+56] - sbb r9, [reg_p1+64] - sbb r10, [reg_p1+72] - sbb r11, [reg_p1+80] - sbb r12, [reg_p1+88] - sbb r13, [reg_p1+96] - sbb rcx, [reg_p1+104] - sub r8, [reg_p2+56] - sbb r9, [reg_p2+64] - sbb r10, [reg_p2+72] - sbb r11, [reg_p2+80] - sbb r12, [reg_p2+88] - sbb r13, [reg_p2+96] - sbb rcx, [reg_p2+104] - mov [reg_p3+56], r8 - mov [reg_p3+64], r9 - mov [reg_p3+72], r10 - mov [reg_p3+80], r11 - mov [reg_p3+88], r12 - mov [reg_p3+96], r13 - mov [reg_p3+104], rcx - - pop r13 - pop r12 - ret + sbb r9, 
[reg_p2+8] + sbb r10, [reg_p2+16] + sbb r11, [reg_p2+24] + sbb r12, [reg_p2+32] + sbb r13, [reg_p2+40] + sbb rcx, [reg_p2+48] + adc rax, 0 + mov [reg_p3], r8 + mov [reg_p3+8], r9 + mov [reg_p3+16], r10 + mov [reg_p3+24], r11 + mov [reg_p3+32], r12 + mov [reg_p3+40], r13 + mov [reg_p3+48], rcx + + mov r8, [reg_p3+56] + mov r9, [reg_p3+64] + mov r10, [reg_p3+72] + mov r11, [reg_p3+80] + mov r12, [reg_p3+88] + mov r13, [reg_p3+96] + mov rcx, [reg_p3+104] + sub r8, rax + sbb r8, [reg_p1+56] + sbb r9, [reg_p1+64] + sbb r10, [reg_p1+72] + sbb r11, [reg_p1+80] + sbb r12, [reg_p1+88] + sbb r13, [reg_p1+96] + sbb rcx, [reg_p1+104] + sub r8, [reg_p2+56] + sbb r9, [reg_p2+64] + sbb r10, [reg_p2+72] + sbb r11, [reg_p2+80] + sbb r12, [reg_p2+88] + sbb r13, [reg_p2+96] + sbb rcx, [reg_p2+104] + mov [reg_p3+56], r8 + mov [reg_p3+64], r9 + mov [reg_p3+72], r10 + mov [reg_p3+80], r11 + mov [reg_p3+88], r12 + mov [reg_p3+96], r13 + mov [reg_p3+104], rcx + + pop r13 + pop r12 + ret |