diff options
author | e-sidorov <e-sidorov@yandex-team.ru> | 2022-02-10 16:46:06 +0300 |
---|---|---|
committer | Daniil Cherednik <dcherednik@yandex-team.ru> | 2022-02-10 16:46:06 +0300 |
commit | ce2ad6f6a6f6025e37fb7f8debe7cefd3aa2307c (patch) | |
tree | 1a2c5ffcf89eb53ecd79dbc9bc0a195c27404d0c /library/cpp/digest/argonish/internal/argon2/argon2_base.h | |
parent | 1ec091f8998d76a211c6015ba6865a73b29d676a (diff) | |
download | ydb-ce2ad6f6a6f6025e37fb7f8debe7cefd3aa2307c.tar.gz |
Restoring authorship annotation for <e-sidorov@yandex-team.ru>. Commit 2 of 2.
Diffstat (limited to 'library/cpp/digest/argonish/internal/argon2/argon2_base.h')
-rw-r--r-- | library/cpp/digest/argonish/internal/argon2/argon2_base.h | 748 |
1 files changed, 374 insertions, 374 deletions
diff --git a/library/cpp/digest/argonish/internal/argon2/argon2_base.h b/library/cpp/digest/argonish/internal/argon2/argon2_base.h index 8de5b6bb42..2385cc947c 100644 --- a/library/cpp/digest/argonish/internal/argon2/argon2_base.h +++ b/library/cpp/digest/argonish/internal/argon2/argon2_base.h @@ -1,388 +1,388 @@ -#pragma once - -#include <util/generic/yexception.h> +#pragma once + +#include <util/generic/yexception.h> #include <library/cpp/digest/argonish/argon2.h> #include <library/cpp/digest/argonish/internal/blake2b/blake2b.h> #include <library/cpp/threading/poor_man_openmp/thread_helper.h> - -namespace NArgonish { - const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; + +namespace NArgonish { + const ui32 ARGON2_PREHASH_DIGEST_LENGTH = 64; const ui32 ARGON2_SECRET_MAX_LENGTH = 64; - const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; - const ui32 ARGON2_BLOCK_SIZE = 1024; - const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; - const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; - const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; - const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; - const ui32 ARGON2_SYNC_POINTS = 4; - const ui32 ARGON2_SALT_MIN_LEN = 8; - const ui32 ARGON2_MIN_OUTLEN = 4; - - struct TBlock { - ui64 V[ARGON2_QWORDS_IN_BLOCK]; - }; - - template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> - class TArgon2: public IArgon2Base { - public: - TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) - : SecretLen_(keylen) - , Tcost_(tcost) - , Atype_(atype) - { - if (SecretLen_) - memcpy(Secret_, key, keylen); - } - - virtual ~TArgon2() override { - if (SecretLen_) { - SecureZeroMemory_(Secret_, SecretLen_); - SecretLen_ = 0; - } - } - - virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); - InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, - const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hash, hashResult.Get(), hashlen); - } - - virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) - ythrow yexception() << "memory is null or its size is not enough"; - - InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); - } - - virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, - const ui8* aad = nullptr, ui32 aadlen = 0) const override { - TArrayHolder<ui8> hashResult(new ui8[hashlen]); - HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); - - return SecureCompare_(hashResult.Get(), hash, hashlen); - } - - virtual size_t GetMemorySize() const override { - return MemoryBlocks_ * sizeof(TBlock); - } - - protected: /* Constants */ - ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; - ui32 SecretLen_ = 0; - ui32 Tcost_; - EArgon2Type Atype_; - - static constexpr ui32 Lanes_ = threads; - static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; - static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); - static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; - - protected: /* Prototypes */ - virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, - TBlock* nextBlock, bool withXor) const = 0; - - virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; - virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; - - protected: /* Static functions */ - static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { - bool result = true; - for (ui32 i = 0; i < len; ++i) { - result &= (buffer1[i] == buffer2[i]); - } - return result; - } - - static void SecureZeroMemory_(void* src, size_t len) { - static void* (*const volatile memset_v)(void*, int, size_t) = &memset; - memset_v(src, 0, len); - } - - static void Store32_(ui32 value, void* mem) { - *((ui32*)mem) = value; - } - - static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(in, BLAKE2B_OUTBYTES); - hash.Final(out, BLAKE2B_OUTBYTES); - } - - static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; - const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); - - TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); - hash.Update(ARGON2_BLOCK_SIZE); - hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); - hash.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(expanded, out_buffer, HALF_OUT_BYTES); - - for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - Blake2BHash64_(out_buffer, in_buffer); - memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); - } - - Blake2BHash64_(in_buffer, out_buffer); - memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); - } - - static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { - if (outlen < BLAKE2B_OUTBYTES) { - TBlake2B<instructionSet> hash(outlen); - hash.Update(outlen); - hash.Update(in, inlen); - hash.Final(out, outlen); - } else { - ui8 out_buffer[BLAKE2B_OUTBYTES]; - ui8 in_buffer[BLAKE2B_OUTBYTES]; - ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; - + const ui32 ARGON2_PREHASH_SEED_LENGTH = 72; + const ui32 ARGON2_BLOCK_SIZE = 1024; + const ui32 ARGON2_QWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 8; + const ui32 ARGON2_OWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 16; + const ui32 ARGON2_HWORDS_IN_BLOCK = ARGON2_BLOCK_SIZE / 32; + const ui32 ARGON2_ADDRESSES_IN_BLOCK = 128; + const ui32 ARGON2_SYNC_POINTS = 4; + const ui32 ARGON2_SALT_MIN_LEN = 8; + const ui32 ARGON2_MIN_OUTLEN = 4; + + struct TBlock { + ui64 V[ARGON2_QWORDS_IN_BLOCK]; + }; + + template <EInstructionSet instructionSet, ui32 mcost, ui32 threads> + class TArgon2: public IArgon2Base { + public: + TArgon2(EArgon2Type atype, ui32 tcost, const ui8* key, ui32 keylen) + : SecretLen_(keylen) + , Tcost_(tcost) + , Atype_(atype) + { + if (SecretLen_) + memcpy(Secret_, key, keylen); + } + + virtual ~TArgon2() override { + if (SecretLen_) { + SecureZeroMemory_(Secret_, SecretLen_); + SecretLen_ = 0; + } + } + + virtual void Hash(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + ui8* out, ui32 outlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<TBlock> buffer(new TBlock[MemoryBlocks_]); + InternalHash_(buffer.Get(), pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool Verify(const ui8* pwd, ui32 pwdlen, const ui8* salt, ui32 saltlen, + const ui8* hash, ui32 hashlen, const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + Hash(pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hash, hashResult.Get(), hashlen); + } + + virtual void HashWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + if (memory == nullptr || mlen < sizeof(TBlock) * MemoryBlocks_) + ythrow yexception() << "memory is null or its size is not enough"; + + InternalHash_((TBlock*)memory, pwd, pwdlen, salt, saltlen, out, outlen, aad, aadlen); + } + + virtual bool VerifyWithCustomMemory(ui8* memory, size_t mlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* hash, ui32 hashlen, + const ui8* aad = nullptr, ui32 aadlen = 0) const override { + TArrayHolder<ui8> hashResult(new ui8[hashlen]); + HashWithCustomMemory(memory, mlen, pwd, pwdlen, salt, saltlen, hashResult.Get(), hashlen, aad, aadlen); + + return SecureCompare_(hashResult.Get(), hash, hashlen); + } + + virtual size_t GetMemorySize() const override { + return MemoryBlocks_ * sizeof(TBlock); + } + + protected: /* Constants */ + ui8 Secret_[ARGON2_SECRET_MAX_LENGTH] = {0}; + ui32 SecretLen_ = 0; + ui32 Tcost_; + EArgon2Type Atype_; + + static constexpr ui32 Lanes_ = threads; + static constexpr ui32 MemoryBlocks_ = (mcost >= 2 * ARGON2_SYNC_POINTS * Lanes_) ? (mcost - mcost % (Lanes_ * ARGON2_SYNC_POINTS)) : 2 * ARGON2_SYNC_POINTS * Lanes_; + static constexpr ui32 SegmentLength_ = MemoryBlocks_ / (Lanes_ * ARGON2_SYNC_POINTS); + static constexpr ui32 LaneLength_ = SegmentLength_ * ARGON2_SYNC_POINTS; + + protected: /* Prototypes */ + virtual void FillBlock_(const TBlock* prevBlock, const TBlock* refBlock, + TBlock* nextBlock, bool withXor) const = 0; + + virtual void CopyBlock_(TBlock* dst, const TBlock* src) const = 0; + virtual void XorBlock_(TBlock* dst, const TBlock* src) const = 0; + + protected: /* Static functions */ + static bool SecureCompare_(const ui8* buffer1, const ui8* buffer2, ui32 len) { + bool result = true; + for (ui32 i = 0; i < len; ++i) { + result &= (buffer1[i] == buffer2[i]); + } + return result; + } + + static void SecureZeroMemory_(void* src, size_t len) { + static void* (*const volatile memset_v)(void*, int, size_t) = &memset; + memset_v(src, 0, len); + } + + static void Store32_(ui32 value, void* mem) { + *((ui32*)mem) = value; + } + + static void Blake2BHash64_(ui8 out[BLAKE2B_OUTBYTES], const ui8 in[BLAKE2B_OUTBYTES]) { + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(in, BLAKE2B_OUTBYTES); + hash.Final(out, BLAKE2B_OUTBYTES); + } + + static void ExpandBlockhash_(ui8 expanded[ARGON2_BLOCK_SIZE], const ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]) { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + const ui32 HALF_OUT_BYTES = BLAKE2B_OUTBYTES / 2; + const ui32 HASH_BLOCKS_COUNT = ((ARGON2_BLOCK_SIZE / HALF_OUT_BYTES)); + + TBlake2B<instructionSet> hash(BLAKE2B_OUTBYTES); + hash.Update(ARGON2_BLOCK_SIZE); + hash.Update(blockhash, ARGON2_PREHASH_SEED_LENGTH); + hash.Final(out_buffer, BLAKE2B_OUTBYTES); + + memcpy(expanded, out_buffer, HALF_OUT_BYTES); + + for (ui32 i = 1; i < HASH_BLOCKS_COUNT - 2; ++i) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + Blake2BHash64_(out_buffer, in_buffer); + memcpy(expanded + (i * HALF_OUT_BYTES), out_buffer, HALF_OUT_BYTES); + } + + Blake2BHash64_(in_buffer, out_buffer); + memcpy(expanded + HALF_OUT_BYTES * (HASH_BLOCKS_COUNT - 2), in_buffer, BLAKE2B_OUTBYTES); + } + + static void Blake2BLong_(ui8* out, ui32 outlen, const ui8* in, ui32 inlen) { + if (outlen < BLAKE2B_OUTBYTES) { + TBlake2B<instructionSet> hash(outlen); + hash.Update(outlen); + hash.Update(in, inlen); + hash.Final(out, outlen); + } else { + ui8 out_buffer[BLAKE2B_OUTBYTES]; + ui8 in_buffer[BLAKE2B_OUTBYTES]; + ui32 toproduce = outlen - BLAKE2B_OUTBYTES / 2; + TBlake2B<instructionSet> hash1(BLAKE2B_OUTBYTES); hash1.Update(outlen); hash1.Update(in, inlen); hash1.Final(out_buffer, BLAKE2B_OUTBYTES); - - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - - while (toproduce > BLAKE2B_OUTBYTES) { - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + + while (toproduce > BLAKE2B_OUTBYTES) { + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); TBlake2B<instructionSet> hash2(BLAKE2B_OUTBYTES); hash2.Update(in_buffer, BLAKE2B_OUTBYTES); hash2.Final(out_buffer, BLAKE2B_OUTBYTES); - memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); - out += BLAKE2B_OUTBYTES / 2; - toproduce -= BLAKE2B_OUTBYTES / 2; - } - - memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); - { + memcpy(out, out_buffer, BLAKE2B_OUTBYTES / 2); + out += BLAKE2B_OUTBYTES / 2; + toproduce -= BLAKE2B_OUTBYTES / 2; + } + + memcpy(in_buffer, out_buffer, BLAKE2B_OUTBYTES); + { TBlake2B<instructionSet> hash3(toproduce); hash3.Update(in_buffer, BLAKE2B_OUTBYTES); hash3.Final(out_buffer, toproduce); - memcpy(out, out_buffer, toproduce); - } - } - } - - static void InitBlockValue_(TBlock* b, ui8 in) { - memset(b->V, in, sizeof(b->V)); - } - - protected: /* Functions */ - void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, - const ui8* aad, ui32 aadlen) const { - /* - * all parameters checks are in proxy objects - */ - - Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillMemoryBlocks_(memory); - Finalize_(memory, out, outlen); - } - - void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], - ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); - /* lanes, but lanes == threads */ - hash.Update(Lanes_); - /* outlen */ - hash.Update(outlen); - /* m_cost */ - hash.Update(mcost); - /* t_cost */ - hash.Update(Tcost_); - /* version */ - hash.Update(0x00000013); - /* Argon2 type */ - hash.Update((ui32)Atype_); - /* pwdlen */ - hash.Update(pwdlen); - /* pwd */ - hash.Update(pwd, pwdlen); - /* saltlen */ - hash.Update(saltlen); - /* salt */ - if (saltlen) - hash.Update(salt, saltlen); - /* secret */ - hash.Update(SecretLen_); - if (SecretLen_) - hash.Update((void*)Secret_, SecretLen_); - /* aadlen */ - hash.Update(aadlen); - if (aadlen) - hash.Update((void*)aad, aadlen); - hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); - } - - void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { - for (ui32 l = 0; l < Lanes_; l++) { - /* fill the first block of the lane */ - Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); - Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); - - /* fill the second block of the lane */ - Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); - ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); - } - } - - /* The 'if' will be optimized out as the number of threads is known at the compile time */ - void FillMemoryBlocks_(TBlock* memory) const { - for (ui32 t = 0; t < Tcost_; ++t) { - for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { - if (Lanes_ == 1) - FillSegment_(memory, t, 0, s); - else { - NYmp::SetThreadCount(Lanes_); - NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { - this->FillSegment_(memory, t, k, s); - }); - } - } - } - } - - void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, - const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { - ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; - InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); - FillFirstBlocks_(memory, blockhash); - } - - ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { - ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); - return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); - } - - ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { - ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); - - ui64 relativePosition = pseudoRand; - relativePosition = relativePosition * relativePosition >> 32; - relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); - - ui32 startPosition = 0; - if (pass != 0) - startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; - - return (ui32)((startPosition + relativePosition) % LaneLength_); - } - - void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { - inputBlock->V[6]++; - FillBlock_(zeroBlock, inputBlock, addressBlock, false); - FillBlock_(zeroBlock, addressBlock, addressBlock, false); - } - - void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { - TBlock blockhash; - CopyBlock_(&blockhash, memory + LaneLength_ - 1); - - /* XOR the last blocks */ - for (ui32 l = 1; l < Lanes_; ++l) { - ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); - XorBlock_(&blockhash, memory + lastBlockInLane); - } - - Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); - } - - /* The switch will be optimized out by the compiler as the type is known at the compile time */ - void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - switch (Atype_) { - case EArgon2Type::Argon2d: - FillSegmentD_(memory, pass, lane, slice); - return; - case EArgon2Type::Argon2i: - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); - return; - case EArgon2Type::Argon2id: - if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) - FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); - else - FillSegmentD_(memory, pass, lane, slice); - return; - } - } - - void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { - ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - ui64 pseudoRand = memory[prevOffset].V[0]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - - void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { - TBlock addressBlock, inputBlock, zeroBlock; - InitBlockValue_(&zeroBlock, 0); - InitBlockValue_(&inputBlock, 0); - - inputBlock.V[0] = pass; - inputBlock.V[1] = lane; - inputBlock.V[2] = slice; - inputBlock.V[3] = MemoryBlocks_; - inputBlock.V[4] = Tcost_; - inputBlock.V[5] = (ui64)atp; - - ui32 startingIndex = 0; - - if (pass == 0 && slice == 0) { - startingIndex = 2; - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; - ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; - - for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { - if (currOffset % LaneLength_ == 1) { - prevOffset = currOffset - 1; - } - - if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { - NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); - } - - ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; - ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); - ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); - - TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; - FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); - } - } - }; -} + memcpy(out, out_buffer, toproduce); + } + } + } + + static void InitBlockValue_(TBlock* b, ui8 in) { + memset(b->V, in, sizeof(b->V)); + } + + protected: /* Functions */ + void InternalHash_(TBlock* memory, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, ui8* out, ui32 outlen, + const ui8* aad, ui32 aadlen) const { + /* + * all parameters checks are in proxy objects + */ + + Initialize_(memory, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillMemoryBlocks_(memory); + Finalize_(memory, out, outlen); + } + + void InitialHash_(ui8 blockhash[ARGON2_PREHASH_DIGEST_LENGTH], + ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + TBlake2B<instructionSet> hash(ARGON2_PREHASH_DIGEST_LENGTH); + /* lanes, but lanes == threads */ + hash.Update(Lanes_); + /* outlen */ + hash.Update(outlen); + /* m_cost */ + hash.Update(mcost); + /* t_cost */ + hash.Update(Tcost_); + /* version */ + hash.Update(0x00000013); + /* Argon2 type */ + hash.Update((ui32)Atype_); + /* pwdlen */ + hash.Update(pwdlen); + /* pwd */ + hash.Update(pwd, pwdlen); + /* saltlen */ + hash.Update(saltlen); + /* salt */ + if (saltlen) + hash.Update(salt, saltlen); + /* secret */ + hash.Update(SecretLen_); + if (SecretLen_) + hash.Update((void*)Secret_, SecretLen_); + /* aadlen */ + hash.Update(aadlen); + if (aadlen) + hash.Update((void*)aad, aadlen); + hash.Final(blockhash, ARGON2_PREHASH_DIGEST_LENGTH); + } + + void FillFirstBlocks_(TBlock* blocks, ui8* blockhash) const { + for (ui32 l = 0; l < Lanes_; l++) { + /* fill the first block of the lane */ + Store32_(l, blockhash + ARGON2_PREHASH_DIGEST_LENGTH + 4); + Store32_(0, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_]), blockhash); + + /* fill the second block of the lane */ + Store32_(1, blockhash + ARGON2_PREHASH_DIGEST_LENGTH); + ExpandBlockhash_((ui8*)&(blocks[l * LaneLength_ + 1]), blockhash); + } + } + + /* The 'if' will be optimized out as the number of threads is known at the compile time */ + void FillMemoryBlocks_(TBlock* memory) const { + for (ui32 t = 0; t < Tcost_; ++t) { + for (ui32 s = 0; s < ARGON2_SYNC_POINTS; ++s) { + if (Lanes_ == 1) + FillSegment_(memory, t, 0, s); + else { + NYmp::SetThreadCount(Lanes_); + NYmp::ParallelForStaticAutoChunk<ui32>(0, Lanes_, [this, &memory, s, t](int k) { + this->FillSegment_(memory, t, k, s); + }); + } + } + } + } + + void Initialize_(TBlock* memory, ui32 outlen, const ui8* pwd, ui32 pwdlen, + const ui8* salt, ui32 saltlen, const ui8* aad, ui32 aadlen) const { + ui8 blockhash[ARGON2_PREHASH_SEED_LENGTH]; + InitialHash_(blockhash, outlen, pwd, pwdlen, salt, saltlen, aad, aadlen); + FillFirstBlocks_(memory, blockhash); + } + + ui32 ComputeReferenceArea_(ui32 pass, ui32 slice, ui32 index, bool sameLane) const { + ui32 passVal = pass == 0 ? (slice * SegmentLength_) : (LaneLength_ - SegmentLength_); + return sameLane ? passVal + (index - 1) : passVal + (index == 0 ? -1 : 0); + } + + ui32 IndexAlpha_(ui32 pass, ui32 slice, ui32 index, ui32 pseudoRand, bool sameLane) const { + ui32 referenceAreaSize = ComputeReferenceArea_(pass, slice, index, sameLane); + + ui64 relativePosition = pseudoRand; + relativePosition = relativePosition * relativePosition >> 32; + relativePosition = referenceAreaSize - 1 - (referenceAreaSize * relativePosition >> 32); + + ui32 startPosition = 0; + if (pass != 0) + startPosition = (slice == ARGON2_SYNC_POINTS - 1) ? 0 : (slice + 1) * SegmentLength_; + + return (ui32)((startPosition + relativePosition) % LaneLength_); + } + + void NextAddresses_(TBlock* addressBlock, TBlock* inputBlock, const TBlock* zeroBlock) const { + inputBlock->V[6]++; + FillBlock_(zeroBlock, inputBlock, addressBlock, false); + FillBlock_(zeroBlock, addressBlock, addressBlock, false); + } + + void Finalize_(const TBlock* memory, ui8* out, ui32 outlen) const { + TBlock blockhash; + CopyBlock_(&blockhash, memory + LaneLength_ - 1); + + /* XOR the last blocks */ + for (ui32 l = 1; l < Lanes_; ++l) { + ui32 lastBlockInLane = l * LaneLength_ + (LaneLength_ - 1); + XorBlock_(&blockhash, memory + lastBlockInLane); + } + + Blake2BLong_(out, outlen, (ui8*)blockhash.V, ARGON2_BLOCK_SIZE); + } + + /* The switch will be optimized out by the compiler as the type is known at the compile time */ + void FillSegment_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + switch (Atype_) { + case EArgon2Type::Argon2d: + FillSegmentD_(memory, pass, lane, slice); + return; + case EArgon2Type::Argon2i: + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2i); + return; + case EArgon2Type::Argon2id: + if (pass == 0 && slice < ARGON2_SYNC_POINTS / 2) + FillSegmentI_(memory, pass, lane, slice, EArgon2Type::Argon2id); + else + FillSegmentD_(memory, pass, lane, slice); + return; + } + } + + void FillSegmentD_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice) const { + ui32 startingIndex = (pass == 0 && slice == 0) ? 2 : 0; + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + ui64 pseudoRand = memory[prevOffset].V[0]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + + void FillSegmentI_(TBlock* memory, ui32 pass, ui32 lane, ui32 slice, EArgon2Type atp) const { + TBlock addressBlock, inputBlock, zeroBlock; + InitBlockValue_(&zeroBlock, 0); + InitBlockValue_(&inputBlock, 0); + + inputBlock.V[0] = pass; + inputBlock.V[1] = lane; + inputBlock.V[2] = slice; + inputBlock.V[3] = MemoryBlocks_; + inputBlock.V[4] = Tcost_; + inputBlock.V[5] = (ui64)atp; + + ui32 startingIndex = 0; + + if (pass == 0 && slice == 0) { + startingIndex = 2; + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui32 currOffset = lane * LaneLength_ + slice * SegmentLength_ + startingIndex; + ui32 prevOffset = currOffset + ((currOffset % LaneLength_ == 0) ? LaneLength_ : 0) - 1; + + for (ui32 i = startingIndex; i < SegmentLength_; ++i, ++currOffset, ++prevOffset) { + if (currOffset % LaneLength_ == 1) { + prevOffset = currOffset - 1; + } + + if (i % ARGON2_ADDRESSES_IN_BLOCK == 0) { + NextAddresses_(&addressBlock, &inputBlock, &zeroBlock); + } + + ui64 pseudoRand = addressBlock.V[i % ARGON2_ADDRESSES_IN_BLOCK]; + ui64 refLane = (pass == 0 && slice == 0) ? lane : (((pseudoRand >> 32)) % Lanes_); + ui64 refIndex = IndexAlpha_(pass, slice, i, (ui32)(pseudoRand & 0xFFFFFFFF), refLane == lane); + + TBlock* refBlock = memory + LaneLength_ * refLane + refIndex; + FillBlock_(memory + prevOffset, refBlock, memory + currOffset, pass != 0); + } + } + }; +} |